LLVM 23.0.0git
LegalizerHelper.cpp
Go to the documentation of this file.
1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
36#include "llvm/Support/Debug.h"
40#include <cassert>
41#include <numeric>
42#include <optional>
43
44#define DEBUG_TYPE "legalizer"
45
46using namespace llvm;
47using namespace LegalizeActions;
48using namespace MIPatternMatch;
49
50/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
51///
52/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
53/// with any leftover piece as type \p LeftoverTy
54///
55/// Returns -1 in the first element of the pair if the breakdown is not
56/// satisfiable.
57static std::pair<int, int>
58getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
59 assert(!LeftoverTy.isValid() && "this is an out argument");
60
61 unsigned Size = OrigTy.getSizeInBits();
62 unsigned NarrowSize = NarrowTy.getSizeInBits();
63 unsigned NumParts = Size / NarrowSize;
64 unsigned LeftoverSize = Size - NumParts * NarrowSize;
65 assert(Size > NarrowSize);
66
67 if (LeftoverSize == 0)
68 return {NumParts, 0};
69
70 if (NarrowTy.isVector()) {
71 unsigned EltSize = OrigTy.getScalarSizeInBits();
72 if (LeftoverSize % EltSize != 0)
73 return {-1, -1};
74 LeftoverTy = OrigTy.changeElementCount(
75 ElementCount::getFixed(LeftoverSize / EltSize));
76 } else {
77 LeftoverTy = LLT::integer(LeftoverSize);
78 }
79
80 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
81 return std::make_pair(NumParts, NumLeftover);
82}
83
85
86 if (!Ty.isScalar())
87 return nullptr;
88
89 switch (Ty.getSizeInBits()) {
90 case 16:
91 return Type::getHalfTy(Ctx);
92 case 32:
93 return Type::getFloatTy(Ctx);
94 case 64:
95 return Type::getDoubleTy(Ctx);
96 case 80:
97 return Type::getX86_FP80Ty(Ctx);
98 case 128:
99 return Type::getFP128Ty(Ctx);
100 default:
101 return nullptr;
102 }
103}
104
107 MachineIRBuilder &Builder,
108 const LibcallLoweringInfo *Libcalls)
109 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
110 LI(*MF.getSubtarget().getLegalizerInfo()),
111 TLI(*MF.getSubtarget().getTargetLowering()), Libcalls(Libcalls) {}
112
116 const LibcallLoweringInfo *Libcalls,
118 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
119 TLI(*MF.getSubtarget().getTargetLowering()), Libcalls(Libcalls), VT(VT) {}
120
123 LostDebugLocObserver &LocObserver) {
124 LLVM_DEBUG(dbgs() << "\nLegalizing: " << MI);
125
126 MIRBuilder.setInstrAndDebugLoc(MI);
127
128 if (isa<GIntrinsic>(MI))
129 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
130 auto Step = LI.getAction(MI, MRI);
131 switch (Step.Action) {
132 case Legal:
133 LLVM_DEBUG(dbgs() << ".. Already legal\n");
134 return AlreadyLegal;
135 case Libcall:
136 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
137 return libcall(MI, LocObserver);
138 case NarrowScalar:
139 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
140 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
141 case WidenScalar:
142 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
143 return widenScalar(MI, Step.TypeIdx, Step.NewType);
144 case Bitcast:
145 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
146 return bitcast(MI, Step.TypeIdx, Step.NewType);
147 case Lower:
148 LLVM_DEBUG(dbgs() << ".. Lower\n");
149 return lower(MI, Step.TypeIdx, Step.NewType);
150 case FewerElements:
151 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
152 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
153 case MoreElements:
154 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
155 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
156 case Custom:
157 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
158 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
160 default:
161 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
162 return UnableToLegalize;
163 }
164}
165
166void LegalizerHelper::insertParts(Register DstReg,
167 LLT ResultTy, LLT PartTy,
168 ArrayRef<Register> PartRegs,
169 LLT LeftoverTy,
170 ArrayRef<Register> LeftoverRegs) {
171 if (!LeftoverTy.isValid()) {
172 assert(LeftoverRegs.empty());
173
174 if (!ResultTy.isVector()) {
175 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
176 return;
177 }
178
179 if (PartTy.isVector())
180 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
181 else
182 MIRBuilder.buildBuildVector(DstReg, PartRegs);
183 return;
184 }
185
186 // Merge sub-vectors with different number of elements and insert into DstReg.
187 if (ResultTy.isVector()) {
188 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
189 SmallVector<Register, 8> AllRegs(PartRegs);
190 AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
191 return mergeMixedSubvectors(DstReg, AllRegs);
192 }
193
194 SmallVector<Register> GCDRegs;
195 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
196 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
197 extractGCDType(GCDRegs, GCDTy, PartReg);
198 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
199 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
200}
201
202void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
203 Register Reg) {
204 LLT Ty = MRI.getType(Reg);
206 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
207 MIRBuilder, MRI);
208 Elts.append(RegElts);
209}
210
211/// Merge \p PartRegs with different types into \p DstReg.
212void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
213 ArrayRef<Register> PartRegs) {
215 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
216 appendVectorElts(AllElts, PartRegs[i]);
217
218 Register Leftover = PartRegs[PartRegs.size() - 1];
219 if (!MRI.getType(Leftover).isVector())
220 AllElts.push_back(Leftover);
221 else
222 appendVectorElts(AllElts, Leftover);
223
224 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
225}
226
227/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
229 const MachineInstr &MI) {
230 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
231
232 const int StartIdx = Regs.size();
233 const int NumResults = MI.getNumOperands() - 1;
234 Regs.resize(Regs.size() + NumResults);
235 for (int I = 0; I != NumResults; ++I)
236 Regs[StartIdx + I] = MI.getOperand(I).getReg();
237}
238
239void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
240 LLT GCDTy, Register SrcReg) {
241 LLT SrcTy = MRI.getType(SrcReg);
242 if (SrcTy == GCDTy) {
243 // If the source already evenly divides the result type, we don't need to do
244 // anything.
245 Parts.push_back(SrcReg);
246 } else {
247 // Need to split into common type sized pieces.
248 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
249 getUnmergeResults(Parts, *Unmerge);
250 }
251}
252
253LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
254 LLT NarrowTy, Register SrcReg) {
255 LLT SrcTy = MRI.getType(SrcReg);
256 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
257 extractGCDType(Parts, GCDTy, SrcReg);
258 return GCDTy;
259}
260
261LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
263 unsigned PadStrategy) {
264 LLT LCMTy = getLCMType(DstTy, NarrowTy);
265
266 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
267 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
268 int NumOrigSrc = VRegs.size();
269
270 Register PadReg;
271
272 // Get a value we can use to pad the source value if the sources won't evenly
273 // cover the result type.
274 if (NumOrigSrc < NumParts * NumSubParts) {
275 if (PadStrategy == TargetOpcode::G_ZEXT)
276 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
277 else if (PadStrategy == TargetOpcode::G_ANYEXT)
278 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
279 else {
280 assert(PadStrategy == TargetOpcode::G_SEXT);
281
282 // Shift the sign bit of the low register through the high register.
283 auto ShiftAmt =
284 MIRBuilder.buildConstant(LLT::integer(64), GCDTy.getSizeInBits() - 1);
285 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
286 }
287 }
288
289 // Registers for the final merge to be produced.
290 SmallVector<Register, 4> Remerge(NumParts);
291
292 // Registers needed for intermediate merges, which will be merged into a
293 // source for Remerge.
294 SmallVector<Register, 4> SubMerge(NumSubParts);
295
296 // Once we've fully read off the end of the original source bits, we can reuse
297 // the same high bits for remaining padding elements.
298 Register AllPadReg;
299
300 // Build merges to the LCM type to cover the original result type.
301 for (int I = 0; I != NumParts; ++I) {
302 bool AllMergePartsArePadding = true;
303
304 // Build the requested merges to the requested type.
305 for (int J = 0; J != NumSubParts; ++J) {
306 int Idx = I * NumSubParts + J;
307 if (Idx >= NumOrigSrc) {
308 SubMerge[J] = PadReg;
309 continue;
310 }
311
312 SubMerge[J] = VRegs[Idx];
313
314 // There are meaningful bits here we can't reuse later.
315 AllMergePartsArePadding = false;
316 }
317
318 // If we've filled up a complete piece with padding bits, we can directly
319 // emit the natural sized constant if applicable, rather than a merge of
320 // smaller constants.
321 if (AllMergePartsArePadding && !AllPadReg) {
322 if (PadStrategy == TargetOpcode::G_ANYEXT)
323 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
324 else if (PadStrategy == TargetOpcode::G_ZEXT)
325 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
326
327 // If this is a sign extension, we can't materialize a trivial constant
328 // with the right type and have to produce a merge.
329 }
330
331 if (AllPadReg) {
332 // Avoid creating additional instructions if we're just adding additional
333 // copies of padding bits.
334 Remerge[I] = AllPadReg;
335 continue;
336 }
337
338 if (NumSubParts == 1)
339 Remerge[I] = SubMerge[0];
340 else
341 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
342
343 // In the sign extend padding case, re-use the first all-signbit merge.
344 if (AllMergePartsArePadding && !AllPadReg)
345 AllPadReg = Remerge[I];
346 }
347
348 VRegs = std::move(Remerge);
349 return LCMTy;
350}
351
352void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
353 ArrayRef<Register> RemergeRegs) {
354 LLT DstTy = MRI.getType(DstReg);
355
356 // Create the merge to the widened source, and extract the relevant bits into
357 // the result.
358
359 if (DstTy == LCMTy) {
360 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
361 return;
362 }
363
364 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
365 if (DstTy.isScalar() && LCMTy.isScalar()) {
366 MIRBuilder.buildTrunc(DstReg, Remerge);
367 return;
368 }
369
370 if (LCMTy.isVector()) {
371 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
372 SmallVector<Register, 8> UnmergeDefs(NumDefs);
373 UnmergeDefs[0] = DstReg;
374 for (unsigned I = 1; I != NumDefs; ++I)
375 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
376
377 MIRBuilder.buildUnmerge(UnmergeDefs,
378 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
379 return;
380 }
381
382 llvm_unreachable("unhandled case");
383}
384
385static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
386#define RTLIBCASE_INT(LibcallPrefix) \
387 do { \
388 switch (Size) { \
389 case 32: \
390 return RTLIB::LibcallPrefix##32; \
391 case 64: \
392 return RTLIB::LibcallPrefix##64; \
393 case 128: \
394 return RTLIB::LibcallPrefix##128; \
395 default: \
396 llvm_unreachable("unexpected size"); \
397 } \
398 } while (0)
399
400#define RTLIBCASE(LibcallPrefix) \
401 do { \
402 switch (Size) { \
403 case 32: \
404 return RTLIB::LibcallPrefix##32; \
405 case 64: \
406 return RTLIB::LibcallPrefix##64; \
407 case 80: \
408 return RTLIB::LibcallPrefix##80; \
409 case 128: \
410 return RTLIB::LibcallPrefix##128; \
411 default: \
412 llvm_unreachable("unexpected size"); \
413 } \
414 } while (0)
415
416 switch (Opcode) {
417 case TargetOpcode::G_LROUND:
418 RTLIBCASE(LROUND_F);
419 case TargetOpcode::G_LLROUND:
420 RTLIBCASE(LLROUND_F);
421 case TargetOpcode::G_MUL:
422 RTLIBCASE_INT(MUL_I);
423 case TargetOpcode::G_SDIV:
424 RTLIBCASE_INT(SDIV_I);
425 case TargetOpcode::G_UDIV:
426 RTLIBCASE_INT(UDIV_I);
427 case TargetOpcode::G_SREM:
428 RTLIBCASE_INT(SREM_I);
429 case TargetOpcode::G_UREM:
430 RTLIBCASE_INT(UREM_I);
431 case TargetOpcode::G_CTLZ_ZERO_POISON:
432 RTLIBCASE_INT(CTLZ_I);
433 case TargetOpcode::G_FADD:
434 RTLIBCASE(ADD_F);
435 case TargetOpcode::G_FSUB:
436 RTLIBCASE(SUB_F);
437 case TargetOpcode::G_FMUL:
438 RTLIBCASE(MUL_F);
439 case TargetOpcode::G_FDIV:
440 RTLIBCASE(DIV_F);
441 case TargetOpcode::G_FEXP:
442 RTLIBCASE(EXP_F);
443 case TargetOpcode::G_FEXP2:
444 RTLIBCASE(EXP2_F);
445 case TargetOpcode::G_FEXP10:
446 RTLIBCASE(EXP10_F);
447 case TargetOpcode::G_FREM:
448 RTLIBCASE(REM_F);
449 case TargetOpcode::G_FPOW:
450 RTLIBCASE(POW_F);
451 case TargetOpcode::G_FPOWI:
452 RTLIBCASE(POWI_F);
453 case TargetOpcode::G_FMA:
454 RTLIBCASE(FMA_F);
455 case TargetOpcode::G_FSIN:
456 RTLIBCASE(SIN_F);
457 case TargetOpcode::G_FCOS:
458 RTLIBCASE(COS_F);
459 case TargetOpcode::G_FTAN:
460 RTLIBCASE(TAN_F);
461 case TargetOpcode::G_FASIN:
462 RTLIBCASE(ASIN_F);
463 case TargetOpcode::G_FACOS:
464 RTLIBCASE(ACOS_F);
465 case TargetOpcode::G_FATAN:
466 RTLIBCASE(ATAN_F);
467 case TargetOpcode::G_FATAN2:
468 RTLIBCASE(ATAN2_F);
469 case TargetOpcode::G_FSINH:
470 RTLIBCASE(SINH_F);
471 case TargetOpcode::G_FCOSH:
472 RTLIBCASE(COSH_F);
473 case TargetOpcode::G_FTANH:
474 RTLIBCASE(TANH_F);
475 case TargetOpcode::G_FSINCOS:
476 RTLIBCASE(SINCOS_F);
477 case TargetOpcode::G_FMODF:
478 RTLIBCASE(MODF_F);
479 case TargetOpcode::G_FLOG10:
480 RTLIBCASE(LOG10_F);
481 case TargetOpcode::G_FLOG:
482 RTLIBCASE(LOG_F);
483 case TargetOpcode::G_FLOG2:
484 RTLIBCASE(LOG2_F);
485 case TargetOpcode::G_FLDEXP:
486 RTLIBCASE(LDEXP_F);
487 case TargetOpcode::G_FCEIL:
488 RTLIBCASE(CEIL_F);
489 case TargetOpcode::G_FFLOOR:
490 RTLIBCASE(FLOOR_F);
491 case TargetOpcode::G_FMINNUM:
492 RTLIBCASE(FMIN_F);
493 case TargetOpcode::G_FMAXNUM:
494 RTLIBCASE(FMAX_F);
495 case TargetOpcode::G_FMINIMUMNUM:
496 RTLIBCASE(FMINIMUM_NUM_F);
497 case TargetOpcode::G_FMAXIMUMNUM:
498 RTLIBCASE(FMAXIMUM_NUM_F);
499 case TargetOpcode::G_FSQRT:
500 RTLIBCASE(SQRT_F);
501 case TargetOpcode::G_FRINT:
502 RTLIBCASE(RINT_F);
503 case TargetOpcode::G_FNEARBYINT:
504 RTLIBCASE(NEARBYINT_F);
505 case TargetOpcode::G_INTRINSIC_TRUNC:
506 RTLIBCASE(TRUNC_F);
507 case TargetOpcode::G_INTRINSIC_ROUND:
508 RTLIBCASE(ROUND_F);
509 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
510 RTLIBCASE(ROUNDEVEN_F);
511 case TargetOpcode::G_INTRINSIC_LRINT:
512 RTLIBCASE(LRINT_F);
513 case TargetOpcode::G_INTRINSIC_LLRINT:
514 RTLIBCASE(LLRINT_F);
515 }
516 llvm_unreachable("Unknown libcall function");
517#undef RTLIBCASE_INT
518#undef RTLIBCASE
519}
520
521/// True if an instruction is in tail position in its caller. Intended for
522/// legalizing libcalls as tail calls when possible.
525 const TargetInstrInfo &TII,
526 MachineRegisterInfo &MRI) {
527 MachineBasicBlock &MBB = *MI.getParent();
528 const Function &F = MBB.getParent()->getFunction();
529
530 // Conservatively require the attributes of the call to match those of
531 // the return. Ignore NoAlias and NonNull because they don't affect the
532 // call sequence.
533 AttributeList CallerAttrs = F.getAttributes();
534 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
535 .removeAttribute(Attribute::NoAlias)
536 .removeAttribute(Attribute::NonNull)
537 .hasAttributes())
538 return false;
539
540 // It's not safe to eliminate the sign / zero extension of the return value.
541 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
542 CallerAttrs.hasRetAttr(Attribute::SExt))
543 return false;
544
545 // Only tail call if the following instruction is a standard return or if we
546 // have a `thisreturn` callee, and a sequence like:
547 //
548 // G_MEMCPY %0, %1, %2
549 // $x0 = COPY %0
550 // RET_ReallyLR implicit $x0
551 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
552 if (Next != MBB.instr_end() && Next->isCopy()) {
553 if (MI.getOpcode() == TargetOpcode::G_BZERO)
554 return false;
555
556 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
557 // memcpy/etc. routines return the same parameter. For others it will be the
558 // returned value.
559 Register VReg = MI.getOperand(0).getReg();
560 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
561 return false;
562
563 Register PReg = Next->getOperand(0).getReg();
564 if (!PReg.isPhysical())
565 return false;
566
567 auto Ret = next_nodbg(Next, MBB.instr_end());
568 if (Ret == MBB.instr_end() || !Ret->isReturn())
569 return false;
570
571 if (Ret->getNumImplicitOperands() != 1)
572 return false;
573
574 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
575 return false;
576
577 // Skip over the COPY that we just validated.
578 Next = Ret;
579 }
580
581 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
582 return false;
583
584 return true;
585}
586
588 const char *Name, const CallLowering::ArgInfo &Result,
590 LostDebugLocObserver &LocObserver, MachineInstr *MI) const {
591 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
592
594 Info.CallConv = CC;
595 Info.Callee = MachineOperand::CreateES(Name);
596 Info.OrigRet = Result;
597 if (MI)
598 Info.IsTailCall =
599 (Result.Ty->isVoidTy() ||
600 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
601 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
602 *MIRBuilder.getMRI());
603
604 llvm::append_range(Info.OrigArgs, Args);
605 if (!CLI.lowerCall(MIRBuilder, Info))
607
608 if (MI && Info.LoweredTailCall) {
609 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
610
611 // Check debug locations before removing the return.
612 LocObserver.checkpoint(true);
613
614 // We must have a return following the call (or debug insts) to get past
615 // isLibCallInTailPosition.
616 do {
617 MachineInstr *Next = MI->getNextNode();
618 assert(Next &&
619 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
620 "Expected instr following MI to be return or debug inst?");
621 // We lowered a tail call, so the call is now the return from the block.
622 // Delete the old return.
623 Next->eraseFromParent();
624 } while (MI->getNextNode());
625
626 // We expect to lose the debug location from the return.
627 LocObserver.checkpoint(false);
628 }
630}
631
633 RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result,
635 MachineInstr *MI) const {
636 if (!Libcalls)
638
639 RTLIB::LibcallImpl LibcallImpl = Libcalls->getLibcallImpl(Libcall);
640 if (LibcallImpl == RTLIB::Unsupported)
642
644 const CallingConv::ID CC = Libcalls->getLibcallImplCallingConv(LibcallImpl);
645 return createLibcall(Name.data(), Result, Args, CC, LocObserver, MI);
646}
647
648// Useful for libcalls where all operands have the same type.
651 unsigned Size, Type *OpType,
652 LostDebugLocObserver &LocObserver) const {
653 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
654
655 // FIXME: What does the original arg index mean here?
657 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
658 Args.push_back({MO.getReg(), OpType, 0});
659 return createLibcall(Libcall, {MI.getOperand(0).getReg(), OpType, 0}, Args,
660 LocObserver, &MI);
661}
662
663LegalizerHelper::LegalizeResult LegalizerHelper::emitSincosLibcall(
664 MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType,
665 LostDebugLocObserver &LocObserver) {
666 MachineFunction &MF = *MI.getMF();
668
669 Register DstSin = MI.getOperand(0).getReg();
670 Register DstCos = MI.getOperand(1).getReg();
671 Register Src = MI.getOperand(2).getReg();
672 LLT DstTy = MRI.getType(DstSin);
673
674 int MemSize = DstTy.getSizeInBytes();
675 Align Alignment = getStackTemporaryAlignment(DstTy);
677 unsigned AddrSpace = DL.getAllocaAddrSpace();
678 MachinePointerInfo PtrInfo;
679
680 Register StackPtrSin =
681 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
682 .getReg(0);
683 Register StackPtrCos =
684 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
685 .getReg(0);
686
687 auto &Ctx = MF.getFunction().getContext();
688 auto LibcallResult = createLibcall(
689 getRTLibDesc(MI.getOpcode(), Size), {{0}, Type::getVoidTy(Ctx), 0},
690 {{Src, OpType, 0},
691 {StackPtrSin, PointerType::get(Ctx, AddrSpace), 1},
692 {StackPtrCos, PointerType::get(Ctx, AddrSpace), 2}},
693 LocObserver, &MI);
694
695 if (LibcallResult != LegalizeResult::Legalized)
697
699 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
701 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
702
703 MIRBuilder.buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
704 MIRBuilder.buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
705 MI.eraseFromParent();
706
708}
709
711LegalizerHelper::emitModfLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
712 unsigned Size, Type *OpType,
713 LostDebugLocObserver &LocObserver) {
714 MachineFunction &MF = MIRBuilder.getMF();
715 MachineRegisterInfo &MRI = MF.getRegInfo();
716
717 Register DstFrac = MI.getOperand(0).getReg();
718 Register DstInt = MI.getOperand(1).getReg();
719 Register Src = MI.getOperand(2).getReg();
720 LLT DstTy = MRI.getType(DstFrac);
721
722 int MemSize = DstTy.getSizeInBytes();
723 Align Alignment = getStackTemporaryAlignment(DstTy);
724 const DataLayout &DL = MIRBuilder.getDataLayout();
725 unsigned AddrSpace = DL.getAllocaAddrSpace();
726 MachinePointerInfo PtrInfo;
727
728 Register StackPtrInt =
729 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
730 .getReg(0);
731
732 auto &Ctx = MF.getFunction().getContext();
733 auto LibcallResult = createLibcall(
734 getRTLibDesc(MI.getOpcode(), Size), {DstFrac, OpType, 0},
735 {{Src, OpType, 0}, {StackPtrInt, PointerType::get(Ctx, AddrSpace), 1}},
736 LocObserver, &MI);
737
738 if (LibcallResult != LegalizeResult::Legalized)
740
742 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
743
744 MIRBuilder.buildLoad(DstInt, StackPtrInt, *LoadMMOInt);
745 MI.eraseFromParent();
746
748}
749
750static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
751 Type *FromType) {
752 auto ToMVT = MVT::getVT(ToType);
753 auto FromMVT = MVT::getVT(FromType);
754
755 switch (Opcode) {
756 case TargetOpcode::G_FPEXT:
757 return RTLIB::getFPEXT(FromMVT, ToMVT);
758 case TargetOpcode::G_FPTRUNC:
759 return RTLIB::getFPROUND(FromMVT, ToMVT);
760 case TargetOpcode::G_FPTOSI:
761 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
762 case TargetOpcode::G_FPTOUI:
763 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
764 case TargetOpcode::G_SITOFP:
765 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
766 case TargetOpcode::G_UITOFP:
767 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
768 }
769 llvm_unreachable("Unsupported libcall function");
770}
771
773 MachineInstr &MI, Type *ToType, Type *FromType,
774 LostDebugLocObserver &LocObserver, bool IsSigned) const {
775 CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
776 if (FromType->isIntegerTy()) {
777 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
778 Arg.Flags[0].setSExt();
779 else
780 Arg.Flags[0].setZExt();
781 }
782
783 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
784 return createLibcall(Libcall, {MI.getOperand(0).getReg(), ToType, 0}, Arg,
785 LocObserver, &MI);
786}
787
790 LostDebugLocObserver &LocObserver) const {
791 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
792
794 // Add all the args, except for the last which is an imm denoting 'tail'.
795 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
796 Register Reg = MI.getOperand(i).getReg();
797
798 // Need to derive an IR type for call lowering.
799 LLT OpLLT = MRI.getType(Reg);
800 Type *OpTy = nullptr;
801 if (OpLLT.isPointer())
802 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
803 else
804 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
805 Args.push_back({Reg, OpTy, 0});
806 }
807
808 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
809 RTLIB::Libcall RTLibcall;
810 unsigned Opc = MI.getOpcode();
811 switch (Opc) {
812 case TargetOpcode::G_BZERO:
813 RTLibcall = RTLIB::BZERO;
814 break;
815 case TargetOpcode::G_MEMCPY:
816 RTLibcall = RTLIB::MEMCPY;
817 Args[0].Flags[0].setReturned();
818 break;
819 case TargetOpcode::G_MEMMOVE:
820 RTLibcall = RTLIB::MEMMOVE;
821 Args[0].Flags[0].setReturned();
822 break;
823 case TargetOpcode::G_MEMSET:
824 RTLibcall = RTLIB::MEMSET;
825 Args[0].Flags[0].setReturned();
826 break;
827 default:
828 llvm_unreachable("unsupported opcode");
829 }
830
831 if (!Libcalls) // FIXME: Should be mandatory
833
834 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
835
836 // Unsupported libcall on the target.
837 if (RTLibcallImpl == RTLIB::Unsupported) {
838 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
839 << MIRBuilder.getTII().getName(Opc) << "\n");
841 }
842
844 Info.CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
845
846 StringRef LibcallName =
848 Info.Callee = MachineOperand::CreateES(LibcallName.data());
849 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
850 Info.IsTailCall =
851 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
852 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
853
854 llvm::append_range(Info.OrigArgs, Args);
855 if (!CLI.lowerCall(MIRBuilder, Info))
857
858 if (Info.LoweredTailCall) {
859 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
860
861 // Check debug locations before removing the return.
862 LocObserver.checkpoint(true);
863
864 // We must have a return following the call (or debug insts) to get past
865 // isLibCallInTailPosition.
866 do {
867 MachineInstr *Next = MI.getNextNode();
868 assert(Next &&
869 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
870 "Expected instr following MI to be return or debug inst?");
871 // We lowered a tail call, so the call is now the return from the block.
872 // Delete the old return.
873 Next->eraseFromParent();
874 } while (MI.getNextNode());
875
876 // We expect to lose the debug location from the return.
877 LocObserver.checkpoint(false);
878 }
879
881}
882
883static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
884 unsigned Opc = MI.getOpcode();
885 auto &AtomicMI = cast<GMemOperation>(MI);
886 auto &MMO = AtomicMI.getMMO();
887 auto Ordering = MMO.getMergedOrdering();
888 LLT MemType = MMO.getMemoryType();
889 uint64_t MemSize = MemType.getSizeInBytes();
890 if (MemType.isVector())
891 return RTLIB::UNKNOWN_LIBCALL;
892
893#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
894#define LCALL5(A) \
895 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
896 switch (Opc) {
897 case TargetOpcode::G_ATOMIC_CMPXCHG:
898 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
899 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
900 return getOutlineAtomicHelper(LC, Ordering, MemSize);
901 }
902 case TargetOpcode::G_ATOMICRMW_XCHG: {
903 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
904 return getOutlineAtomicHelper(LC, Ordering, MemSize);
905 }
906 case TargetOpcode::G_ATOMICRMW_ADD:
907 case TargetOpcode::G_ATOMICRMW_SUB: {
908 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
909 return getOutlineAtomicHelper(LC, Ordering, MemSize);
910 }
911 case TargetOpcode::G_ATOMICRMW_AND: {
912 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
913 return getOutlineAtomicHelper(LC, Ordering, MemSize);
914 }
915 case TargetOpcode::G_ATOMICRMW_OR: {
916 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
917 return getOutlineAtomicHelper(LC, Ordering, MemSize);
918 }
919 case TargetOpcode::G_ATOMICRMW_XOR: {
920 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
921 return getOutlineAtomicHelper(LC, Ordering, MemSize);
922 }
923 default:
924 return RTLIB::UNKNOWN_LIBCALL;
925 }
926#undef LCALLS
927#undef LCALL5
928}
929
932 auto &Ctx = MIRBuilder.getContext();
933
934 Type *RetTy;
935 SmallVector<Register> RetRegs;
937 unsigned Opc = MI.getOpcode();
938 switch (Opc) {
939 case TargetOpcode::G_ATOMIC_CMPXCHG:
940 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
942 LLT SuccessLLT;
943 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
944 MI.getFirst4RegLLTs();
945 RetRegs.push_back(Ret);
946 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
947 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
948 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
949 NewLLT) = MI.getFirst5RegLLTs();
950 RetRegs.push_back(Success);
951 RetTy = StructType::get(
952 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
953 }
954 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
955 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
956 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
957 break;
958 }
959 case TargetOpcode::G_ATOMICRMW_XCHG:
960 case TargetOpcode::G_ATOMICRMW_ADD:
961 case TargetOpcode::G_ATOMICRMW_SUB:
962 case TargetOpcode::G_ATOMICRMW_AND:
963 case TargetOpcode::G_ATOMICRMW_OR:
964 case TargetOpcode::G_ATOMICRMW_XOR: {
965 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
966 RetRegs.push_back(Ret);
967 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
968 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
969 Val =
970 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
971 .getReg(0);
972 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
973 Val =
974 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
975 .getReg(0);
976 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
977 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
978 break;
979 }
980 default:
981 llvm_unreachable("unsupported opcode");
982 }
983
984 if (!Libcalls) // FIXME: Should be mandatory
986
987 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
988 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
989 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
990
991 // Unsupported libcall on the target.
992 if (RTLibcallImpl == RTLIB::Unsupported) {
993 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
994 << MIRBuilder.getTII().getName(Opc) << "\n");
996 }
997
999 Info.CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
1000
1001 StringRef LibcallName =
1003 Info.Callee = MachineOperand::CreateES(LibcallName.data());
1004 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
1005
1006 llvm::append_range(Info.OrigArgs, Args);
1007 if (!CLI.lowerCall(MIRBuilder, Info))
1009
1011}
1012
// Selects the runtime-library call used for an FP-environment / FP-mode
// opcode. The GET opcodes map to FEGETENV / FEGETMODE; the SET and RESET
// opcodes both map to FESETENV / FESETMODE. Any other opcode is unreachable.
1013static RTLIB::Libcall
// NOTE(review): the line carrying the function name and parameter list
// (listing line 1014) is missing from this listing. Call sites in this file
// use getStateLibraryFunctionFor(MI, TLI); confirm the exact signature
// against upstream.
1015 RTLIB::Libcall RTLibcall;
1016 switch (MI.getOpcode()) {
1017 case TargetOpcode::G_GET_FPENV:
1018 RTLibcall = RTLIB::FEGETENV;
1019 break;
 // SET and RESET share one libcall; only the argument built by the caller
 // differs.
1020 case TargetOpcode::G_SET_FPENV:
1021 case TargetOpcode::G_RESET_FPENV:
1022 RTLibcall = RTLIB::FESETENV;
1023 break;
1024 case TargetOpcode::G_GET_FPMODE:
1025 RTLibcall = RTLIB::FEGETMODE;
1026 break;
1027 case TargetOpcode::G_SET_FPMODE:
1028 case TargetOpcode::G_RESET_FPMODE:
1029 RTLibcall = RTLIB::FESETMODE;
1030 break;
1031 default:
1032 llvm_unreachable("Unexpected opcode");
1033 }
1034 return RTLibcall;
1035}
1036
1037// Some library functions that read FP state (fegetmode, fegetenv) write the
1038// state into a region in memory. IR intrinsics that perform the same operations
1039// (get_fpmode, get_fpenv) return the state as an integer value. To implement
1040// these intrinsics via the library functions, we need a temporary variable,
1041// for example:
1042//
1043// %0:_(s32) = G_GET_FPMODE
1044//
1045// is transformed to:
1046//
1047// %1:_(p0) = G_FRAME_INDEX %stack.0
1048// BL &fegetmode
1049// %0:_(s32) = G_LOAD %1
1050//
// Lowers a state-reading instruction to a library call. The callee writes the
// state into a stack temporary whose address is the only argument, and the
// value is then loaded from that temporary into operand 0 of MI.
//
// Returns the createLibcall status unchanged when it is not Legalized.
// NOTE(review): the return-type line (listing line 1051) and the final
// statement (listing line 1083) are missing from this listing; the latter is
// presumably the success return. Confirm against upstream.
1052LegalizerHelper::createGetStateLibcall(MachineInstr &MI,
1053 LostDebugLocObserver &LocObserver) {
1054 const DataLayout &DL = MIRBuilder.getDataLayout();
1055 auto &MF = MIRBuilder.getMF();
1056 auto &MRI = *MIRBuilder.getMRI();
1057 auto &Ctx = MF.getFunction().getContext();
1058
1059 // Create temporary, where library function will put the read state.
 // The slot's size and alignment are both derived from the destination's LLT.
1060 Register Dst = MI.getOperand(0).getReg();
1061 LLT StateTy = MRI.getType(Dst);
1062 TypeSize StateSize = StateTy.getSizeInBytes();
1063 Align TempAlign = getStackTemporaryAlignment(StateTy);
1064 MachinePointerInfo TempPtrInfo;
1065 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1066
1067 // Create a call to library function, with the temporary as an argument.
 // The pointer type uses the data layout's alloca address space. The call is
 // described with a void result and a single pointer argument.
1068 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1069 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1070 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1071 auto Res = createLibcall(
1072 RTLibcall, CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1073 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}), LocObserver,
1074 nullptr);
 // Propagate any non-Legalized status before emitting the load.
1075 if (Res != LegalizerHelper::Legalized)
1076 return Res;
1077
1078 // Create a load from the temporary.
 // The memory operand reuses the pointer info, type and alignment of the
 // stack slot created above.
1079 MachineMemOperand *MMO = MF.getMachineMemOperand(
1080 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
1081 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1082
1084}
1085
1086// Similar to `createGetStateLibcall`, this function calls a library function
1087// using transient space on the stack. In this case the library function reads
1088// the content of the memory region.
// Lowers a state-writing instruction to a library call. Operand 0 of MI (the
// new state) is stored into a stack temporary, and the address of that
// temporary is passed as the only argument of the callee.
//
// Returns whatever createLibcall returns.
// NOTE(review): the return-type line (listing line 1089) is missing from this
// listing. Confirm against upstream.
1090LegalizerHelper::createSetStateLibcall(MachineInstr &MI,
1091 LostDebugLocObserver &LocObserver) {
1092 const DataLayout &DL = MIRBuilder.getDataLayout();
1093 auto &MF = MIRBuilder.getMF();
1094 auto &MRI = *MIRBuilder.getMRI();
1095 auto &Ctx = MF.getFunction().getContext();
1096
1097 // Create temporary, where library function will get the new state.
 // The slot's size and alignment are both derived from the source's LLT.
1098 Register Src = MI.getOperand(0).getReg();
1099 LLT StateTy = MRI.getType(Src);
1100 TypeSize StateSize = StateTy.getSizeInBytes();
1101 Align TempAlign = getStackTemporaryAlignment(StateTy);
1102 MachinePointerInfo TempPtrInfo;
1103 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1104
1105 // Put the new state into the temporary.
 // The store is emitted before the call so the callee sees the new value.
1106 MachineMemOperand *MMO = MF.getMachineMemOperand(
1107 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
1108 MIRBuilder.buildStore(Src, Temp, *MMO);
1109
1110 // Create a call to library function, with the temporary as an argument.
 // The pointer type uses the data layout's alloca address space. The call is
 // described with a void result and a single pointer argument.
1111 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1112 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1113 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1114 return createLibcall(RTLibcall,
1115 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1116 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1117 LocObserver, nullptr);
1118}
1119
1120/// Returns the corresponding libcall for the given Pred and
1121/// the ICMP predicate that should be generated to compare with #0
1122/// after the libcall.
1123static std::pair<RTLIB::Libcall, CmpInst::Predicate>
// NOTE(review): the line carrying the function name and parameter list
// (listing line 1124) is missing from this listing. The body reads `Pred` and
// `Size`, and the caller in this file invokes getFCMPLibcallDesc(Cond, Size);
// confirm the exact signature against upstream.
//
// RTLIBCASE_CMP(LibcallPrefix, ICmpPred) returns
// {RTLIB::<LibcallPrefix><Size>, ICmpPred} for a Size of 32, 64 or 128 and is
// unreachable for any other Size. Every reachable arm returns, so a use of the
// macro leaves the function and needs no trailing `break`.
1125#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1126 do { \
1127 switch (Size) { \
1128 case 32: \
1129 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1130 case 64: \
1131 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1132 case 128: \
1133 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1134 default: \
1135 llvm_unreachable("unexpected size"); \
1136 } \
1137 } while (0)
1138
 // NOTE(review): the statement under each case label below (the odd listing
 // lines 1141-1153) is missing from this listing. Presumably each is one
 // RTLIBCASE_CMP(...) invocation; confirm against upstream.
1139 switch (Pred) {
1140 case CmpInst::FCMP_OEQ:
1142 case CmpInst::FCMP_UNE:
1144 case CmpInst::FCMP_OGE:
1146 case CmpInst::FCMP_OLT:
1148 case CmpInst::FCMP_OLE:
1150 case CmpInst::FCMP_OGT:
1152 case CmpInst::FCMP_UNO:
 // Predicates without a direct mapping are reported with this sentinel pair.
1154 default:
1155 return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
1156 }
1157}
1158
// Lowers a G_FCMP to one or two soft-float comparison libcalls, each followed
// by an integer compare of the libcall's i32 result against zero.
//
// Only operands of 32, 64 or 128 bits whose LHS and RHS types match are
// handled; anything else yields UnableToLegalize. Predicates with a direct
// libcall mapping use a single call. FCMP_UEQ and FCMP_ONE combine the OEQ and
// UNO calls. FCMP_ULT/UGE/UGT/ULE/ORD use the inverse predicate's libcall with
// the integer predicate inverted. MI itself is not erased here.
//
// NOTE(review): this listing is missing the return-type line (listing line
// 1159) and the right-hand sides of the structured bindings at listing lines
// 1220, 1224, 1241, 1246 and 1270 (presumably getFCMPLibcallDesc(...) calls).
// Confirm against upstream.
1160LegalizerHelper::createFCMPLibcall(MachineInstr &MI,
1161 LostDebugLocObserver &LocObserver) {
1162 auto &MF = MIRBuilder.getMF();
1163 auto &Ctx = MF.getFunction().getContext();
1164 const GFCmp *Cmp = cast<GFCmp>(&MI);
1165
1166 LLT OpLLT = MRI.getType(Cmp->getLHSReg());
1167 unsigned Size = OpLLT.getSizeInBits();
1168 if ((Size != 32 && Size != 64 && Size != 128) ||
1169 OpLLT != MRI.getType(Cmp->getRHSReg()))
1170 return UnableToLegalize;
1171
1172 Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
1173
1174 // DstReg type is s32
1175 const Register DstReg = Cmp->getReg(0);
1176 LLT DstTy = MRI.getType(DstReg);
1177 const auto Cond = Cmp->getCond();
1178
1179 // Reference:
1180 // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
1181 // Generates a libcall followed by ICMP.
 // Returns the register holding the ICMP result, or an invalid Register when
 // the libcall could not be created.
1182 const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
1183 const CmpInst::Predicate ICmpPred,
1184 const DstOp &Res) -> Register {
1185 // FCMP libcall always returns an i32, and needs an ICMP with #0.
1186 LLT TempLLT = LLT::integer(32);
1187 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1188 // Generate libcall, holding result in Temp
1189 const auto Status = createLibcall(
1190 Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
1191 {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
1192 LocObserver, &MI);
 // Compare against Legalized explicitly, as the other libcall status checks
 // in this file do. The previous `!Status` test only matched a zero-valued
 // status, so a failed libcall fell through and the ICMP below was built on a
 // Temp that nothing defines.
 // NOTE(review): assumes the failure status is non-zero; confirm against the
 // LegalizeResult declaration.
1193 if (Status != Legalized)
1194 return {};
1195
1196 // Compare temp with #0 to get the final result.
1197 return MIRBuilder
1198 .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
1199 .getReg(0);
1200 };
1201
1202 // Simple case if we have a direct mapping from predicate to libcall
1203 if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
1204 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1205 ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
1206 if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
1207 return Legalized;
1208 }
1209 return UnableToLegalize;
1210 }
1211
1212 // No direct mapping found, should be generated as combination of libcalls.
1213
1214 switch (Cond) {
1215 case CmpInst::FCMP_UEQ: {
1216 // FCMP_UEQ: unordered or equal
1217 // Convert into (FCMP_OEQ || FCMP_UNO).
1218
 // Each partial result goes into a fresh register of the destination type;
 // the final OR writes DstReg.
1219 const auto [OeqLibcall, OeqPred] =
1221 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1222
1223 const auto [UnoLibcall, UnoPred] =
1225 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1226 if (Oeq && Uno)
1227 MIRBuilder.buildOr(DstReg, Oeq, Uno);
1228 else
1229 return UnableToLegalize;
1230
1231 break;
1232 }
1233 case CmpInst::FCMP_ONE: {
1234 // FCMP_ONE: ordered and operands are unequal
1235 // Convert into (!FCMP_OEQ && !FCMP_UNO).
1236
1237 // We inverse the predicate instead of generating a NOT
1238 // to save one instruction.
1239 // On AArch64 isel can even select two cmp into a single ccmp.
1240 const auto [OeqLibcall, OeqPred] =
1242 const auto NotOeq =
1243 BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
1244
1245 const auto [UnoLibcall, UnoPred] =
1247 const auto NotUno =
1248 BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
1249
1250 if (NotOeq && NotUno)
1251 MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
1252 else
1253 return UnableToLegalize;
1254
1255 break;
1256 }
1257 case CmpInst::FCMP_ULT:
1258 case CmpInst::FCMP_UGE:
1259 case CmpInst::FCMP_UGT:
1260 case CmpInst::FCMP_ULE:
1261 case CmpInst::FCMP_ORD: {
1262 // Convert into: !(inverse(Pred))
1263 // E.g. FCMP_ULT becomes !FCMP_OGE
1264 // This is equivalent to the following, but saves some instructions.
1265 // MIRBuilder.buildNot(
1266 // PredTy,
1267 // MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
1268 // Op1, Op2));
1269 const auto [InversedLibcall, InversedPred] =
1271 if (!BuildLibcall(InversedLibcall,
1272 CmpInst::getInversePredicate(InversedPred), DstReg))
1273 return UnableToLegalize;
1274 break;
1275 }
1276 default:
1277 return UnableToLegalize;
1278 }
1279
1280 return Legalized;
1281}
1282
1283// This function is used to legalize operations that set the default
1284// environment state. In the C library, a call like `fesetmode(FE_DFL_MODE)` is
1285// used for that. On most targets supported by glibc, FE_DFL_MODE is defined as
1286// `((const femode_t *) -1)`; that assumption is used here. If it does not hold
1287// for some target, the target must provide custom lowering.
// Lowers a state-reset instruction to a call of the corresponding setter
// libcall, passing a pointer whose integer value is -1 as the only argument.
//
// Returns whatever createLibcall returns.
// NOTE(review): the return-type line (listing line 1288) is missing from this
// listing. Confirm against upstream.
1289LegalizerHelper::createResetStateLibcall(MachineInstr &MI,
1290 LostDebugLocObserver &LocObserver) {
1291 const DataLayout &DL = MIRBuilder.getDataLayout();
1292 auto &MF = MIRBuilder.getMF();
1293 auto &Ctx = MF.getFunction().getContext();
1294
1295 // Create an argument for the library function.
 // The pointer lives in the default globals address space. It is built as an
 // integer constant -1 of pointer width and converted with G_INTTOPTR into a
 // fresh pointer-typed virtual register.
1296 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1297 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1298 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1299 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1300 auto DefValue = MIRBuilder.buildConstant(LLT::integer(PtrSize), -1LL);
1301 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1302 MIRBuilder.buildIntToPtr(Dest, DefValue);
1303
 // The call is described with a void result and a single pointer argument.
 // Unlike the get/set state helpers in this file, &MI is passed as the last
 // argument here.
1304 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1305 return createLibcall(
1306 RTLibcall, CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1307 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}), LocObserver, &MI);
1308}
1309
1312 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1313
1314 switch (MI.getOpcode()) {
1315 default:
1316 return UnableToLegalize;
1317 case TargetOpcode::G_MUL:
1318 case TargetOpcode::G_SDIV:
1319 case TargetOpcode::G_UDIV:
1320 case TargetOpcode::G_SREM:
1321 case TargetOpcode::G_UREM:
1322 case TargetOpcode::G_CTLZ_ZERO_POISON: {
1323 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1324 unsigned Size = LLTy.getSizeInBits();
1325 Type *HLTy = IntegerType::get(Ctx, Size);
1326 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1327 if (Status != Legalized)
1328 return Status;
1329 break;
1330 }
1331 case TargetOpcode::G_FADD:
1332 case TargetOpcode::G_FSUB:
1333 case TargetOpcode::G_FMUL:
1334 case TargetOpcode::G_FDIV:
1335 case TargetOpcode::G_FMA:
1336 case TargetOpcode::G_FPOW:
1337 case TargetOpcode::G_FREM:
1338 case TargetOpcode::G_FCOS:
1339 case TargetOpcode::G_FSIN:
1340 case TargetOpcode::G_FTAN:
1341 case TargetOpcode::G_FACOS:
1342 case TargetOpcode::G_FASIN:
1343 case TargetOpcode::G_FATAN:
1344 case TargetOpcode::G_FATAN2:
1345 case TargetOpcode::G_FCOSH:
1346 case TargetOpcode::G_FSINH:
1347 case TargetOpcode::G_FTANH:
1348 case TargetOpcode::G_FLOG10:
1349 case TargetOpcode::G_FLOG:
1350 case TargetOpcode::G_FLOG2:
1351 case TargetOpcode::G_FEXP:
1352 case TargetOpcode::G_FEXP2:
1353 case TargetOpcode::G_FEXP10:
1354 case TargetOpcode::G_FCEIL:
1355 case TargetOpcode::G_FFLOOR:
1356 case TargetOpcode::G_FMINNUM:
1357 case TargetOpcode::G_FMAXNUM:
1358 case TargetOpcode::G_FMINIMUMNUM:
1359 case TargetOpcode::G_FMAXIMUMNUM:
1360 case TargetOpcode::G_FSQRT:
1361 case TargetOpcode::G_FRINT:
1362 case TargetOpcode::G_FNEARBYINT:
1363 case TargetOpcode::G_INTRINSIC_TRUNC:
1364 case TargetOpcode::G_INTRINSIC_ROUND:
1365 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1366 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1367 unsigned Size = LLTy.getSizeInBits();
1368 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1369 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1370 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1371 return UnableToLegalize;
1372 }
1373 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1374 if (Status != Legalized)
1375 return Status;
1376 break;
1377 }
1378 case TargetOpcode::G_FSINCOS: {
1379 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1380 unsigned Size = LLTy.getSizeInBits();
1381 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1382 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1383 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1384 return UnableToLegalize;
1385 }
1386 return emitSincosLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1387 }
1388 case TargetOpcode::G_FMODF: {
1389 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1390 unsigned Size = LLTy.getSizeInBits();
1391 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1392 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1393 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1394 return UnableToLegalize;
1395 }
1396 return emitModfLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1397 }
1398 case TargetOpcode::G_LROUND:
1399 case TargetOpcode::G_LLROUND:
1400 case TargetOpcode::G_INTRINSIC_LRINT:
1401 case TargetOpcode::G_INTRINSIC_LLRINT: {
1402 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1403 unsigned Size = LLTy.getSizeInBits();
1404 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1405 Type *ITy = IntegerType::get(
1406 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1407 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1408 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1409 return UnableToLegalize;
1410 }
1411 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1413 createLibcall(Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1414 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1415 if (Status != Legalized)
1416 return Status;
1417 MI.eraseFromParent();
1418 return Legalized;
1419 }
1420 case TargetOpcode::G_FPOWI:
1421 case TargetOpcode::G_FLDEXP: {
1422 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1423 unsigned Size = LLTy.getSizeInBits();
1424 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1425 Type *ITy = IntegerType::get(
1426 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1427 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1428 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1429 return UnableToLegalize;
1430 }
1431 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1433 {MI.getOperand(1).getReg(), HLTy, 0},
1434 {MI.getOperand(2).getReg(), ITy, 1}};
1435 Args[1].Flags[0].setSExt();
1437 Libcall, {MI.getOperand(0).getReg(), HLTy, 0}, Args, LocObserver, &MI);
1438 if (Status != Legalized)
1439 return Status;
1440 break;
1441 }
1442 case TargetOpcode::G_FPEXT:
1443 case TargetOpcode::G_FPTRUNC: {
1444 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1445 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1446 if (!FromTy || !ToTy)
1447 return UnableToLegalize;
1448 LegalizeResult Status = conversionLibcall(MI, ToTy, FromTy, LocObserver);
1449 if (Status != Legalized)
1450 return Status;
1451 break;
1452 }
1453 case TargetOpcode::G_FCMP: {
1454 LegalizeResult Status = createFCMPLibcall(MI, LocObserver);
1455 if (Status != Legalized)
1456 return Status;
1457 MI.eraseFromParent();
1458 return Status;
1459 }
1460 case TargetOpcode::G_FPTOSI:
1461 case TargetOpcode::G_FPTOUI: {
1462 // FIXME: Support other types
1463 Type *FromTy =
1464 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1465 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1466 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1467 return UnableToLegalize;
1469 FromTy, LocObserver);
1470 if (Status != Legalized)
1471 return Status;
1472 break;
1473 }
1474 case TargetOpcode::G_SITOFP:
1475 case TargetOpcode::G_UITOFP: {
1476 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1477 Type *ToTy =
1478 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1479 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1480 return UnableToLegalize;
1481 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
1483 MI, ToTy, Type::getIntNTy(Ctx, FromSize), LocObserver, IsSigned);
1484 if (Status != Legalized)
1485 return Status;
1486 break;
1487 }
1488 case TargetOpcode::G_ATOMICRMW_XCHG:
1489 case TargetOpcode::G_ATOMICRMW_ADD:
1490 case TargetOpcode::G_ATOMICRMW_SUB:
1491 case TargetOpcode::G_ATOMICRMW_AND:
1492 case TargetOpcode::G_ATOMICRMW_OR:
1493 case TargetOpcode::G_ATOMICRMW_XOR:
1494 case TargetOpcode::G_ATOMIC_CMPXCHG:
1495 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1497 if (Status != Legalized)
1498 return Status;
1499 break;
1500 }
1501 case TargetOpcode::G_BZERO:
1502 case TargetOpcode::G_MEMCPY:
1503 case TargetOpcode::G_MEMMOVE:
1504 case TargetOpcode::G_MEMSET: {
1505 LegalizeResult Result =
1506 createMemLibcall(*MIRBuilder.getMRI(), MI, LocObserver);
1507 if (Result != Legalized)
1508 return Result;
1509 MI.eraseFromParent();
1510 return Result;
1511 }
1512 case TargetOpcode::G_GET_FPENV:
1513 case TargetOpcode::G_GET_FPMODE: {
1514 LegalizeResult Result = createGetStateLibcall(MI, LocObserver);
1515 if (Result != Legalized)
1516 return Result;
1517 break;
1518 }
1519 case TargetOpcode::G_SET_FPENV:
1520 case TargetOpcode::G_SET_FPMODE: {
1521 LegalizeResult Result = createSetStateLibcall(MI, LocObserver);
1522 if (Result != Legalized)
1523 return Result;
1524 break;
1525 }
1526 case TargetOpcode::G_RESET_FPENV:
1527 case TargetOpcode::G_RESET_FPMODE: {
1528 LegalizeResult Result = createResetStateLibcall(MI, LocObserver);
1529 if (Result != Legalized)
1530 return Result;
1531 break;
1532 }
1533 }
1534
1535 MI.eraseFromParent();
1536 return Legalized;
1537}
1538
1540 unsigned TypeIdx,
1541 LLT NarrowTy) {
1542 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1543 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1544
1545 switch (MI.getOpcode()) {
1546 default:
1547 return UnableToLegalize;
1548 case TargetOpcode::G_IMPLICIT_DEF: {
1549 Register DstReg = MI.getOperand(0).getReg();
1550 LLT DstTy = MRI.getType(DstReg);
1551
1552 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1553 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1554 // FIXME: Although this would also be legal for the general case, it causes
1555 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1556 // combines not being hit). This seems to be a problem related to the
1557 // artifact combiner.
1558 if (SizeOp0 % NarrowSize != 0) {
1559 LLT ImplicitTy = DstTy.changeElementType(NarrowTy);
1560 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1561 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1562
1563 MI.eraseFromParent();
1564 return Legalized;
1565 }
1566
1567 int NumParts = SizeOp0 / NarrowSize;
1568
1570 for (int i = 0; i < NumParts; ++i)
1571 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1572
1573 if (DstTy.isVector())
1574 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1575 else
1576 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1577 MI.eraseFromParent();
1578 return Legalized;
1579 }
1580 case TargetOpcode::G_CONSTANT: {
1581 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1582 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1583 unsigned TotalSize = Ty.getSizeInBits();
1584 unsigned NarrowSize = NarrowTy.getSizeInBits();
1585 int NumParts = TotalSize / NarrowSize;
1586
1587 SmallVector<Register, 4> PartRegs;
1588 for (int I = 0; I != NumParts; ++I) {
1589 unsigned Offset = I * NarrowSize;
1590 auto K = MIRBuilder.buildConstant(NarrowTy,
1591 Val.lshr(Offset).trunc(NarrowSize));
1592 PartRegs.push_back(K.getReg(0));
1593 }
1594
1595 LLT LeftoverTy;
1596 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1597 SmallVector<Register, 1> LeftoverRegs;
1598 if (LeftoverBits != 0) {
1599 LeftoverTy = LLT::scalar(LeftoverBits);
1600 auto K = MIRBuilder.buildConstant(
1601 LeftoverTy,
1602 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1603 LeftoverRegs.push_back(K.getReg(0));
1604 }
1605
1606 insertParts(MI.getOperand(0).getReg(),
1607 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1608
1609 MI.eraseFromParent();
1610 return Legalized;
1611 }
1612 case TargetOpcode::G_SEXT:
1613 case TargetOpcode::G_ZEXT:
1614 case TargetOpcode::G_ANYEXT:
1615 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1616 case TargetOpcode::G_TRUNC: {
1617 if (TypeIdx != 1)
1618 return UnableToLegalize;
1619
1620 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1621 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1622 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1623 return UnableToLegalize;
1624 }
1625
1626 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1627 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1628 MI.eraseFromParent();
1629 return Legalized;
1630 }
1631 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1632 case TargetOpcode::G_FREEZE: {
1633 if (TypeIdx != 0)
1634 return UnableToLegalize;
1635
1636 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1637 // Should widen scalar first
1638 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1639 return UnableToLegalize;
1640
1641 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1643 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1644 Parts.push_back(
1645 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1646 .getReg(0));
1647 }
1648
1649 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1650 MI.eraseFromParent();
1651 return Legalized;
1652 }
1653 case TargetOpcode::G_ADD:
1654 case TargetOpcode::G_SUB:
1655 case TargetOpcode::G_SADDO:
1656 case TargetOpcode::G_SSUBO:
1657 case TargetOpcode::G_SADDE:
1658 case TargetOpcode::G_SSUBE:
1659 case TargetOpcode::G_UADDO:
1660 case TargetOpcode::G_USUBO:
1661 case TargetOpcode::G_UADDE:
1662 case TargetOpcode::G_USUBE:
1663 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1664 case TargetOpcode::G_MUL:
1665 case TargetOpcode::G_UMULH:
1666 return narrowScalarMul(MI, NarrowTy);
1667 case TargetOpcode::G_EXTRACT:
1668 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1669 case TargetOpcode::G_INSERT:
1670 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1671 case TargetOpcode::G_LOAD: {
1672 auto &LoadMI = cast<GLoad>(MI);
1673 Register DstReg = LoadMI.getDstReg();
1674 LLT DstTy = MRI.getType(DstReg);
1675 if (DstTy.isVector())
1676 return UnableToLegalize;
1677
1678 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1679 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1680 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1681 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1682 LoadMI.eraseFromParent();
1683 return Legalized;
1684 }
1685
1686 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1687 }
1688 case TargetOpcode::G_ZEXTLOAD:
1689 case TargetOpcode::G_SEXTLOAD:
1690 case TargetOpcode::G_FPEXTLOAD: {
1691 auto &LoadMI = cast<GExtLoad>(MI);
1692 Register DstReg = LoadMI.getDstReg();
1693 Register PtrReg = LoadMI.getPointerReg();
1694
1695 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1696 auto &MMO = LoadMI.getMMO();
1697 unsigned MemSize = MMO.getSizeInBits().getValue();
1698
1699 if (MemSize == NarrowSize) {
1700 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1701 } else if (MemSize < NarrowSize) {
1702 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1703 } else if (MemSize > NarrowSize) {
1704 // FIXME: Need to split the load.
1705 return UnableToLegalize;
1706 }
1707
1708 if (isa<GZExtLoad>(LoadMI))
1709 MIRBuilder.buildZExt(DstReg, TmpReg);
1710 else if (isa<GSExtLoad>(LoadMI))
1711 MIRBuilder.buildSExt(DstReg, TmpReg);
1712 else
1713 MIRBuilder.buildFPExt(DstReg, TmpReg);
1714
1715 LoadMI.eraseFromParent();
1716 return Legalized;
1717 }
1718 case TargetOpcode::G_STORE: {
1719 auto &StoreMI = cast<GStore>(MI);
1720
1721 Register SrcReg = StoreMI.getValueReg();
1722 LLT SrcTy = MRI.getType(SrcReg);
1723 if (SrcTy.isVector())
1724 return UnableToLegalize;
1725
1726 int NumParts = SizeOp0 / NarrowSize;
1727 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1728 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1729 if (SrcTy.isVector() && LeftoverBits != 0)
1730 return UnableToLegalize;
1731
1732 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1733 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1734 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1735 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1736 StoreMI.eraseFromParent();
1737 return Legalized;
1738 }
1739
1740 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1741 }
1742 case TargetOpcode::G_FPTRUNCSTORE: {
1743 auto &StoreMI = cast<GFPTruncStore>(MI);
1744 Register SrcReg = StoreMI.getValueReg();
1745 Register PtrReg = StoreMI.getPointerReg();
1746
1747 auto &MMO = StoreMI.getMMO();
1748 unsigned MemSize = MMO.getSizeInBits().getValue();
1749 if (MemSize > NarrowSize) {
1750 return UnableToLegalize;
1751 }
1752
1753 auto TmpReg = MIRBuilder.buildFPTrunc(NarrowTy, SrcReg);
1754 if (MemSize == NarrowSize) {
1755 MIRBuilder.buildStore(TmpReg, PtrReg, MMO);
1756 } else if (MemSize < NarrowSize) {
1757 MIRBuilder.buildStoreInstr(TargetOpcode::G_FPTRUNCSTORE, TmpReg, PtrReg,
1758 MMO);
1759 }
1760
1761 StoreMI.eraseFromParent();
1762 return Legalized;
1763 }
1764 case TargetOpcode::G_SELECT:
1765 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1766 case TargetOpcode::G_AND:
1767 case TargetOpcode::G_OR:
1768 case TargetOpcode::G_XOR: {
1769 // Legalize bitwise operation:
1770 // A = BinOp<Ty> B, C
1771 // into:
1772 // B1, ..., BN = G_UNMERGE_VALUES B
1773 // C1, ..., CN = G_UNMERGE_VALUES C
1774 // A1 = BinOp<Ty/N> B1, C2
1775 // ...
1776 // AN = BinOp<Ty/N> BN, CN
1777 // A = G_MERGE_VALUES A1, ..., AN
1778 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1779 }
1780 case TargetOpcode::G_SHL:
1781 case TargetOpcode::G_LSHR:
1782 case TargetOpcode::G_ASHR:
1783 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1784 case TargetOpcode::G_CTLZ:
1785 case TargetOpcode::G_CTLZ_ZERO_POISON:
1786 case TargetOpcode::G_CTTZ:
1787 case TargetOpcode::G_CTTZ_ZERO_POISON:
1788 case TargetOpcode::G_CTLS:
1789 case TargetOpcode::G_CTPOP:
1790 if (TypeIdx == 1)
1791 switch (MI.getOpcode()) {
1792 case TargetOpcode::G_CTLZ:
1793 case TargetOpcode::G_CTLZ_ZERO_POISON:
1794 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1795 case TargetOpcode::G_CTTZ:
1796 case TargetOpcode::G_CTTZ_ZERO_POISON:
1797 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1798 case TargetOpcode::G_CTPOP:
1799 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1800 case TargetOpcode::G_CTLS:
1801 return narrowScalarCTLS(MI, TypeIdx, NarrowTy);
1802 default:
1803 return UnableToLegalize;
1804 }
1805
1806 Observer.changingInstr(MI);
1807 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1808 Observer.changedInstr(MI);
1809 return Legalized;
1810 case TargetOpcode::G_INTTOPTR:
1811 if (TypeIdx != 1)
1812 return UnableToLegalize;
1813
1814 Observer.changingInstr(MI);
1815 narrowScalarSrc(MI, NarrowTy, 1);
1816 Observer.changedInstr(MI);
1817 return Legalized;
1818 case TargetOpcode::G_PTRTOINT:
1819 if (TypeIdx != 0)
1820 return UnableToLegalize;
1821
1822 Observer.changingInstr(MI);
1823 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1824 Observer.changedInstr(MI);
1825 return Legalized;
1826 case TargetOpcode::G_PHI: {
1827 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1828 // NarrowSize.
1829 if (SizeOp0 % NarrowSize != 0)
1830 return UnableToLegalize;
1831
1832 unsigned NumParts = SizeOp0 / NarrowSize;
1833 SmallVector<Register, 2> DstRegs(NumParts);
1834 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1835 Observer.changingInstr(MI);
1836 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1837 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1838 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1839 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1840 SrcRegs[i / 2], MIRBuilder, MRI);
1841 }
1842 MachineBasicBlock &MBB = *MI.getParent();
1843 MIRBuilder.setInsertPt(MBB, MI);
1844 for (unsigned i = 0; i < NumParts; ++i) {
1845 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1847 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1848 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1849 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1850 }
1851 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1852 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1853 Observer.changedInstr(MI);
1854 MI.eraseFromParent();
1855 return Legalized;
1856 }
1857 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1858 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1859 if (TypeIdx != 2)
1860 return UnableToLegalize;
1861
1862 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1863 Observer.changingInstr(MI);
1864 narrowScalarSrc(MI, NarrowTy, OpIdx);
1865 Observer.changedInstr(MI);
1866 return Legalized;
1867 }
1868 case TargetOpcode::G_ICMP: {
1869 Register LHS = MI.getOperand(2).getReg();
1870 LLT SrcTy = MRI.getType(LHS);
1871 CmpInst::Predicate Pred =
1872 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1873
1874 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1875 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1876 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1877 LHSLeftoverRegs, MIRBuilder, MRI))
1878 return UnableToLegalize;
1879
1880 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1881 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1882 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1883 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1884 return UnableToLegalize;
1885
1886 // We now have the LHS and RHS of the compare split into narrow-type
1887 // registers, plus potentially some leftover type.
1888 Register Dst = MI.getOperand(0).getReg();
1889 LLT ResTy = MRI.getType(Dst);
1890 if (ICmpInst::isEquality(Pred)) {
1891 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1892 // them together. For each equal part, the result should be all 0s. For
1893 // each non-equal part, we'll get at least one 1.
1894 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1896 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1897 auto LHS = std::get<0>(LHSAndRHS);
1898 auto RHS = std::get<1>(LHSAndRHS);
1899 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1900 Xors.push_back(Xor);
1901 }
1902
1903 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1904 // to the desired narrow type so that we can OR them together later.
1905 SmallVector<Register, 4> WidenedXors;
1906 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1907 auto LHS = std::get<0>(LHSAndRHS);
1908 auto RHS = std::get<1>(LHSAndRHS);
1909 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1910 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1911 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1912 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1913 llvm::append_range(Xors, WidenedXors);
1914 }
1915
1916 // Now, for each part we broke up, we know if they are equal/not equal
1917 // based off the G_XOR. We can OR these all together and compare against
1918 // 0 to get the result.
1919 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1920 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1921 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1922 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1923 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1924 } else {
1925 Register CmpIn;
1926 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1927 Register CmpOut;
1928 CmpInst::Predicate PartPred;
1929
1930 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1931 PartPred = Pred;
1932 CmpOut = Dst;
1933 } else {
1934 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1935 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1936 }
1937
1938 if (!CmpIn) {
1939 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
1940 RHSPartRegs[I]);
1941 } else {
1942 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
1943 RHSPartRegs[I]);
1944 auto CmpEq = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1945 LHSPartRegs[I], RHSPartRegs[I]);
1946 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1947 }
1948
1949 CmpIn = CmpOut;
1950 }
1951
1952 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1953 Register CmpOut;
1954 CmpInst::Predicate PartPred;
1955
1956 if (I == E - 1) {
1957 PartPred = Pred;
1958 CmpOut = Dst;
1959 } else {
1960 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1961 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1962 }
1963
1964 if (!CmpIn) {
1965 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
1966 RHSLeftoverRegs[I]);
1967 } else {
1968 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
1969 RHSLeftoverRegs[I]);
1970 auto CmpEq =
1971 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1972 LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
1973 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1974 }
1975
1976 CmpIn = CmpOut;
1977 }
1978 }
1979 MI.eraseFromParent();
1980 return Legalized;
1981 }
1982 case TargetOpcode::G_FCMP:
1983 if (TypeIdx != 0)
1984 return UnableToLegalize;
1985
1986 Observer.changingInstr(MI);
1987 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1988 Observer.changedInstr(MI);
1989 return Legalized;
1990
1991 case TargetOpcode::G_SEXT_INREG: {
1992 if (TypeIdx != 0)
1993 return UnableToLegalize;
1994
1995 int64_t SizeInBits = MI.getOperand(2).getImm();
1996
1997 // So long as the new type has more bits than the bits we're extending we
1998 // don't need to break it apart.
1999 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
2000 Observer.changingInstr(MI);
2001 // We don't lose any non-extension bits by truncating the src and
2002 // sign-extending the dst.
2003 MachineOperand &MO1 = MI.getOperand(1);
2004 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
2005 MO1.setReg(TruncMIB.getReg(0));
2006
2007 MachineOperand &MO2 = MI.getOperand(0);
2008 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
2009 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2010 MIRBuilder.buildSExt(MO2, DstExt);
2011 MO2.setReg(DstExt);
2012 Observer.changedInstr(MI);
2013 return Legalized;
2014 }
2015
2016 // Break it apart. Components below the extension point are unmodified. The
2017 // component containing the extension point becomes a narrower SEXT_INREG.
2018 // Components above it are ashr'd from the component containing the
2019 // extension point.
2020 if (SizeOp0 % NarrowSize != 0)
2021 return UnableToLegalize;
2022 int NumParts = SizeOp0 / NarrowSize;
2023
2024 // List the registers where the destination will be scattered.
2026 // List the registers where the source will be split.
2028
2029 // Create all the temporary registers.
2030 for (int i = 0; i < NumParts; ++i) {
2031 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
2032
2033 SrcRegs.push_back(SrcReg);
2034 }
2035
2036 // Explode the big arguments into smaller chunks.
2037 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
2038
2039 Register AshrCstReg =
2040 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
2041 .getReg(0);
2042 Register FullExtensionReg;
2043 Register PartialExtensionReg;
2044
2045 // Do the operation on each small part.
2046 for (int i = 0; i < NumParts; ++i) {
2047 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
2048 DstRegs.push_back(SrcRegs[i]);
2049 PartialExtensionReg = DstRegs.back();
2050 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
2051 assert(PartialExtensionReg &&
2052 "Expected to visit partial extension before full");
2053 if (FullExtensionReg) {
2054 DstRegs.push_back(FullExtensionReg);
2055 continue;
2056 }
2057 DstRegs.push_back(
2058 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
2059 .getReg(0));
2060 FullExtensionReg = DstRegs.back();
2061 } else {
2062 DstRegs.push_back(
2064 .buildInstr(
2065 TargetOpcode::G_SEXT_INREG, {NarrowTy},
2066 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
2067 .getReg(0));
2068 PartialExtensionReg = DstRegs.back();
2069 }
2070 }
2071
2072 // Gather the destination registers into the final destination.
2073 Register DstReg = MI.getOperand(0).getReg();
2074 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2075 MI.eraseFromParent();
2076 return Legalized;
2077 }
2078 case TargetOpcode::G_BSWAP:
2079 case TargetOpcode::G_BITREVERSE: {
2080 if (SizeOp0 % NarrowSize != 0)
2081 return UnableToLegalize;
2082
2083 Observer.changingInstr(MI);
2084 SmallVector<Register, 2> SrcRegs, DstRegs;
2085 unsigned NumParts = SizeOp0 / NarrowSize;
2086 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2087 MIRBuilder, MRI);
2088
2089 for (unsigned i = 0; i < NumParts; ++i) {
2090 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
2091 {SrcRegs[NumParts - 1 - i]});
2092 DstRegs.push_back(DstPart.getReg(0));
2093 }
2094
2095 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
2096
2097 Observer.changedInstr(MI);
2098 MI.eraseFromParent();
2099 return Legalized;
2100 }
2101 case TargetOpcode::G_PTR_ADD:
2102 case TargetOpcode::G_PTRMASK: {
2103 if (TypeIdx != 1)
2104 return UnableToLegalize;
2105 Observer.changingInstr(MI);
2106 narrowScalarSrc(MI, NarrowTy, 2);
2107 Observer.changedInstr(MI);
2108 return Legalized;
2109 }
2110 case TargetOpcode::G_FPTOUI:
2111 case TargetOpcode::G_FPTOSI:
2112 case TargetOpcode::G_FPTOUI_SAT:
2113 case TargetOpcode::G_FPTOSI_SAT:
2114 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
2115 case TargetOpcode::G_FPEXT:
2116 if (TypeIdx != 0)
2117 return UnableToLegalize;
2118 Observer.changingInstr(MI);
2119 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
2120 Observer.changedInstr(MI);
2121 return Legalized;
2122 case TargetOpcode::G_FLDEXP:
2123 case TargetOpcode::G_STRICT_FLDEXP:
2124 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
2125 case TargetOpcode::G_VSCALE: {
2126 Register Dst = MI.getOperand(0).getReg();
2127 LLT Ty = MRI.getType(Dst);
2128
2129 // Assume VSCALE(1) fits into a legal integer
2130 const APInt One(NarrowTy.getSizeInBits(), 1);
2131 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
2132 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
2133 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
2134 MIRBuilder.buildMul(Dst, ZExt, C);
2135
2136 MI.eraseFromParent();
2137 return Legalized;
2138 }
2139 }
2140}
2141
// NOTE(review): the signature line of this function (listing line 2142) is
// missing from this view; the body reads a parameter named Val and returns a
// Register. Confirm the exact declaration against the class header.
//
// Converts Val to a scalar of the same total bit width:
//  - a scalar is returned unchanged;
//  - a pointer is converted with buildPtrToInt, unless its address space is
//    non-integral, in which case a default-constructed Register is returned;
//  - anything else is asserted to be a vector and is bitcast to the scalar
//    type (vectors of pointers go through buildPtrToInt first).
2143 LLT Ty = MRI.getType(Val);
2144 if (Ty.isScalar())
2145 return Val;
2146
2147 const DataLayout &DL = MIRBuilder.getDataLayout();
// Scalar type spanning all bits of the original value.
2148 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
2149 if (Ty.isPointer()) {
2150 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2151 return Register();
2152 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2153 }
2154
2155 Register NewVal = Val;
2156
2157 assert(Ty.isVector());
// NOTE(review): the pointer-vector path hands the scalar NewTy to
// buildPtrToInt rather than an integer vector type - confirm this is intended.
2158 if (Ty.isPointerVector())
2159 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2160 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2161}
2162
2164 unsigned OpIdx, unsigned ExtOpcode) {
// NOTE(review): the first signature line (listing line 2163) is missing from
// this view; the body uses MI and WideTy, which are presumably declared there.
//
// Rewrites use operand OpIdx of MI in place: builds an ExtOpcode instruction
// that produces a WideTy value from the current operand, then points the
// operand at the newly defined register.
2165 MachineOperand &MO = MI.getOperand(OpIdx);
2166 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2167 MO.setReg(ExtB.getReg(0));
2168}
2169
2171 unsigned OpIdx) {
// NOTE(review): the first signature line (listing line 2170) is missing from
// this view; the body uses MI and NarrowTy, which are presumably declared
// there.
//
// Rewrites use operand OpIdx of MI in place: builds a truncate of the current
// operand to NarrowTy, then points the operand at the truncated register.
2172 MachineOperand &MO = MI.getOperand(OpIdx);
2173 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
2174 MO.setReg(ExtB.getReg(0));
2175}
2176
2178 unsigned OpIdx, unsigned TruncOpcode) {
// NOTE(review): the first signature line (listing line 2177) is missing from
// this view; the body uses MI and WideTy, which are presumably declared there.
//
// Rewrites def operand OpIdx of MI in place: the operand is redirected to a
// fresh WideTy virtual register, and a TruncOpcode instruction is built that
// defines the original register from that new one.
2179 MachineOperand &MO = MI.getOperand(OpIdx);
2180 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
// Advance the insertion point by one so the instruction built below lands
// after the current insert position (presumably right after MI - confirm).
2181 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
// MO still names the original register here, so it becomes the def of the
// new instruction before the operand is switched to DstExt.
2182 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2183 MO.setReg(DstExt);
2184}
2185
2187 unsigned OpIdx, unsigned ExtOpcode) {
// NOTE(review): the first signature line (listing line 2186) is missing from
// this view; the body uses MI and NarrowTy, which are presumably declared
// there.
//
// Rewrites def operand OpIdx of MI in place: the operand is redirected to a
// fresh NarrowTy virtual register, and an ExtOpcode instruction is built that
// defines the original register from that new one.
2188 MachineOperand &MO = MI.getOperand(OpIdx);
2189 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
// Advance the insertion point by one so the instruction built below lands
// after the current insert position (presumably right after MI - confirm).
2190 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
// MO still names the original register here, so it becomes the def of the
// new instruction before the operand is switched to DstTrunc.
2191 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2192 MO.setReg(DstTrunc);
2193}
2194
2196 unsigned OpIdx) {
// NOTE(review): the first signature line (listing line 2195) is missing from
// this view, so the function name is not visible here; the body uses MI and
// WideTy, which are presumably declared there.
//
// Rewrites def operand OpIdx of MI in place: the operand is redirected to a
// fresh WideTy virtual register, and the original register is then defined
// from it via buildDeleteTrailingVectorElements.
2197 MachineOperand &MO = MI.getOperand(OpIdx);
// Advance the insertion point by one so the instruction built below lands
// after the current insert position (presumably right after MI - confirm).
2198 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
// Remember the original destination before the operand is overwritten.
2199 Register Dst = MO.getReg();
2200 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2201 MO.setReg(DstExt);
2202 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2203}
2204
2206 unsigned OpIdx) {
// NOTE(review): the first signature line (listing line 2205) is missing from
// this view, so the function name is not visible here; the body uses MI and
// MoreTy, which are presumably declared there.
//
// Rewrites use operand OpIdx of MI in place so that it reads the MoreTy value
// produced by buildPadVectorWithUndefElements from the current operand.
2207 MachineOperand &MO = MI.getOperand(OpIdx);
2208 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2209}
2210
// NOTE(review): the signature line (listing line 2211) is missing from this
// view, so the function name is not visible here; the body uses MI, CastTy and
// OpIdx, which are presumably its parameters.
//
// Rewrites use operand OpIdx of MI in place so that it reads a bitcast of the
// current operand to CastTy.
2212 MachineOperand &Op = MI.getOperand(OpIdx);
2213 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
2214}
2215
// NOTE(review): the signature line (listing line 2216) is missing from this
// view, so the function name is not visible here; the body uses MI, CastTy and
// OpIdx, which are presumably its parameters.
//
// Rewrites def operand OpIdx of MI in place: the operand is redirected to a
// fresh CastTy virtual register, and a bitcast is built that defines the
// original register from that new one.
2217 MachineOperand &MO = MI.getOperand(OpIdx);
2218 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
// Advance the insertion point by one so the bitcast built below lands after
// the current insert position (presumably right after MI - confirm).
2219 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
// MO still names the original register here, so it is the bitcast's def.
2220 MIRBuilder.buildBitcast(MO, CastDst);
2221 MO.setReg(CastDst);
2222}
2223
/// Widen the source type (type index 1) of a G_MERGE_VALUES to \p WideTy.
///
/// Returns UnableToLegalize when \p TypeIdx is not 1 or the destination is a
/// vector. Two strategies are used:
///  - If \p WideTy is at least as large as the destination, every source is
///    zero-extended to \p WideTy, shifted to its bit offset and OR-ed into an
///    accumulator. The accumulator is then truncated into the destination, or
///    converted with inttoptr when the sizes match and the destination is a
///    pointer. When \p WideTy equals the destination type, the last OR writes
///    the destination directly.
///  - Otherwise the sources are split into pieces of the GCD of the source and
///    wide sizes, padded with an undef piece, re-merged into \p WideTy values,
///    and those are merged into the destination (through a truncate when the
///    padded size differs from the destination size).
/// \p MI is erased on success.
2225LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
2226 LLT WideTy) {
2227 if (TypeIdx != 1)
2228 return UnableToLegalize;
2229
2230 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
2231 if (DstTy.isVector())
2232 return UnableToLegalize;
2233
2234 LLT SrcTy = MRI.getType(Src1Reg);
2235 const int DstSize = DstTy.getSizeInBits();
2236 const int SrcSize = SrcTy.getSizeInBits();
2237 const int WideSize = WideTy.getSizeInBits();
// Number of WideTy values needed to cover the destination (rounded up).
2238 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2239
2240 unsigned NumOps = MI.getNumOperands();
2241 unsigned NumSrc = MI.getNumOperands() - 1;
2242 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2243
2244 if (WideSize >= DstSize) {
2245 // Directly pack the bits in the target type.
2246 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
2247
2248 for (unsigned I = 2; I != NumOps; ++I) {
// Operand I is source number I - 1, so it starts at bit (I - 1) * PartSize.
2249 const unsigned Offset = (I - 1) * PartSize;
2250
2251 Register SrcReg = MI.getOperand(I).getReg();
2252 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
2253
2254 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
2255
// The last OR may define DstReg directly when no trunc/inttoptr is needed.
2256 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
2257 MRI.createGenericVirtualRegister(WideTy);
2258
2259 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
2260 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
2261 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
2262 ResultReg = NextResult;
2263 }
2264
2265 if (WideSize > DstSize)
2266 MIRBuilder.buildTrunc(DstReg, ResultReg);
2267 else if (DstTy.isPointer())
2268 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
2269
2270 MI.eraseFromParent();
2271 return Legalized;
2272 }
2273
2274 // Unmerge the original values to the GCD type, and recombine to the next
2275 // multiple greater than the original type.
2276 //
2277 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2278 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2279 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2280 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2281 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2282 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2283 // %12:_(s12) = G_MERGE_VALUES %10, %11
2284 //
2285 // Padding with undef if necessary:
2286 //
2287 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2288 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2289 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2290 // %7:_(s2) = G_IMPLICIT_DEF
2291 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2292 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2293 // %10:_(s12) = G_MERGE_VALUES %8, %9
2294
2295 const int GCD = std::gcd(SrcSize, WideSize);
2296 LLT GCDTy = LLT::scalar(GCD);
2297
2298 SmallVector<Register, 8> NewMergeRegs;
2299 SmallVector<Register, 8> Unmerges;
2300 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
2301
2302 // Decompose the original operands if they don't evenly divide.
2303 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
2304 Register SrcReg = MO.getReg();
2305 if (GCD == SrcSize) {
2306 Unmerges.push_back(SrcReg);
2307 } else {
2308 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2309 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2310 Unmerges.push_back(Unmerge.getReg(J));
2311 }
2312 }
2313
// NOTE(review): the padding bound below is NumMerge * WideSize (a bit count),
// while the merge loop further down consumes only NumMerge * PartsPerGCD
// entries of Unmerges. Confirm whether the bound was meant to be expressed in
// GCD-sized pieces; as written, extra undef entries may be appended.
2314 // Pad with undef to the next size that is a multiple of the requested size.
2315 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
2316 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
2317 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
2318 Unmerges.push_back(UndefReg);
2319 }
2320
// Number of GCD-sized pieces that make up one WideTy value.
2321 const int PartsPerGCD = WideSize / GCD;
2322
2323 // Build merges of each piece.
2324 ArrayRef<Register> Slicer(Unmerges);
2325 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2326 auto Merge =
2327 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2328 NewMergeRegs.push_back(Merge.getReg(0));
2329 }
2330
2331 // A truncate may be necessary if the requested type doesn't evenly divide the
2332 // original result type.
2333 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
2334 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2335 } else {
2336 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2337 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2338 }
2339
2340 MI.eraseFromParent();
2341 return Legalized;
2342}
2343
/// Widen the destination type (type index 0) of a G_UNMERGE_VALUES to
/// \p WideTy.
///
/// Returns UnableToLegalize when \p TypeIdx is not 0, the source is a vector,
/// or the destination is not a scalar. Two strategies are used:
///  - If \p WideTy is at least as large as the source, the source is cast to
///    an integer (pointers in non-integral address spaces are rejected),
///    any-extended to \p WideTy if it is narrower, and every result is taken
///    with a logical shift right followed by a truncate.
///  - Otherwise the source is any-extended to the LCM type of the source and
///    \p WideTy (not implemented for pointer sources), unmerged into \p WideTy
///    pieces, and each piece is either unmerged straight into the original
///    destinations (padding with dead defs) or split into GCD-typed parts
///    that are re-merged into the destinations.
/// \p MI is erased on success.
2345LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
2346 LLT WideTy) {
2347 if (TypeIdx != 0)
2348 return UnableToLegalize;
2349
// The source is the last operand; all operands before it are results.
2350 int NumDst = MI.getNumOperands() - 1;
2351 Register SrcReg = MI.getOperand(NumDst).getReg();
2352 LLT SrcTy = MRI.getType(SrcReg);
2353 if (SrcTy.isVector())
2354 return UnableToLegalize;
2355
2356 Register Dst0Reg = MI.getOperand(0).getReg();
2357 LLT DstTy = MRI.getType(Dst0Reg);
2358 if (!DstTy.isScalar())
2359 return UnableToLegalize;
2360
2361 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
2362 if (SrcTy.isPointer()) {
2363 const DataLayout &DL = MIRBuilder.getDataLayout();
2364 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
2365 LLVM_DEBUG(
2366 dbgs() << "Not casting non-integral address space integer\n");
2367 return UnableToLegalize;
2368 }
2369
2370 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
2371 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2372 }
2373
2374 // Widen SrcTy to WideTy. This does not affect the result, but since the
2375 // user requested this size, it is probably better handled than SrcTy and
2376 // should reduce the total number of legalization artifacts.
2377 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2378 SrcTy = WideTy;
2379 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2380 }
2381
2382 // There is no unmerge type to target. Directly extract the bits from the
2383 // source type.
2384 unsigned DstSize = DstTy.getSizeInBits();
2385
// Result 0 is the low bits; result I is obtained by shifting right by
// DstSize * I bits before truncating.
2386 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2387 for (int I = 1; I != NumDst; ++I) {
2388 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2389 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2390 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2391 }
2392
2393 MI.eraseFromParent();
2394 return Legalized;
2395 }
2396
2397 // Extend the source to a wider type.
2398 LLT LCMTy = getLCMType(SrcTy, WideTy);
2399
2400 Register WideSrc = SrcReg;
2401 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2402 // TODO: If this is an integral address space, cast to integer and anyext.
2403 if (SrcTy.isPointer()) {
2404 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2405 return UnableToLegalize;
2406 }
2407
2408 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2409 }
2410
2411 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2412
2413 // Create a sequence of unmerges and merges to the original results. Since we
2414 // may have widened the source, we will need to pad the results with dead defs
2415 // to cover the source register.
2416 // e.g. widen s48 to s64:
2417 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2418 //
2419 // =>
2420 // %4:_(s192) = G_ANYEXT %0:_(s96)
2421 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2422 // ; unpack to GCD type, with extra dead defs
2423 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2424 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2425 // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
2426 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2427 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2428 const LLT GCDTy = getGCDType(WideTy, DstTy);
2429 const int NumUnmerge = Unmerge->getNumOperands() - 1;
// Number of GCD-typed parts that make up one original destination.
2430 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2431
2432 // Directly unmerge to the destination without going through a GCD type
2433 // if possible
2434 if (PartsPerRemerge == 1) {
2435 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2436
2437 for (int I = 0; I != NumUnmerge; ++I) {
2438 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2439
2440 for (int J = 0; J != PartsPerUnmerge; ++J) {
2441 int Idx = I * PartsPerUnmerge + J;
2442 if (Idx < NumDst)
2443 MIB.addDef(MI.getOperand(Idx).getReg());
2444 else {
2445 // Create dead def for excess components.
2446 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2447 }
2448 }
2449
2450 MIB.addUse(Unmerge.getReg(I));
2451 }
2452 } else {
2453 SmallVector<Register, 16> Parts;
2454 for (int J = 0; J != NumUnmerge; ++J)
2455 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2456
// Rebuild each original destination from PartsPerRemerge consecutive parts.
2457 SmallVector<Register, 8> RemergeParts;
2458 for (int I = 0; I != NumDst; ++I) {
2459 for (int J = 0; J < PartsPerRemerge; ++J) {
2460 const int Idx = I * PartsPerRemerge + J;
2461 RemergeParts.emplace_back(Parts[Idx]);
2462 }
2463
2464 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2465 RemergeParts.clear();
2466 }
2467 }
2468
2469 MI.eraseFromParent();
2470 return Legalized;
2471}
2472
/// Widen one of the types of a G_EXTRACT to \p WideTy.
///
/// For \p TypeIdx 0 the extract is replaced by a logical shift right by the
/// offset immediate (operand 2) followed by a truncate into the destination;
/// a zero offset skips the shift. Pointer sources are converted with ptrtoint
/// first. Vector sources or destinations, pointer destinations and pointer
/// sources in non-integral address spaces return UnableToLegalize. \p MI is
/// erased on this path.
///
/// For any other \p TypeIdx the instruction is updated in place. A scalar
/// source is any-extended to \p WideTy. A vector source is handled only when
/// the destination type equals its element type and the offset is a multiple
/// of the element size: the source is any-extended, the offset is scaled by
/// the ratio of the wide and original total sizes, and the destination is
/// widened to the scalar type of \p WideTy. Everything else returns
/// UnableToLegalize.
2474LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2475 LLT WideTy) {
2476 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2477 unsigned Offset = MI.getOperand(2).getImm();
2478
2479 if (TypeIdx == 0) {
2480 if (SrcTy.isVector() || DstTy.isVector())
2481 return UnableToLegalize;
2482
2483 SrcOp Src(SrcReg);
2484 if (SrcTy.isPointer()) {
2485 // Extracts from pointers can be handled only if they are really just
2486 // simple integers.
2487 const DataLayout &DL = MIRBuilder.getDataLayout();
2488 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2489 return UnableToLegalize;
2490
2491 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2492 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2493 SrcTy = SrcAsIntTy;
2494 }
2495
2496 if (DstTy.isPointer())
2497 return UnableToLegalize;
2498
2499 if (Offset == 0) {
2500 // Avoid a shift in the degenerate case.
2501 MIRBuilder.buildTrunc(DstReg,
2502 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2503 MI.eraseFromParent();
2504 return Legalized;
2505 }
2506
2507 // Do a shift in the source type.
// The shift is done in WideTy instead when WideTy is the larger type.
2508 LLT ShiftTy = SrcTy;
2509 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2510 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2511 ShiftTy = WideTy;
2512 }
2513
2514 auto LShr = MIRBuilder.buildLShr(
2515 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2516 MIRBuilder.buildTrunc(DstReg, LShr);
2517 MI.eraseFromParent();
2518 return Legalized;
2519 }
2520
2521 if (SrcTy.isScalar()) {
2522 Observer.changingInstr(MI);
2523 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2524 Observer.changedInstr(MI);
2525 return Legalized;
2526 }
2527
2528 if (!SrcTy.isVector())
2529 return UnableToLegalize;
2530
2531 if (DstTy != SrcTy.getElementType())
2532 return UnableToLegalize;
2533
2534 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2535 return UnableToLegalize;
2536
2537 Observer.changingInstr(MI);
2538 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2539
// Rescale the bit offset by the ratio of the wide and original source sizes.
2540 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2541 Offset);
2542 widenScalarDst(MI, WideTy.getScalarType(), 0);
2543 Observer.changedInstr(MI);
2544 return Legalized;
2545}
2546
/// Widen type index 0 of a G_INSERT to the scalar \p WideTy.
///
/// Returns UnableToLegalize when \p TypeIdx is not 0 or \p WideTy is a vector.
/// Otherwise \p MI is updated in place, bracketed by Observer notifications:
/// operand 1 is any-extended to \p WideTy and the result is widened through
/// widenScalarDst (called without an explicit operand index).
2548LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2549 LLT WideTy) {
2550 if (TypeIdx != 0 || WideTy.isVector())
2551 return UnableToLegalize;
2552 Observer.changingInstr(MI);
2553 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2554 widenScalarDst(MI, WideTy);
2555 Observer.changedInstr(MI);
2556 return Legalized;
2557}
2558
/// Widen an add/sub-with-overflow (G_[SU]ADDO / G_[SU]SUBO) or
/// add/sub-with-carry (G_[SU]ADDE / G_[SU]SUBE) instruction to \p WideTy.
///
/// The opcode switch selects the operation performed in the wide type and the
/// extension applied to the inputs: signed variants use G_SEXT and unsigned
/// variants use G_ZEXT. The carry variants map to G_UADDE / G_USUBE and read
/// the carry-in from operand 4. Any other opcode is unreachable.
///
/// For \p TypeIdx 1 the instruction is updated in place: the carry-in (when
/// present) is extended with the opcode returned by getBoolExtOp, and def
/// operand 1 is widened.
///
/// Otherwise the arithmetic is done on the extended operands in \p WideTy.
/// The wide result is truncated to the original type and extended again;
/// output operand 1 is set to whether that round trip differs from the wide
/// result, and operand 0 receives the truncated wide result. \p MI is erased.
2560LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2561 LLT WideTy) {
2562 unsigned Opcode;
2563 unsigned ExtOpcode;
2564 std::optional<Register> CarryIn;
2565 switch (MI.getOpcode()) {
2566 default:
2567 llvm_unreachable("Unexpected opcode!");
2568 case TargetOpcode::G_SADDO:
2569 Opcode = TargetOpcode::G_ADD;
2570 ExtOpcode = TargetOpcode::G_SEXT;
2571 break;
2572 case TargetOpcode::G_SSUBO:
2573 Opcode = TargetOpcode::G_SUB;
2574 ExtOpcode = TargetOpcode::G_SEXT;
2575 break;
2576 case TargetOpcode::G_UADDO:
2577 Opcode = TargetOpcode::G_ADD;
2578 ExtOpcode = TargetOpcode::G_ZEXT;
2579 break;
2580 case TargetOpcode::G_USUBO:
2581 Opcode = TargetOpcode::G_SUB;
2582 ExtOpcode = TargetOpcode::G_ZEXT;
2583 break;
2584 case TargetOpcode::G_SADDE:
2585 Opcode = TargetOpcode::G_UADDE;
2586 ExtOpcode = TargetOpcode::G_SEXT;
2587 CarryIn = MI.getOperand(4).getReg();
2588 break;
2589 case TargetOpcode::G_SSUBE:
2590 Opcode = TargetOpcode::G_USUBE;
2591 ExtOpcode = TargetOpcode::G_SEXT;
2592 CarryIn = MI.getOperand(4).getReg();
2593 break;
2594 case TargetOpcode::G_UADDE:
2595 Opcode = TargetOpcode::G_UADDE;
2596 ExtOpcode = TargetOpcode::G_ZEXT;
2597 CarryIn = MI.getOperand(4).getReg();
2598 break;
2599 case TargetOpcode::G_USUBE:
2600 Opcode = TargetOpcode::G_USUBE;
2601 ExtOpcode = TargetOpcode::G_ZEXT;
2602 CarryIn = MI.getOperand(4).getReg();
2603 break;
2604 }
2605
2606 if (TypeIdx == 1) {
2607 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2608
2609 Observer.changingInstr(MI);
2610 if (CarryIn)
2611 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2612 widenScalarDst(MI, WideTy, 1);
2613
2614 Observer.changedInstr(MI);
2615 return Legalized;
2616 }
2617
2618 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2619 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2620 // Do the arithmetic in the larger type.
2621 Register NewOp;
2622 if (CarryIn) {
// The carry variant also defines a carry-out of the original carry type;
// only its first result is used below.
2623 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2624 NewOp = MIRBuilder
2625 .buildInstr(Opcode, {WideTy, CarryOutTy},
2626 {LHSExt, RHSExt, *CarryIn})
2627 .getReg(0);
2628 } else {
2629 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2630 }
2631 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2632 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2633 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2634 // There is no overflow if the ExtOp is the same as NewOp.
2635 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2636 // Now trunc the NewOp to the original result.
2637 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2638 MI.eraseFromParent();
2639 return Legalized;
2640}
2641
/// Widen a saturating add/sub/shl (G_[SU]ADDSAT, G_[SU]SUBSAT, G_[SU]SHLSAT)
/// to \p WideTy.
///
/// The operands are extended to \p WideTy and shifted left by the difference
/// in scalar bit width, so that the same opcode applied in the wide type
/// saturates at the original width. The wide result is shifted back right
/// (arithmetic shift for the signed opcodes, logical otherwise) and truncated
/// into the destination. For the shift opcodes the right-hand side is
/// zero-extended and left unshifted. \p TypeIdx is not consulted. \p MI is
/// erased.
2643LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2644 LLT WideTy) {
2645 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2646 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2647 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2648 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2649 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2650 // We can convert this to:
2651 // 1. Any extend iN to iM
2652 // 2. SHL by M-N
2653 // 3. [US][ADD|SUB|SHL]SAT
2654 // 4. L/ASHR by M-N
2655 //
2656 // It may be more efficient to lower this to a min and a max operation in
2657 // the higher precision arithmetic if the promoted operation isn't legal,
2658 // but this decision is up to the target's lowering request.
2659 Register DstReg = MI.getOperand(0).getReg();
2660
2661 unsigned NewBits = WideTy.getScalarSizeInBits();
// M - N from the comment above: the number of bits added by widening.
2662 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2663
2664 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2665 // must not left shift the RHS to preserve the shift amount.
2666 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2667 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2668 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2669 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2670 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2671 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2672
2673 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2674 {ShiftL, ShiftR}, MI.getFlags());
2675
2676 // Use a shift that will preserve the number of sign bits when the trunc is
2677 // folded away.
2678 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2679 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2680
2681 MIRBuilder.buildTrunc(DstReg, Result);
2682 MI.eraseFromParent();
2683 return Legalized;
2684}
2685
/// Widen a multiply-with-overflow (G_UMULO / G_SMULO) to \p WideTy.
///
/// For \p TypeIdx 1 only the overflow def (operand 1) is widened, in place.
///
/// Otherwise the operands are sign- or zero-extended (G_SMULO uses G_SEXT) and
/// multiplied in \p WideTy, and the product is truncated into the original
/// result. The overflow flag is set when the wide product differs from its
/// own low source-width bits re-extended in-register. If \p WideTy is
/// narrower than twice the source width, the wide multiply is itself an
/// overflow-checking multiply and its overflow bit is OR-ed in; otherwise a
/// plain G_MUL is used. \p MI is erased.
2687LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2688 LLT WideTy) {
2689 if (TypeIdx == 1) {
2690 Observer.changingInstr(MI);
2691 widenScalarDst(MI, WideTy, 1);
2692 Observer.changedInstr(MI);
2693 return Legalized;
2694 }
2695
2696 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2697 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2698 LLT SrcTy = MRI.getType(LHS);
2699 LLT OverflowTy = MRI.getType(OriginalOverflow);
2700 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2701
2702 // To determine if the result overflowed in the larger type, we extend the
2703 // input to the larger type, do the multiply (checking if it overflows),
2704 // then also check the high bits of the result to see if overflow happened
2705 // there.
2706 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2707 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2708 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2709
2710 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2711 // so we don't need to check the overflow result of larger type Mulo.
2712 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2713
2714 unsigned MulOpc =
2715 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2716
2717 MachineInstrBuilder Mulo;
2718 if (WideMulCanOverflow)
2719 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2720 {LeftOperand, RightOperand});
2721 else
2722 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2723
// Operand 0 of the new instruction is the wide product in both cases.
2724 auto Mul = Mulo->getOperand(0);
2725 MIRBuilder.buildTrunc(Result, Mul);
2726
2727 MachineInstrBuilder ExtResult;
2728 // Overflow occurred if it occurred in the larger type, or if the high part
2729 // of the result does not zero/sign-extend the low part. Check this second
2730 // possibility first.
2731 if (IsSigned) {
2732 // For signed, overflow occurred when the high part does not sign-extend
2733 // the low part.
2734 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2735 } else {
2736 // Unsigned overflow occurred when the high part does not zero-extend the
2737 // low part.
2738 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2739 }
2740
2741 if (WideMulCanOverflow) {
2742 auto Overflow =
2743 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2744 // Finally check if the multiplication in the larger type itself overflowed.
2745 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2746 } else {
2747 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2748 }
2749 MI.eraseFromParent();
2750 return Legalized;
2751}
2752
2755 unsigned Opcode = MI.getOpcode();
2756 switch (Opcode) {
2757 default:
2758 return UnableToLegalize;
2759 case TargetOpcode::G_ATOMICRMW_XCHG:
2760 case TargetOpcode::G_ATOMICRMW_ADD:
2761 case TargetOpcode::G_ATOMICRMW_SUB:
2762 case TargetOpcode::G_ATOMICRMW_AND:
2763 case TargetOpcode::G_ATOMICRMW_OR:
2764 case TargetOpcode::G_ATOMICRMW_XOR:
2765 case TargetOpcode::G_ATOMICRMW_MIN:
2766 case TargetOpcode::G_ATOMICRMW_MAX:
2767 case TargetOpcode::G_ATOMICRMW_UMIN:
2768 case TargetOpcode::G_ATOMICRMW_UMAX:
2769 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2770 Observer.changingInstr(MI);
2771 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2772 widenScalarDst(MI, WideTy, 0);
2773 Observer.changedInstr(MI);
2774 return Legalized;
2775 case TargetOpcode::G_ATOMIC_CMPXCHG:
2776 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2777 Observer.changingInstr(MI);
2778 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2779 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2780 widenScalarDst(MI, WideTy, 0);
2781 Observer.changedInstr(MI);
2782 return Legalized;
2783 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2784 if (TypeIdx == 0) {
2785 Observer.changingInstr(MI);
2786 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2787 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2788 widenScalarDst(MI, WideTy, 0);
2789 Observer.changedInstr(MI);
2790 return Legalized;
2791 }
2792 assert(TypeIdx == 1 &&
2793 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2794 Observer.changingInstr(MI);
2795 widenScalarDst(MI, WideTy, 1);
2796 Observer.changedInstr(MI);
2797 return Legalized;
2798 case TargetOpcode::G_EXTRACT:
2799 return widenScalarExtract(MI, TypeIdx, WideTy);
2800 case TargetOpcode::G_INSERT:
2801 return widenScalarInsert(MI, TypeIdx, WideTy);
2802 case TargetOpcode::G_MERGE_VALUES:
2803 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2804 case TargetOpcode::G_UNMERGE_VALUES:
2805 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2806 case TargetOpcode::G_SADDO:
2807 case TargetOpcode::G_SSUBO:
2808 case TargetOpcode::G_UADDO:
2809 case TargetOpcode::G_USUBO:
2810 case TargetOpcode::G_SADDE:
2811 case TargetOpcode::G_SSUBE:
2812 case TargetOpcode::G_UADDE:
2813 case TargetOpcode::G_USUBE:
2814 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2815 case TargetOpcode::G_UMULO:
2816 case TargetOpcode::G_SMULO:
2817 return widenScalarMulo(MI, TypeIdx, WideTy);
2818 case TargetOpcode::G_SADDSAT:
2819 case TargetOpcode::G_SSUBSAT:
2820 case TargetOpcode::G_SSHLSAT:
2821 case TargetOpcode::G_UADDSAT:
2822 case TargetOpcode::G_USUBSAT:
2823 case TargetOpcode::G_USHLSAT:
2824 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2825 case TargetOpcode::G_CTTZ:
2826 case TargetOpcode::G_CTTZ_ZERO_POISON:
2827 case TargetOpcode::G_CTLZ:
2828 case TargetOpcode::G_CTLZ_ZERO_POISON:
2829 case TargetOpcode::G_CTLS:
2830 case TargetOpcode::G_CTPOP: {
2831 if (TypeIdx == 0) {
2832 Observer.changingInstr(MI);
2833 widenScalarDst(MI, WideTy, 0);
2834 Observer.changedInstr(MI);
2835 return Legalized;
2836 }
2837
2838 Register SrcReg = MI.getOperand(1).getReg();
2839
2840 // First extend the input.
2841 unsigned ExtOpc;
2842 switch (Opcode) {
2843 case TargetOpcode::G_CTTZ:
2844 case TargetOpcode::G_CTTZ_ZERO_POISON:
2845 case TargetOpcode::G_CTLZ_ZERO_POISON: // poison shifted out below
2846 ExtOpc = TargetOpcode::G_ANYEXT;
2847 break;
2848 case TargetOpcode::G_CTLS:
2849 ExtOpc = TargetOpcode::G_SEXT;
2850 break;
2851 default:
2852 ExtOpc = TargetOpcode::G_ZEXT;
2853 }
2854
2855 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2856 LLT CurTy = MRI.getType(SrcReg);
2857 unsigned NewOpc = Opcode;
2858 if (NewOpc == TargetOpcode::G_CTTZ) {
2859 // The count is the same in the larger type except if the original
2860 // value was zero. This can be handled by setting the bit just off
2861 // the top of the original type.
2862 auto TopBit = APInt::getOneBitSet(WideTy.getScalarSizeInBits(),
2863 CurTy.getScalarSizeInBits());
2864 MIBSrc = MIRBuilder.buildOr(
2865 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2866 // Now we know the operand is non-zero, use the more relaxed opcode.
2867 NewOpc = TargetOpcode::G_CTTZ_ZERO_POISON;
2868 }
2869
2870 unsigned SizeDiff =
2871 WideTy.getScalarSizeInBits() - CurTy.getScalarSizeInBits();
2872
2873 if (Opcode == TargetOpcode::G_CTLZ_ZERO_POISON) {
2874 // An optimization where the result is the CTLZ after the left shift by
2875 // (Difference in widety and current ty), that is,
2876 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2877 // Result = ctlz MIBSrc
2878 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2879 MIRBuilder.buildConstant(WideTy, SizeDiff));
2880 }
2881
2882 // Perform the operation at the larger size.
2883 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2884 // This is already the correct result for CTPOP and CTTZs
2885 if (Opcode == TargetOpcode::G_CTLZ || Opcode == TargetOpcode::G_CTLS) {
2886 // The correct result is NewOp - (difference between WideTy and CurTy).
2887 // For G_CTLZ the SUB is guaranteed not to wrap (unsigned), so it is
2888 // flagged nuw to help later KnownBits optimizations.
2889 MIBNewOp = MIRBuilder.buildSub(
2890 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff),
2891 Opcode == TargetOpcode::G_CTLZ
2892 ? std::optional<unsigned>(MachineInstr::NoUWrap)
2893 : std::nullopt);
2894 }
2895
2896 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2897 MI.eraseFromParent();
2898 return Legalized;
2899 }
2900 case TargetOpcode::G_BSWAP: {
2901 Observer.changingInstr(MI);
2902 Register DstReg = MI.getOperand(0).getReg();
2903
2904 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2905 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2906 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2907 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2908
2909 MI.getOperand(0).setReg(DstExt);
2910
2911 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2912
2913 LLT Ty = MRI.getType(DstReg);
2914 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2915 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2916 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2917
2918 MIRBuilder.buildTrunc(DstReg, ShrReg);
2919 Observer.changedInstr(MI);
2920 return Legalized;
2921 }
2922 case TargetOpcode::G_BITREVERSE: {
2923 Observer.changingInstr(MI);
2924
2925 Register DstReg = MI.getOperand(0).getReg();
2926 LLT Ty = MRI.getType(DstReg);
2927 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2928
2929 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2930 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2931 MI.getOperand(0).setReg(DstExt);
2932 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2933
2934 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2935 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2936 MIRBuilder.buildTrunc(DstReg, Shift);
2937 Observer.changedInstr(MI);
2938 return Legalized;
2939 }
2940 case TargetOpcode::G_FREEZE:
2941 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2942 Observer.changingInstr(MI);
2943 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2944 widenScalarDst(MI, WideTy);
2945 Observer.changedInstr(MI);
2946 return Legalized;
2947
2948 case TargetOpcode::G_ABS:
2949 Observer.changingInstr(MI);
2950 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2951 widenScalarDst(MI, WideTy);
2952 Observer.changedInstr(MI);
2953 return Legalized;
2954
2955 case TargetOpcode::G_ADD:
2956 case TargetOpcode::G_AND:
2957 case TargetOpcode::G_MUL:
2958 case TargetOpcode::G_OR:
2959 case TargetOpcode::G_XOR:
2960 case TargetOpcode::G_SUB:
2961 case TargetOpcode::G_SHUFFLE_VECTOR:
2962 // Perform operation at larger width (any extension is fine here, high bits
2963 // don't affect the result) and then truncate the result back to the
2964 // original type.
2965 Observer.changingInstr(MI);
2966 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2967 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2968 widenScalarDst(MI, WideTy);
2969 Observer.changedInstr(MI);
2970 return Legalized;
2971
2972 case TargetOpcode::G_SBFX:
2973 case TargetOpcode::G_UBFX:
2974 Observer.changingInstr(MI);
2975
2976 if (TypeIdx == 0) {
2977 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2978 widenScalarDst(MI, WideTy);
2979 } else {
2980 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2981 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2982 }
2983
2984 Observer.changedInstr(MI);
2985 return Legalized;
2986
2987 case TargetOpcode::G_SHL:
2988 Observer.changingInstr(MI);
2989
2990 if (TypeIdx == 0) {
2991 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2992 widenScalarDst(MI, WideTy);
2993 } else {
2994 assert(TypeIdx == 1);
2995 // The "number of bits to shift" operand must preserve its value as an
2996 // unsigned integer:
2997 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2998 }
2999
3000 Observer.changedInstr(MI);
3001 return Legalized;
3002
3003 case TargetOpcode::G_ROTR:
3004 case TargetOpcode::G_ROTL:
3005 if (TypeIdx != 1)
3006 return UnableToLegalize;
3007
3008 Observer.changingInstr(MI);
3009 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3010 Observer.changedInstr(MI);
3011 return Legalized;
3012
3013 case TargetOpcode::G_SDIV:
3014 case TargetOpcode::G_SREM:
3015 case TargetOpcode::G_SMIN:
3016 case TargetOpcode::G_SMAX:
3017 case TargetOpcode::G_ABDS:
3018 Observer.changingInstr(MI);
3019 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
3020 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3021 widenScalarDst(MI, WideTy);
3022 Observer.changedInstr(MI);
3023 return Legalized;
3024
3025 case TargetOpcode::G_SDIVREM:
3026 Observer.changingInstr(MI);
3027 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3028 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
3029 widenScalarDst(MI, WideTy);
3030 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3031 widenScalarDst(MI, WideTy, 1);
3032 Observer.changedInstr(MI);
3033 return Legalized;
3034
3035 case TargetOpcode::G_ASHR:
3036 case TargetOpcode::G_LSHR:
3037 Observer.changingInstr(MI);
3038
3039 if (TypeIdx == 0) {
3040 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
3041 : TargetOpcode::G_ZEXT;
3042
3043 widenScalarSrc(MI, WideTy, 1, CvtOp);
3044 widenScalarDst(MI, WideTy);
3045 } else {
3046 assert(TypeIdx == 1);
3047 // The "number of bits to shift" operand must preserve its value as an
3048 // unsigned integer:
3049 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3050 }
3051
3052 Observer.changedInstr(MI);
3053 return Legalized;
3054 case TargetOpcode::G_UDIV:
3055 case TargetOpcode::G_UREM:
3056 case TargetOpcode::G_ABDU:
3057 Observer.changingInstr(MI);
3058 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3059 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3060 widenScalarDst(MI, WideTy);
3061 Observer.changedInstr(MI);
3062 return Legalized;
3063 case TargetOpcode::G_UDIVREM:
3064 Observer.changingInstr(MI);
3065 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3066 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3067 widenScalarDst(MI, WideTy);
3068 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3069 widenScalarDst(MI, WideTy, 1);
3070 Observer.changedInstr(MI);
3071 return Legalized;
3072 case TargetOpcode::G_UMIN:
3073 case TargetOpcode::G_UMAX: {
3074 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3075
3076 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3077 unsigned ExtOpc =
3078 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty, Ctx),
3079 getApproximateEVTForLLT(WideTy, Ctx))
3080 ? TargetOpcode::G_SEXT
3081 : TargetOpcode::G_ZEXT;
3082
3083 Observer.changingInstr(MI);
3084 widenScalarSrc(MI, WideTy, 1, ExtOpc);
3085 widenScalarSrc(MI, WideTy, 2, ExtOpc);
3086 widenScalarDst(MI, WideTy);
3087 Observer.changedInstr(MI);
3088 return Legalized;
3089 }
3090
3091 case TargetOpcode::G_SELECT:
3092 Observer.changingInstr(MI);
3093 if (TypeIdx == 0) {
3094 // Perform operation at larger width (any extension is fine here, high
3095 // bits don't affect the result) and then truncate the result back to the
3096 // original type.
3097 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3098 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
3099 widenScalarDst(MI, WideTy);
3100 } else {
3101 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
3102 // Explicit extension is required here since high bits affect the result.
3103 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
3104 }
3105 Observer.changedInstr(MI);
3106 return Legalized;
3107
3108 case TargetOpcode::G_FPEXT:
3109 if (TypeIdx != 1)
3110 return UnableToLegalize;
3111
3112 Observer.changingInstr(MI);
3113 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3114 Observer.changedInstr(MI);
3115 return Legalized;
3116 case TargetOpcode::G_FPTOSI:
3117 case TargetOpcode::G_FPTOUI:
3118 case TargetOpcode::G_INTRINSIC_LRINT:
3119 case TargetOpcode::G_INTRINSIC_LLRINT:
3120 case TargetOpcode::G_IS_FPCLASS:
3121 Observer.changingInstr(MI);
3122
3123 if (TypeIdx == 0)
3124 widenScalarDst(MI, WideTy);
3125 else
3126 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3127
3128 Observer.changedInstr(MI);
3129 return Legalized;
3130 case TargetOpcode::G_SITOFP:
3131 Observer.changingInstr(MI);
3132
3133 if (TypeIdx == 0)
3134 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3135 else
3136 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
3137
3138 Observer.changedInstr(MI);
3139 return Legalized;
3140 case TargetOpcode::G_UITOFP:
3141 Observer.changingInstr(MI);
3142
3143 if (TypeIdx == 0)
3144 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3145 else
3146 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3147
3148 Observer.changedInstr(MI);
3149 return Legalized;
3150 case TargetOpcode::G_FPTOSI_SAT:
3151 case TargetOpcode::G_FPTOUI_SAT:
3152 Observer.changingInstr(MI);
3153
3154 if (TypeIdx == 0) {
3155 Register OldDst = MI.getOperand(0).getReg();
3156 LLT Ty = MRI.getType(OldDst);
3157 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3158 Register NewDst;
3159 MI.getOperand(0).setReg(ExtReg);
3160 uint64_t ShortBits = Ty.getScalarSizeInBits();
3161 uint64_t WideBits = WideTy.getScalarSizeInBits();
3162 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3163 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3164 // z = i16 fptosi_sat(a)
3165 // ->
3166 // x = i32 fptosi_sat(a)
3167 // y = smin(x, 32767)
3168 // z = smax(y, -32768)
3169 auto MaxVal = MIRBuilder.buildConstant(
3170 WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
3171 auto MinVal = MIRBuilder.buildConstant(
3172 WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
3173 Register MidReg =
3174 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3175 NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3176 } else {
3177 // z = i16 fptoui_sat(a)
3178 // ->
3179 // x = i32 fptoui_sat(a)
3180 // y = umin(x, 65535)
3181 auto MaxVal = MIRBuilder.buildConstant(
3182 WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
3183 NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3184 }
3185 MIRBuilder.buildTrunc(OldDst, NewDst);
3186 } else
3187 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3188
3189 Observer.changedInstr(MI);
3190 return Legalized;
3191 case TargetOpcode::G_LOAD:
3192 case TargetOpcode::G_SEXTLOAD:
3193 case TargetOpcode::G_ZEXTLOAD:
3194 case TargetOpcode::G_FPEXTLOAD:
3195 Observer.changingInstr(MI);
3196 widenScalarDst(MI, WideTy);
3197 Observer.changedInstr(MI);
3198 return Legalized;
3199
3200 case TargetOpcode::G_STORE: {
3201 if (TypeIdx != 0)
3202 return UnableToLegalize;
3203
3204 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3205 assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
3206 if (!Ty.isScalar()) {
3207 // We need to widen the vector element type.
3208 Observer.changingInstr(MI);
3209 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
3210 // We also need to adjust the MMO to turn this into a truncating store.
3211 MachineMemOperand &MMO = **MI.memoperands_begin();
3212 MachineFunction &MF = MIRBuilder.getMF();
3213 auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
3214 MI.setMemRefs(MF, {NewMMO});
3215 Observer.changedInstr(MI);
3216 return Legalized;
3217 }
3218
3219 Observer.changingInstr(MI);
3220
3221 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3222 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3223 widenScalarSrc(MI, WideTy, 0, ExtType);
3224
3225 Observer.changedInstr(MI);
3226 return Legalized;
3227 }
3228 case TargetOpcode::G_FPTRUNCSTORE:
3229 if (TypeIdx != 0)
3230 return UnableToLegalize;
3231 Observer.changingInstr(MI);
3232 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_FPEXT);
3233 Observer.changedInstr(MI);
3234 return Legalized;
3235 case TargetOpcode::G_CONSTANT: {
3236 MachineOperand &SrcMO = MI.getOperand(1);
3237 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3238 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3239 MRI.getType(MI.getOperand(0).getReg()));
3240 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3241 ExtOpc == TargetOpcode::G_ANYEXT) &&
3242 "Illegal Extend");
3243 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3244 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3245 ? SrcVal.sext(WideTy.getSizeInBits())
3246 : SrcVal.zext(WideTy.getSizeInBits());
3247 Observer.changingInstr(MI);
3248 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3249
3250 widenScalarDst(MI, WideTy);
3251 Observer.changedInstr(MI);
3252 return Legalized;
3253 }
3254 case TargetOpcode::G_FCONSTANT: {
3255 // To avoid changing the bits of the constant due to extension to a larger
3256 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
3257 MachineOperand &SrcMO = MI.getOperand(1);
3258 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3259 MIRBuilder.setInstrAndDebugLoc(MI);
3260 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
3261 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
3262 MI.eraseFromParent();
3263 return Legalized;
3264 }
3265 case TargetOpcode::G_IMPLICIT_DEF: {
3266 Observer.changingInstr(MI);
3267 widenScalarDst(MI, WideTy);
3268 Observer.changedInstr(MI);
3269 return Legalized;
3270 }
3271 case TargetOpcode::G_BRCOND:
3272 Observer.changingInstr(MI);
3273 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
3274 Observer.changedInstr(MI);
3275 return Legalized;
3276
3277 case TargetOpcode::G_FCMP:
3278 Observer.changingInstr(MI);
3279 if (TypeIdx == 0)
3280 widenScalarDst(MI, WideTy);
3281 else {
3282 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3283 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
3284 }
3285 Observer.changedInstr(MI);
3286 return Legalized;
3287
3288 case TargetOpcode::G_ICMP:
3289 Observer.changingInstr(MI);
3290 if (TypeIdx == 0)
3291 widenScalarDst(MI, WideTy);
3292 else {
3293 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
3294 CmpInst::Predicate Pred =
3295 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3296
3297 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3298 unsigned ExtOpcode =
3299 (CmpInst::isSigned(Pred) ||
3300 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx),
3301 getApproximateEVTForLLT(WideTy, Ctx)))
3302 ? TargetOpcode::G_SEXT
3303 : TargetOpcode::G_ZEXT;
3304 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
3305 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
3306 }
3307 Observer.changedInstr(MI);
3308 return Legalized;
3309
3310 case TargetOpcode::G_PTR_ADD:
3311 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
3312 Observer.changingInstr(MI);
3313 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3314 Observer.changedInstr(MI);
3315 return Legalized;
3316
3317 case TargetOpcode::G_PHI: {
3318 assert(TypeIdx == 0 && "Expecting only Idx 0");
3319
3320 Observer.changingInstr(MI);
3321 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3322 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3323 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
3324 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
3325 }
3326
3327 MachineBasicBlock &MBB = *MI.getParent();
3328 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3329 widenScalarDst(MI, WideTy);
3330 Observer.changedInstr(MI);
3331 return Legalized;
3332 }
3333 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3334 if (TypeIdx == 0) {
3335 Register VecReg = MI.getOperand(1).getReg();
3336 LLT VecTy = MRI.getType(VecReg);
3337 Observer.changingInstr(MI);
3338
3339 widenScalarSrc(MI, LLT::vector(VecTy.getElementCount(), WideTy), 1,
3340 TargetOpcode::G_ANYEXT);
3341
3342 widenScalarDst(MI, WideTy, 0);
3343 Observer.changedInstr(MI);
3344 return Legalized;
3345 }
3346
3347 if (TypeIdx != 2)
3348 return UnableToLegalize;
3349 Observer.changingInstr(MI);
3350 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3351 Observer.changedInstr(MI);
3352 return Legalized;
3353 }
3354 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3355 if (TypeIdx == 0) {
3356 Observer.changingInstr(MI);
3357 const LLT WideEltTy = WideTy.getElementType();
3358
3359 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3360 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3361 widenScalarDst(MI, WideTy, 0);
3362 Observer.changedInstr(MI);
3363 return Legalized;
3364 }
3365
3366 if (TypeIdx == 1) {
3367 Observer.changingInstr(MI);
3368
3369 Register VecReg = MI.getOperand(1).getReg();
3370 LLT VecTy = MRI.getType(VecReg);
3371 LLT WideVecTy = VecTy.changeVectorElementType(WideTy);
3372
3373 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3374 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3375 widenScalarDst(MI, WideVecTy, 0);
3376 Observer.changedInstr(MI);
3377 return Legalized;
3378 }
3379
3380 if (TypeIdx == 2) {
3381 Observer.changingInstr(MI);
3382 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3383 Observer.changedInstr(MI);
3384 return Legalized;
3385 }
3386
3387 return UnableToLegalize;
3388 }
3389 case TargetOpcode::G_FADD:
3390 case TargetOpcode::G_FMUL:
3391 case TargetOpcode::G_FSUB:
3392 case TargetOpcode::G_FMA:
3393 case TargetOpcode::G_FMAD:
3394 case TargetOpcode::G_FNEG:
3395 case TargetOpcode::G_FABS:
3396 case TargetOpcode::G_FCANONICALIZE:
3397 case TargetOpcode::G_FMINNUM:
3398 case TargetOpcode::G_FMAXNUM:
3399 case TargetOpcode::G_FMINNUM_IEEE:
3400 case TargetOpcode::G_FMAXNUM_IEEE:
3401 case TargetOpcode::G_FMINIMUM:
3402 case TargetOpcode::G_FMAXIMUM:
3403 case TargetOpcode::G_FMINIMUMNUM:
3404 case TargetOpcode::G_FMAXIMUMNUM:
3405 case TargetOpcode::G_FDIV:
3406 case TargetOpcode::G_FREM:
3407 case TargetOpcode::G_FCEIL:
3408 case TargetOpcode::G_FFLOOR:
3409 case TargetOpcode::G_FCOS:
3410 case TargetOpcode::G_FSIN:
3411 case TargetOpcode::G_FTAN:
3412 case TargetOpcode::G_FACOS:
3413 case TargetOpcode::G_FASIN:
3414 case TargetOpcode::G_FATAN:
3415 case TargetOpcode::G_FATAN2:
3416 case TargetOpcode::G_FCOSH:
3417 case TargetOpcode::G_FSINH:
3418 case TargetOpcode::G_FTANH:
3419 case TargetOpcode::G_FLOG10:
3420 case TargetOpcode::G_FLOG:
3421 case TargetOpcode::G_FLOG2:
3422 case TargetOpcode::G_FRINT:
3423 case TargetOpcode::G_FNEARBYINT:
3424 case TargetOpcode::G_FSQRT:
3425 case TargetOpcode::G_FEXP:
3426 case TargetOpcode::G_FEXP2:
3427 case TargetOpcode::G_FEXP10:
3428 case TargetOpcode::G_FPOW:
3429 case TargetOpcode::G_INTRINSIC_TRUNC:
3430 case TargetOpcode::G_INTRINSIC_ROUND:
3431 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3432 assert(TypeIdx == 0);
3433 Observer.changingInstr(MI);
3434
3435 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3436 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3437
3438 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3439 Observer.changedInstr(MI);
3440 return Legalized;
3441 case TargetOpcode::G_FMODF: {
3442 Observer.changingInstr(MI);
3443 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3444
3445 widenScalarDst(MI, WideTy, 1, TargetOpcode::G_FPTRUNC);
3446 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3447 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3448 Observer.changedInstr(MI);
3449 return Legalized;
3450 }
3451 case TargetOpcode::G_FPOWI:
3452 case TargetOpcode::G_FLDEXP:
3453 case TargetOpcode::G_STRICT_FLDEXP: {
3454 if (TypeIdx == 0) {
3455 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3456 return UnableToLegalize;
3457
3458 Observer.changingInstr(MI);
3459 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3460 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3461 Observer.changedInstr(MI);
3462 return Legalized;
3463 }
3464
3465 if (TypeIdx == 1) {
3466 // For some reason SelectionDAG tries to promote to a libcall without
3467 // actually changing the integer type for promotion.
3468 Observer.changingInstr(MI);
3469 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3470 Observer.changedInstr(MI);
3471 return Legalized;
3472 }
3473
3474 return UnableToLegalize;
3475 }
3476 case TargetOpcode::G_FFREXP: {
3477 Observer.changingInstr(MI);
3478
3479 if (TypeIdx == 0) {
3480 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3481 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3482 } else {
3483 widenScalarDst(MI, WideTy, 1);
3484 }
3485
3486 Observer.changedInstr(MI);
3487 return Legalized;
3488 }
3489 case TargetOpcode::G_LROUND:
3490 case TargetOpcode::G_LLROUND:
3491 Observer.changingInstr(MI);
3492
3493 if (TypeIdx == 0)
3494 widenScalarDst(MI, WideTy);
3495 else
3496 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3497
3498 Observer.changedInstr(MI);
3499 return Legalized;
3500
3501 case TargetOpcode::G_INTTOPTR:
3502 if (TypeIdx != 1)
3503 return UnableToLegalize;
3504
3505 Observer.changingInstr(MI);
3506 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3507 Observer.changedInstr(MI);
3508 return Legalized;
3509 case TargetOpcode::G_PTRTOINT:
3510 if (TypeIdx != 0)
3511 return UnableToLegalize;
3512
3513 Observer.changingInstr(MI);
3514 widenScalarDst(MI, WideTy, 0);
3515 Observer.changedInstr(MI);
3516 return Legalized;
3517 case TargetOpcode::G_BUILD_VECTOR: {
3518 Observer.changingInstr(MI);
3519
3520 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3521 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3522 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3523
3524 // Avoid changing the result vector type if the source element type was
3525 // requested.
3526 if (TypeIdx == 1) {
3527 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3528 } else {
3529 widenScalarDst(MI, WideTy, 0);
3530 }
3531
3532 Observer.changedInstr(MI);
3533 return Legalized;
3534 }
3535 case TargetOpcode::G_SEXT_INREG:
3536 if (TypeIdx != 0)
3537 return UnableToLegalize;
3538
3539 Observer.changingInstr(MI);
3540 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3541 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3542 Observer.changedInstr(MI);
3543 return Legalized;
3544 case TargetOpcode::G_PTRMASK: {
3545 if (TypeIdx != 1)
3546 return UnableToLegalize;
3547 Observer.changingInstr(MI);
3548 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3549 Observer.changedInstr(MI);
3550 return Legalized;
3551 }
3552 case TargetOpcode::G_VECREDUCE_ADD: {
3553 if (TypeIdx != 1)
3554 return UnableToLegalize;
3555 Observer.changingInstr(MI);
3556 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3557 widenScalarDst(MI, WideTy.getScalarType(), 0, TargetOpcode::G_TRUNC);
3558 Observer.changedInstr(MI);
3559 return Legalized;
3560 }
3561 case TargetOpcode::G_VECREDUCE_FADD:
3562 case TargetOpcode::G_VECREDUCE_FMUL:
3563 case TargetOpcode::G_VECREDUCE_FMIN:
3564 case TargetOpcode::G_VECREDUCE_FMAX:
3565 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3566 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3567 if (TypeIdx != 0)
3568 return UnableToLegalize;
3569 Observer.changingInstr(MI);
3570 Register VecReg = MI.getOperand(1).getReg();
3571 LLT VecTy = MRI.getType(VecReg);
3572 LLT WideVecTy = VecTy.changeElementType(WideTy);
3573 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3574 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3575 Observer.changedInstr(MI);
3576 return Legalized;
3577 }
3578 case TargetOpcode::G_VSCALE: {
3579 MachineOperand &SrcMO = MI.getOperand(1);
3580 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3581 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3582 // The CImm is always a signed value
3583 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3584 Observer.changingInstr(MI);
3585 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3586 widenScalarDst(MI, WideTy);
3587 Observer.changedInstr(MI);
3588 return Legalized;
3589 }
3590 case TargetOpcode::G_SPLAT_VECTOR: {
3591 if (TypeIdx != 1)
3592 return UnableToLegalize;
3593
3594 Observer.changingInstr(MI);
3595 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3596 Observer.changedInstr(MI);
3597 return Legalized;
3598 }
3599 case TargetOpcode::G_INSERT_SUBVECTOR: {
3600 if (TypeIdx != 0)
3601 return UnableToLegalize;
3602
3604 Register BigVec = IS.getBigVec();
3605 Register SubVec = IS.getSubVec();
3606
3607 LLT SubVecTy = MRI.getType(SubVec);
3608 LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
3609
3610 // Widen the G_INSERT_SUBVECTOR
3611 auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
3612 auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3613 auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3614 IS.getIndexImm());
3615
3616 // Truncate back down
3617 auto SplatZero = MIRBuilder.buildSplatVector(
3618 WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
3619 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, IS.getReg(0), WideInsert,
3620 SplatZero);
3621
3622 MI.eraseFromParent();
3623
3624 return Legalized;
3625 }
3626 }
3627}
3628
3630 MachineIRBuilder &B, Register Src, LLT Ty) {
 // NOTE(review): the first line of this signature is not present in this
 // listing; from the body, `Pieces` is a container the results are appended
 // to.
 //
 // Split Src into pieces of type Ty with one unmerge built through B.
3631 auto Unmerge = B.buildUnmerge(Ty, Src);
 // Append every operand of the unmerge except the last one, in operand order.
 // NOTE(review): presumably the last operand is the source (Src) and the
 // preceding ones are the results -- confirm.
3632 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3633 Pieces.push_back(Unmerge.getReg(I));
3634}
3635
/// Place \p ConstVal in the function's constant pool and build a G_LOAD of it
/// into \p DstReg using \p MIRBuilder.
///
/// The pool address uses a pointer type in the data layout's default globals
/// address space, and the load's memory type is the LLT of \p DstReg.
3636static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3637 MachineIRBuilder &MIRBuilder) {
3638 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3639 MachineFunction &MF = MIRBuilder.getMF();
3640 const DataLayout &DL = MIRBuilder.getDataLayout();
 // Pointer type used to address the constant pool entry.
3641 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3642 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3643 LLT DstLLT = MRI.getType(DstReg);
3644
 // The pool entry and the load both use the ABI alignment of the constant's
 // IR type.
3645 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3646
 // Get the constant pool index for ConstVal and build its address.
3647 auto Addr = MIRBuilder.buildConstantPool(
3648 AddrPtrTy,
3649 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3650
 // Memory operand describing a load of DstLLT with the alignment above.
 // NOTE(review): the first line of this initializer (the call that produces
 // the MachineMemOperand and its leading argument) is missing from this
 // listing -- restore it from the upstream file before building.
3651 MachineMemOperand *MMO =
3653 MachineMemOperand::MOLoad, DstLLT, Alignment);
3654
3655 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3656}
3657
// NOTE(review): the signature lines of this function are not present in this
// listing; the body below operates on a MachineInstr named MI.
//
// Lower an instruction carrying an integer immediate: operand 1 is read as a
// ConstantInt (getCImm) and replaced by a load from the constant pool.
3660 const MachineOperand &ConstOperand = MI.getOperand(1);
3661 const Constant *ConstantVal = ConstOperand.getCImm();
3662
 // The load writes straight into the original destination register
 // (operand 0), so the original instruction can then be erased.
3663 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3664 MI.eraseFromParent();
3665
3666 return Legalized;
3667}
3668
// NOTE(review): the signature lines of this function are not present in this
// listing; the body below operates on a MachineInstr named MI.
//
// Lower an instruction carrying a floating-point immediate: operand 1 is read
// as a ConstantFP (getFPImm) and replaced by a load from the constant pool.
3671 const MachineOperand &ConstOperand = MI.getOperand(1);
3672 const Constant *ConstantVal = ConstOperand.getFPImm();
3673
 // The load writes straight into the original destination register
 // (operand 0), so the original instruction can then be erased.
3674 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3675 MI.eraseFromParent();
3676
3677 return Legalized;
3678}
3679
// NOTE(review): the signature lines of this function are not present in this
// listing; the body below operates on a MachineInstr named MI.
//
// Lower a two-register cast (Dst, Src) where at least one side is a vector:
// unmerge the source into pieces, bitcast the pieces when both sides are
// vectors, and merge the pieces into Dst. Returns UnableToLegalize when
// neither side is a vector.
3682 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3683 if (SrcTy.isVector()) {
3684 LLT SrcEltTy = SrcTy.getElementType();
 // NOTE(review): SrcRegs is used below, but its declaration is not visible in
 // this listing.
3686
3687 if (DstTy.isVector()) {
3688 int NumDstElt = DstTy.getNumElements();
3689 int NumSrcElt = SrcTy.getNumElements();
3690
3691 LLT DstEltTy = DstTy.getElementType();
3692 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3693 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3694
3695 // If there's an element size mismatch, insert intermediate casts to match
3696 // the result element type.
3697 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3698 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3699 //
3700 // =>
3701 //
3702 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3703 // %4:_(<2 x s8>) = G_BITCAST %2
3704 // %5:_(<2 x s8>) = G_BITCAST %3
3705 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
 // Each source element becomes a sub-vector of the destination type with
 // NumDstElt / NumSrcElt elements.
3706 DstCastTy = DstTy.changeVectorElementCount(
3707 ElementCount::getFixed(NumDstElt / NumSrcElt));
3708 SrcPartTy = SrcEltTy;
3709 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3710 //
3711 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3712 //
3713 // =>
3714 //
3715 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3716 // %4:_(s16) = G_BITCAST %2
3717 // %5:_(s16) = G_BITCAST %3
3718 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
 // Each group of NumSrcElt / NumDstElt source elements becomes one
 // destination element.
3719 SrcPartTy = SrcTy.changeVectorElementCount(
3720 ElementCount::getFixed(NumSrcElt / NumDstElt));
3721 DstCastTy = DstEltTy;
3722 }
3723
 // With equal element counts the defaults above apply: unmerge into source
 // elements and bitcast each one to the destination element type.
3724 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3725 for (Register &SrcReg : SrcRegs)
3726 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3727 } else
 // Vector source, non-vector destination: split into source elements; the
 // pieces are merged directly into Dst below.
3728 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3729
3730 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3731 MI.eraseFromParent();
3732 return Legalized;
3733 }
3734
 // Non-vector source, vector destination: unmerge the source into pieces of
 // the destination element type and merge them into Dst.
3735 if (DstTy.isVector()) {
 // NOTE(review): the SrcRegs declaration for this scope is not visible in
 // this listing.
3737 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3738 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3739 MI.eraseFromParent();
3740 return Legalized;
3741 }
3742
 // Neither side is a vector: not handled here.
3743 return UnableToLegalize;
3744}
3745
3746/// Figure out the bit offset into a register when coercing a vector index for
3747/// the wide element type. This is only for the case when promoting a vector to
3748/// one with larger elements.
3749//
3750///
3751/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3752/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
///
/// Returns the register holding %offset_bits, which has the same type as
/// \p Idx.
// NOTE(review): the first line of the signature (return type, name and the
// builder parameter `B`) is not present in this listing.
3754 Register Idx,
3755 unsigned NewEltSize,
3756 unsigned OldEltSize) {
 // NOTE(review): Log2_32 is applied to the size ratio and to OldEltSize, so
 // both are presumably powers of two -- confirm against callers.
3757 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3758 LLT IdxTy = B.getMRI()->getType(Idx);
3759
3760 // Now figure out the amount we need to shift to get the target bits.
 // The mask keeps only the low Log2EltRatio bits of Idx, i.e. the position of
 // the old element inside its containing new (wider) element.
3761 auto OffsetMask = B.buildConstant(
3762 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3763 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
 // Shifting left by Log2(OldEltSize) scales that position by the old element
 // size, turning it into a bit offset.
3764 return B.buildShl(IdxTy, OffsetIdx,
3765 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3766}
3767
3768/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3769/// is casting to a vector with a smaller element size, perform multiple element
3770/// extracts and merge the results. If this is coercing to a vector with larger
3771/// elements, index the bitcasted vector and extract the target element with bit
3772/// operations. This is intended to force the indexing in the native register
3773/// size for architectures that can dynamically index the register file.
///
/// Only \p TypeIdx 1 is handled. Returns UnableToLegalize when the cast type
/// has the same number of elements as the source, when the new element count
/// is not a multiple of the old one (narrowing), or when the element size
/// ratio is not a power-of-two multiple (widening).
// NOTE(review): the lines carrying the return type and the start of the
// signature are absent from this listing.
3776 LLT CastTy) {
3777 if (TypeIdx != 1)
3778 return UnableToLegalize;
3779
3780 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3781
3782 LLT SrcEltTy = SrcVecTy.getElementType();
// A scalar CastTy is treated as a one-element vector.
3783 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3784 unsigned OldNumElts = SrcVecTy.getNumElements();
3785
3786 LLT NewEltTy = CastTy.getScalarType();
// NOTE(review): this G_BITCAST is emitted before the checks below, so it is
// left behind on the paths that return UnableToLegalize.
3787 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3788
3789 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3790 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3791 if (NewNumElts > OldNumElts) {
3792 // Decreasing the vector element size
3793 //
3794 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3795 // =>
3796 // v4i32:castx = bitcast x:v2i64
3797 //
3798 // i64 = bitcast
3799 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3800 // (i32 (extract_vector_elt castx, (2 * y + 1)))
3801 //
3802 if (NewNumElts % OldNumElts != 0)
3803 return UnableToLegalize;
3804
3805 // Type of the intermediate result vector.
3806 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3807 LLT MidTy =
3808 CastTy.changeElementCount(ElementCount::getFixed(NewEltsPerOldElt));
3809
3810 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3811
3812 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
// Index of the first narrow element that belongs to the requested element.
3813 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3814
// Extract each narrow piece of the requested element in turn.
3815 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3816 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3817 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3818 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3819 NewOps[I] = Elt.getReg(0);
3820 }
3821
// Reassemble the pieces and reinterpret them as the original element type.
3822 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3823 MIRBuilder.buildBitcast(Dst, NewVec);
3824 MI.eraseFromParent();
3825 return Legalized;
3826 }
3827
3828 if (NewNumElts < OldNumElts) {
3829 if (NewEltSize % OldEltSize != 0)
3830 return UnableToLegalize;
3831
3832 // This only depends on powers of 2 because we use bit tricks to figure out
3833 // the bit offset we need to shift to get the target element. A general
3834 // expansion could emit division/multiply.
3835 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3836 return UnableToLegalize;
3837
3838 // Increasing the vector element size.
3839 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3840 //
3841 // =>
3842 //
3843 // %cast = G_BITCAST %vec
3844 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3845 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3846 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3847 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3848 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3849 // %elt = G_TRUNC %elt_bits
3850
3851 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3852 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3853
3854 // Divide to get the index in the wider element type.
3855 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3856
// With a scalar CastTy the whole bitcasted value is the wide element.
3857 Register WideElt = CastVec;
3858 if (CastTy.isVector()) {
3859 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3860 ScaledIdx).getReg(0);
3861 }
3862
3863 // Compute the bit offset into the register of the target element.
// NOTE(review): the line that declares OffsetBits and names the callee is
// absent from this listing; only the argument list of the call survives.
3865 MIRBuilder, Idx, NewEltSize, OldEltSize);
3866
3867 // Shift the wide element to get the target element.
3868 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3869 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3870 MI.eraseFromParent();
3871 return Legalized;
3872 }
3873
// Same element count: nothing to do here.
3874 return UnableToLegalize;
3875}
3876
3877/// Emit code to insert \p InsertReg into \p TargetReg at bit offset
3878/// \p OffsetBits, while preserving the other bits in \p TargetReg.
3879///
3880/// (zext(InsertReg) << Offset) | (TargetReg & ~(LowBits(InsertReg.size()) << Offset))
///
/// \returns the register holding the combined value, of \p TargetReg's type.
// NOTE(review): the first line of this definition (return type, name and the
// leading builder parameter referred to as B below) is absent from this
// listing.
3882 Register TargetReg, Register InsertReg,
3883 Register OffsetBits) {
3884 LLT TargetTy = B.getMRI()->getType(TargetReg);
3885 LLT InsertTy = B.getMRI()->getType(InsertReg);
// Widen the inserted value with zeros and move it to its final position.
3886 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3887 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3888
3889 // Produce a bitmask of the value to insert
3890 auto EltMask = B.buildConstant(
3891 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3892 InsertTy.getSizeInBits()));
3893 // Shift it into position
3894 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3895 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3896
3897 // Clear out the bits in the wide element
3898 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3899
3900 // The value to insert has all zeros already, so stick it into the masked
3901 // wide element.
3902 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3903}
3904
3905/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3906/// is increasing the element size, perform the indexing in the target element
3907/// type, and use bit operations to insert at the element position. This is
3908/// intended for architectures that can dynamically index the register file and
3909/// want to force indexing in the native register size.
///
/// Only \p TypeIdx 0 and only the widening direction (fewer, larger elements)
/// are handled; every other request returns UnableToLegalize.
// NOTE(review): the lines carrying the return type and the start of the
// signature are absent from this listing.
3912 LLT CastTy) {
3913 if (TypeIdx != 0)
3914 return UnableToLegalize;
3915
3916 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3917 MI.getFirst4RegLLTs();
3918 LLT VecTy = DstTy;
3919
3920 LLT VecEltTy = VecTy.getElementType();
3921 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3922 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3923 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3924
// A scalar CastTy is treated as a one-element vector.
3925 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3926 unsigned OldNumElts = VecTy.getNumElements();
3927
// NOTE(review): this G_BITCAST is emitted before the checks below, so it is
// left behind on the paths that return UnableToLegalize.
3928 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3929 if (NewNumElts < OldNumElts) {
3930 if (NewEltSize % OldEltSize != 0)
3931 return UnableToLegalize;
3932
3933 // This only depends on powers of 2 because we use bit tricks to figure out
3934 // the bit offset we need to shift to get the target element. A general
3935 // expansion could emit division/multiply.
3936 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3937 return UnableToLegalize;
3938
3939 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3940 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3941
3942 // Divide to get the index in the wider element type.
3943 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3944
// Fetch the wide element that currently contains the target narrow element.
// With a scalar CastTy the whole bitcasted value plays that role.
3945 Register ExtractedElt = CastVec;
3946 if (CastTy.isVector()) {
3947 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3948 ScaledIdx).getReg(0);
3949 }
3950
3951 // Compute the bit offset into the register of the target element.
// NOTE(review): the line that declares OffsetBits and names the callee is
// absent from this listing; only the argument list of the call survives.
3953 MIRBuilder, Idx, NewEltSize, OldEltSize);
3954
// Splice the new value into the wide element, then put the wide element back.
3955 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3956 Val, OffsetBits);
3957 if (CastTy.isVector()) {
3958 InsertedElt = MIRBuilder.buildInsertVectorElement(
3959 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3960 }
3961
3962 MIRBuilder.buildBitcast(Dst, InsertedElt);
3963 MI.eraseFromParent();
3964 return Legalized;
3965 }
3966
// Narrowing the element size (or keeping it) is not implemented here.
3967 return UnableToLegalize;
3968}
3969
3970// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3971// those that have smaller than legal operands.
3972//
3973// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3974//
3975// ===>
3976//
3977// s32 = G_BITCAST <4 x s8>
3978// s32 = G_BITCAST <4 x s8>
3979// s32 = G_BITCAST <4 x s8>
3980// s32 = G_BITCAST <4 x s8>
3981// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3982// <16 x s8> = G_BITCAST <4 x s32>
//
// Returns UnableToLegalize if MI is not a G_CONCAT_VECTORS or if the
// G_BUILD_VECTOR producing CastTy from its scalar type is not legal.
// NOTE(review): the lines carrying the return type and the start of the
// signature are absent from this listing.
3985 LLT CastTy) {
3986 // Convert it to CONCAT instruction
3987 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3988 if (!ConcatMI) {
3989 return UnableToLegalize;
3990 }
3991
3992 // Fetch the operands. Only DstReg is used below; the legality of the
// bitcasts themselves is not queried.
3993 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3994 LLT SrcScalTy = CastTy.getScalarType();
3995
3996 // Check if the build vector is Legal
3997 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3998 return UnableToLegalize;
3999 }
4000
4001 // Bitcast the sources
// Each source vector becomes one scalar of CastTy's element type.
4002 SmallVector<Register> BitcastRegs;
4003 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
4004 BitcastRegs.push_back(
4005 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
4006 .getReg(0));
4007 }
4008
4009 // Build the scalar values into a vector
4010 Register BuildReg =
4011 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
// Reinterpret the wide-element vector as the original destination type.
4012 MIRBuilder.buildBitcast(DstReg, BuildReg);
4013
4014 MI.eraseFromParent();
4015 return Legalized;
4016}
4017
4018// This bitcasts a shuffle vector to a different type currently of the same
4019// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
4020// will be used instead.
4021//
4022// <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
4023// ===>
4024// <4 x s64> = G_PTRTOINT <4 x p0>
4025// <4 x s64> = G_PTRTOINT <4 x p0>
4026// <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
4027// <16 x p0> = G_INTTOPTR <16 x s64>
//
// Returns UnableToLegalize unless TypeIdx is 0 and CastTy has the same element
// size and element count as the destination.
// NOTE(review): the lines carrying the return type and the start of the
// signature are absent from this listing.
4030 LLT CastTy) {
4031 auto ShuffleMI = cast<GShuffleVector>(&MI);
4032 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
4033 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
4034
4035 // We currently only handle vectors of the same size.
4036 if (TypeIdx != 0 ||
4037 CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
4038 CastTy.getElementCount() != DstTy.getElementCount())
4039 return UnableToLegalize;
4040
// The inputs keep their own element count and take CastTy's element type.
4041 LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
4042
4043 auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
4044 auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
// Shuffle in the cast type with the original mask, then cast the result back.
4045 auto Shuf =
4046 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
4047 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
4048
4049 MI.eraseFromParent();
4050 return Legalized;
4051}
4052
4053/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
4054///
4055/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
4056///
4057/// ===>
4058///
4059/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
4060/// <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i8>, N / 8
4061/// <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
///
/// Only \p TypeIdx 0 and a vector \p CastTy of the same total size as the
/// destination are handled. The cast element size must be a multiple of the
/// original element size, and the index and both element counts must be
/// divisible by that ratio; otherwise UnableToLegalize is returned.
// NOTE(review): the lines carrying the return type and the start of the
// signature are absent from this listing.
4064 LLT CastTy) {
4065 auto ES = cast<GExtractSubvector>(&MI);
4066
4067 if (!CastTy.isVector())
4068 return UnableToLegalize;
4069
4070 if (TypeIdx != 0)
4071 return UnableToLegalize;
4072
4073 Register Dst = ES->getReg(0);
4074 Register Src = ES->getSrcVec();
4075 uint64_t Idx = ES->getIndexImm();
4076
4077 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4078
4079 LLT DstTy = MRI.getType(Dst);
4080 LLT SrcTy = MRI.getType(Src);
4081 ElementCount DstTyEC = DstTy.getElementCount();
4082 ElementCount SrcTyEC = SrcTy.getElementCount();
4083 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4084 auto SrcTyMinElts = SrcTyEC.getKnownMinValue();
4085
// Already in the requested type: nothing to rewrite.
4086 if (DstTy == CastTy)
4087 return Legalized;
4088
4089 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4090 return UnableToLegalize;
4091
4092 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4093 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
// The cast elements must be a whole number of original elements, otherwise
// the reinterpreted source below would not have the same size as Src.
4094 if (CastEltSize < DstEltSize || CastEltSize % DstEltSize != 0)
4095 return UnableToLegalize;
4096
// Number of original elements packed into one cast element.
4097 auto AdjustAmt = CastEltSize / DstEltSize;
4098 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4099 SrcTyMinElts % AdjustAmt != 0)
4100 return UnableToLegalize;
4101
4102 Idx /= AdjustAmt;
// The reinterpreted source has AdjustAmt times fewer elements, each of the
// cast element size. (Using AdjustAmt as the element size is only correct
// when the original elements are one bit wide.)
4103 SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), CastEltSize);
4104 auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
4105 auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
4106 MIRBuilder.buildBitcast(Dst, PromotedES);
4107
4108 ES->eraseFromParent();
4109 return Legalized;
4110}
4111
4112/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
4113///
4114/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
4115/// <vscale x 8 x i1>,
4116/// N
4117///
4118/// ===>
4119///
4120/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
4121/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
4122/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
4123/// <vscale x 1 x i8>, N / 8
4124/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
///
/// Only \p TypeIdx 0 and a vector \p CastTy of the same total size as the
/// destination are handled. The cast element size must be a multiple of the
/// original element size, and the index and all three element counts must be
/// divisible by that ratio; otherwise UnableToLegalize is returned.
// NOTE(review): the lines carrying the return type and the start of the
// signature are absent from this listing.
4127 LLT CastTy) {
4128 auto ES = cast<GInsertSubvector>(&MI);
4129
4130 if (!CastTy.isVector())
4131 return UnableToLegalize;
4132
4133 if (TypeIdx != 0)
4134 return UnableToLegalize;
4135
4136 Register Dst = ES->getReg(0);
4137 Register BigVec = ES->getBigVec();
4138 Register SubVec = ES->getSubVec();
4139 uint64_t Idx = ES->getIndexImm();
4140
4141 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4142
4143 LLT DstTy = MRI.getType(Dst);
4144 LLT BigVecTy = MRI.getType(BigVec);
4145 LLT SubVecTy = MRI.getType(SubVec);
4146
// Already in the requested type: nothing to rewrite.
4147 if (DstTy == CastTy)
4148 return Legalized;
4149
4150 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4151 return UnableToLegalize;
4152
4153 ElementCount DstTyEC = DstTy.getElementCount();
4154 ElementCount BigVecTyEC = BigVecTy.getElementCount();
4155 ElementCount SubVecTyEC = SubVecTy.getElementCount();
4156 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4157 auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
4158 auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();
4159
4160 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4161 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
// The cast elements must be a whole number of original elements, otherwise
// the reinterpreted operands below would not keep their original sizes.
4162 if (CastEltSize < DstEltSize || CastEltSize % DstEltSize != 0)
4163 return UnableToLegalize;
4164
// Number of original elements packed into one cast element.
4165 auto AdjustAmt = CastEltSize / DstEltSize;
4166 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4167 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4168 return UnableToLegalize;
4169
4170 Idx /= AdjustAmt;
// Both operands get AdjustAmt times fewer elements, each of the cast element
// size. (Using AdjustAmt as the element size is only correct when the
// original elements are one bit wide.)
4171 BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), CastEltSize);
4172 SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), CastEltSize);
4173 auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
4174 auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
4175 auto PromotedIS =
4176 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4177 MIRBuilder.buildBitcast(Dst, PromotedIS);
4178
4179 ES->eraseFromParent();
4180 return Legalized;
4181}
4182
// NOTE(review): the signature line of this definition is absent from this
// listing; the body reads its argument as LoadMI.
//
// Two rewrites are performed below: a load whose memory type is not a whole
// number of bytes is widened to the byte-sized type, and any other load is
// split into a large and a small part that are recombined with shift/or.
4184 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
4185 Register DstReg = LoadMI.getDstReg();
4186 Register PtrReg = LoadMI.getPointerReg();
4187 LLT DstTy = MRI.getType(DstReg);
4188 MachineMemOperand &MMO = LoadMI.getMMO();
4189 LLT MemTy = MMO.getMemoryType();
4190 MachineFunction &MF = MIRBuilder.getMF();
4191
4192 LLT EltTy = MemTy.getScalarType();
4193
4194 unsigned MemSizeInBits = MemTy.getSizeInBits();
4195 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
4196
// The two sizes differ exactly when the memory type is not byte-sized.
4197 if (MemSizeInBits != MemStoreSizeInBits) {
4198 if (MemTy.isVector())
4199 return UnableToLegalize;
4200
4201 // Promote to a byte-sized load if not loading an integral number of
4202 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
4203 LLT WideMemTy = EltTy.changeElementSize(MemStoreSizeInBits);
4204 MachineMemOperand *NewMMO =
4205 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
4206
4207 Register LoadReg = DstReg;
4208 LLT LoadTy = DstTy;
4209
4210 // If this wasn't already an extending load, we need to widen the result
4211 // register to avoid creating a load with a narrower result than the source.
4212 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
4213 LoadTy = WideMemTy;
4214 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4215 }
4216
4217 if (isa<GSExtLoad>(LoadMI)) {
// Sign-extending load: re-extend from the original memory width.
4218 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4219 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4220 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
4221 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4222 // The extra bits are guaranteed to be zero, since we stored them that
4223 // way. A zext load from Wide thus automatically gives zext from MemVT.
4224 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4225 } else {
4226 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4227 }
4228
// Narrow back down if the result register had to be widened above.
4229 if (DstTy != LoadTy)
4230 MIRBuilder.buildTrunc(DstReg, LoadReg);
4231
4232 LoadMI.eraseFromParent();
4233 return Legalized;
4234 }
4235
4236 // Big endian lowering not implemented.
4237 if (MIRBuilder.getDataLayout().isBigEndian())
4238 return UnableToLegalize;
4239
4240 // This load needs splitting into power of 2 sized loads.
4241 //
4242 // Our strategy here is to generate anyextending loads for the smaller
4243 // types up to next power-2 result type, and then combine the two larger
4244 // result values together, before truncating back down to the non-pow-2
4245 // type.
4246 // E.g. v1 = i24 load =>
4247 // v2 = i32 zextload (2 byte)
4248 // v3 = i32 load (1 byte)
4249 // v4 = i32 shl v3, 16
4250 // v5 = i32 or v4, v2
4251 // v1 = i24 trunc v5
4252 // By doing this we generate the correct truncate which should get
4253 // combined away as an artifact with a matching extend.
4254
4255 uint64_t LargeSplitSize, SmallSplitSize;
4256
4257 if (!isPowerOf2_32(MemSizeInBits)) {
4258 // This load needs splitting into power of 2 sized loads.
4259 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
4260 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4261 } else {
4262 // This is already a power of 2, but we still need to split this in half.
4263 //
4264 // Assume we're being asked to decompose an unaligned load.
4265 // TODO: If this requires multiple splits, handle them all at once.
4266 auto &Ctx = MF.getFunction().getContext();
// If the target accepts this access as-is there is nothing to decompose.
4267 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4268 return UnableToLegalize;
4269
4270 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4271 }
4272
4273 if (MemTy.isVector()) {
4274 // TODO: Handle vector extloads
4275 if (MemTy != DstTy)
4276 return UnableToLegalize;
4277
4278 Align Alignment = LoadMI.getAlign();
4279 // Given an alignment larger than the size of the memory, we can increase
4280 // the size of the load without needing to scalarize it.
// NOTE(review): the second half of this condition and the argument of
// changeVectorElementCount are absent from this listing.
4281 if (Alignment.value() * 8 > MemSizeInBits &&
4283 LLT MoreTy = DstTy.changeVectorElementCount(
4285 MachineMemOperand *NewMMO = MF.getMachineMemOperand(&MMO, 0, MoreTy);
4286 auto NewLoad = MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
// Drop the extra trailing elements to get back to the original type.
4287 MIRBuilder.buildDeleteTrailingVectorElements(LoadMI.getReg(0),
4288 NewLoad.getReg(0));
4289 LoadMI.eraseFromParent();
4290 return Legalized;
4291 }
4292
4293 // TODO: We can do better than scalarizing the vector and at least split it
4294 // in half.
4295 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
4296 }
4297
// Memory operands for the two halves: the large part at offset 0 and the
// small part immediately after it (offsets and sizes in bytes).
4298 MachineMemOperand *LargeMMO =
4299 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4300 MachineMemOperand *SmallMMO =
4301 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4302
4303 LLT PtrTy = MRI.getType(PtrReg);
4304 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
4305
// Pick the type used for the partial loads and for the pointer offset
// constant. Pointer elements use plain scalars of the matching width.
4306 LLT AnyExtTy;
4307 LLT OffsetCstRes;
4308 if (EltTy.isPointer()) {
4309 AnyExtTy = LLT::scalar(AnyExtSize);
4310 OffsetCstRes = LLT::scalar(PtrTy.getSizeInBits());
4311 } else {
4312 AnyExtTy = EltTy.changeElementSize(AnyExtSize);
4313 OffsetCstRes = EltTy.changeElementSize(PtrTy.getSizeInBits());
4314 }
4315
// The low part is always loaded zero-extended.
4316 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4317 PtrReg, *LargeMMO);
4318
4319 auto OffsetCst = MIRBuilder.buildConstant(OffsetCstRes, LargeSplitSize / 8);
4320 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4321 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
// The high part reuses the opcode of the original load.
4322 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
4323 SmallPtr, *SmallMMO);
4324
// Move the high part above the low part and merge them.
4325 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4326 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4327
4328 if (AnyExtTy == DstTy)
4329 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4330 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
4331 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4332 MIRBuilder.buildTrunc(DstReg, {Or});
4333 } else {
4334 assert(DstTy.isPointer() && "expected pointer");
4335 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4336
4337 // FIXME: We currently consider this to be illegal for non-integral address
4338 // spaces, but we still need a way to reinterpret the bits.
4339 MIRBuilder.buildIntToPtr(DstReg, Or);
4340 }
4341
4342 LoadMI.eraseFromParent();
4343 return Legalized;
4344}
4345
// NOTE(review): the signature line of this definition is absent from this
// listing; the body reads its argument as StoreMI.
4347 // Lower a non-power of 2 store into multiple pow-2 stores.
4348 // E.g. split an i24 store into an i16 store + i8 store.
4349 // We do this by first extending the stored value to the next largest power
4350 // of 2 type, and then using truncating stores to store the components.
4351 // By doing this, likewise with G_LOAD, generate an extend that can be
4352 // artifact-combined away instead of leaving behind extracts.
4353 Register SrcReg = StoreMI.getValueReg();
4354 Register PtrReg = StoreMI.getPointerReg();
4355 LLT SrcTy = MRI.getType(SrcReg);
4356 MachineFunction &MF = MIRBuilder.getMF();
4357 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4358 LLT MemTy = MMO.getMemoryType();
4359
4360 unsigned StoreWidth = MemTy.getSizeInBits();
4361 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
4362
// The two sizes differ exactly when the memory type is not byte-sized.
4363 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4364 // Promote to a byte-sized store with upper bits zero if not
4365 // storing an integral number of bytes. For example, promote
4366 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
4367 LLT WideTy = LLT::integer(StoreSizeInBits);
4368
4369 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4370 // Avoid creating a store with a narrower source than result.
4371 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4372 SrcTy = WideTy;
4373 }
4374
// Zero every bit above the original store width before writing it out.
4375 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4376
4377 MachineMemOperand *NewMMO =
4378 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
4379 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4380 StoreMI.eraseFromParent();
4381 return Legalized;
4382 }
4383
4384 if (MemTy.isVector()) {
// A vector whose memory type differs from the value type is handed to
// the boolean-vector scalarization.
4385 if (MemTy != SrcTy)
4386 return scalarizeVectorBooleanStore(StoreMI);
4387
4388 // TODO: We can do better than scalarizing the vector and at least split it
4389 // in half.
4390 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
4391 }
4392
4393 unsigned MemSizeInBits = MemTy.getSizeInBits();
4394 uint64_t LargeSplitSize, SmallSplitSize;
4395
4396 if (!isPowerOf2_32(MemSizeInBits)) {
// Largest power-of-two piece first, the remainder second.
4397 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
4398 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
4399 } else {
4400 auto &Ctx = MF.getFunction().getContext();
4401 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4402 return UnableToLegalize; // Don't know what we're being asked to do.
4403
// Power-of-two size the target rejects as-is: split it in half.
4404 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4405 }
4406
4407 // Extend to the next pow-2. If this store was itself the result of lowering,
4408 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
4409 // that's wider than the stored size.
4410 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
4411 const LLT NewSrcTy = LLT::integer(AnyExtSize);
4412
// Pointers are converted to integers so they can be shifted below.
4413 if (SrcTy.isPointer()) {
4414 const LLT IntPtrTy = LLT::integer(SrcTy.getSizeInBits());
4415 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4416 }
4417
4418 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4419
4420 // Obtain the smaller value by shifting away the larger value.
4421 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4422 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4423
4424 // Generate the PtrAdd and truncating stores.
4425 LLT PtrTy = MRI.getType(PtrReg);
4426 auto OffsetCst = MIRBuilder.buildConstant(LLT::integer(PtrTy.getSizeInBits()),
4427 LargeSplitSize / 8);
4428 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4429
// Memory operands for the two pieces (offsets and sizes in bytes).
4430 MachineMemOperand *LargeMMO =
4431 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4432 MachineMemOperand *SmallMMO =
4433 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4434 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4435 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4436 StoreMI.eraseFromParent();
4437 return Legalized;
4438}
4439
// NOTE(review): the signature lines of this definition are absent from this
// listing; the body reads its argument as StoreMI.
//
// Stores a vector whose memory element type is not byte-sized by packing the
// elements into a single integer of the memory type's total width and storing
// that integer. Byte-sized element types are not handled yet.
4442 Register SrcReg = StoreMI.getValueReg();
4443 Register PtrReg = StoreMI.getPointerReg();
4444 LLT SrcTy = MRI.getType(SrcReg);
4445 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4446 LLT MemTy = MMO.getMemoryType();
4447 LLT MemScalarTy = MemTy.getElementType();
4448 MachineFunction &MF = MIRBuilder.getMF();
4449
4450 assert(SrcTy.isVector() && "Expect a vector store type");
4451
4452 if (!MemScalarTy.isByteSized()) {
4453 // We need to build an integer scalar of the vector bit pattern.
4454 // It's not legal for us to add padding when storing a vector.
4455 unsigned NumBits = MemTy.getSizeInBits();
4456 LLT IntTy = LLT::integer(NumBits);
// Accumulator for the packed bit pattern, starting from zero.
4457 auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
4458 LLT IdxTy = TLI.getVectorIdxLLT(MF.getDataLayout());
4459
4460 for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
// Take element I, cut it to the memory element width and widen it to
// the packed integer type with zeros.
4461 auto Elt = MIRBuilder.buildExtractVectorElement(
4462 SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
4463 auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
4464 auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
// On big-endian targets the element order inside the integer is reversed.
4465 unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
4466 ? (MemTy.getNumElements() - 1) - I
4467 : I;
4468 auto ShiftAmt = MIRBuilder.buildConstant(
4469 IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
4470 auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4471 CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4472 }
// Store the packed integer through a memory operand retyped to IntTy.
4473 auto PtrInfo = MMO.getPointerInfo();
4474 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
4475 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4476 StoreMI.eraseFromParent();
4477 return Legalized;
4478 }
4479
4480 // TODO: implement simple scalarization.
4481 return UnableToLegalize;
4482}
4483
/// Legalize \p MI by reinterpreting the operand(s) selected by \p TypeIdx as
/// \p CastTy. Loads, stores, scalar-condition selects and bitwise logic ops
/// are rewritten in place around the observer notifications; the vector
/// opcodes are forwarded to their dedicated helpers. Any other opcode returns
/// UnableToLegalize.
// NOTE(review): the line carrying the return type is absent from this listing.
4485LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
4486 switch (MI.getOpcode()) {
4487 case TargetOpcode::G_LOAD: {
4488 if (TypeIdx != 0)
4489 return UnableToLegalize;
4490 MachineMemOperand &MMO = **MI.memoperands_begin();
4491
4492 // Not sure how to interpret a bitcast of an extending load.
4493 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4494 return UnableToLegalize;
4495
4496 Observer.changingInstr(MI);
4497 bitcastDst(MI, CastTy, 0);
// Keep the memory operand's type in step with the new result type.
4498 MMO.setType(CastTy);
4499 // The range metadata is no longer valid when reinterpreted as a different
4500 // type.
4501 MMO.clearRanges();
4502 Observer.changedInstr(MI);
4503 return Legalized;
4504 }
4505 case TargetOpcode::G_STORE: {
4506 if (TypeIdx != 0)
4507 return UnableToLegalize;
4508
4509 MachineMemOperand &MMO = **MI.memoperands_begin();
4510
4511 // Not sure how to interpret a bitcast of a truncating store.
4512 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4513 return UnableToLegalize;
4514
4515 Observer.changingInstr(MI);
4516 bitcastSrc(MI, CastTy, 0);
// Keep the memory operand's type in step with the new value type.
4517 MMO.setType(CastTy);
4518 Observer.changedInstr(MI);
4519 return Legalized;
4520 }
4521 case TargetOpcode::G_SELECT: {
4522 if (TypeIdx != 0)
4523 return UnableToLegalize;
4524
// Operand 1 is the condition; only a non-vector condition is supported.
4525 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
4526 LLVM_DEBUG(
4527 dbgs() << "bitcast action not implemented for vector select\n");
4528 return UnableToLegalize;
4529 }
4530
4531 Observer.changingInstr(MI);
4532 bitcastSrc(MI, CastTy, 2);
4533 bitcastSrc(MI, CastTy, 3);
4534 bitcastDst(MI, CastTy, 0);
4535 Observer.changedInstr(MI);
4536 return Legalized;
4537 }
4538 case TargetOpcode::G_AND:
4539 case TargetOpcode::G_OR:
4540 case TargetOpcode::G_XOR: {
// Both inputs and the result are cast; TypeIdx is not inspected here.
4541 Observer.changingInstr(MI);
4542 bitcastSrc(MI, CastTy, 1);
4543 bitcastSrc(MI, CastTy, 2);
4544 bitcastDst(MI, CastTy, 0);
4545 Observer.changedInstr(MI);
4546 return Legalized;
4547 }
4548 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4549 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
4550 case TargetOpcode::G_INSERT_VECTOR_ELT:
4551 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
4552 case TargetOpcode::G_CONCAT_VECTORS:
4553 return bitcastConcatVector(MI, TypeIdx, CastTy);
4554 case TargetOpcode::G_SHUFFLE_VECTOR:
4555 return bitcastShuffleVector(MI, TypeIdx, CastTy);
4556 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4557 return bitcastExtractSubvector(MI, TypeIdx, CastTy);
4558 case TargetOpcode::G_INSERT_SUBVECTOR:
4559 return bitcastInsertSubvector(MI, TypeIdx, CastTy);
4560 default:
4561 return UnableToLegalize;
4562 }
4563}
4564
4565// Legalize an instruction by changing the opcode in place. The observer is
// told before and after the descriptor swap, as for every other in-place
// mutation in this file.
4566void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
4567 Observer.changingInstr(MI);
4568 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
4569 Observer.changedInstr(MI);
4570}
4571
4573LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4574 using namespace TargetOpcode;
4575
4576 switch(MI.getOpcode()) {
4577 default:
4578 return UnableToLegalize;
4579 case TargetOpcode::G_FCONSTANT:
4580 return lowerFConstant(MI);
4581 case TargetOpcode::G_BITCAST:
4582 return lowerBitcast(MI);
4583 case TargetOpcode::G_SREM:
4584 case TargetOpcode::G_UREM: {
4585 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4586 auto Quot =
4587 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4588 {MI.getOperand(1), MI.getOperand(2)});
4589
4590 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
4591 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
4592 MI.eraseFromParent();
4593 return Legalized;
4594 }
4595 case TargetOpcode::G_SADDO:
4596 case TargetOpcode::G_SSUBO:
4597 return lowerSADDO_SSUBO(MI);
4598 case TargetOpcode::G_SADDE:
4599 return lowerSADDE(MI);
4600 case TargetOpcode::G_SSUBE:
4601 return lowerSSUBE(MI);
4602 case TargetOpcode::G_UMULH:
4603 case TargetOpcode::G_SMULH:
4604 return lowerSMULH_UMULH(MI);
4605 case TargetOpcode::G_SMULO:
4606 case TargetOpcode::G_UMULO: {
4607 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4608 // result.
4609 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
4610 LLT Ty = MRI.getType(Res);
4611
4612 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4613 ? TargetOpcode::G_SMULH
4614 : TargetOpcode::G_UMULH;
4615
4616 Observer.changingInstr(MI);
4617 const auto &TII = MIRBuilder.getTII();
4618 MI.setDesc(TII.get(TargetOpcode::G_MUL));
4619 MI.removeOperand(1);
4620 Observer.changedInstr(MI);
4621
4622 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4623 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4624
4625 // Move insert point forward so we can use the Res register if needed.
4626 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
4627
4628 // For *signed* multiply, overflow is detected by checking:
4629 // (hi != (lo >> bitwidth-1))
4630 if (Opcode == TargetOpcode::G_SMULH) {
4631 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4632 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4633 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
4634 } else {
4635 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
4636 }
4637 return Legalized;
4638 }
4639 case TargetOpcode::G_FNEG: {
4640 auto [Res, ResTy, SubByReg, SubByRegTy] = MI.getFirst2RegLLTs();
4641 LLT TyInt =
4642 ResTy.changeElementType(LLT::integer(ResTy.getScalarSizeInBits()));
4643 Register CastedSubByReg = SubByReg;
4644
4645 if (!SubByRegTy.getScalarType().isAnyScalar() &&
4646 !SubByRegTy.getScalarType().isInteger()) {
4647 auto BitcastDst = SubByRegTy.changeElementType(
4648 LLT::integer(SubByRegTy.getScalarSizeInBits()));
4649 CastedSubByReg = MIRBuilder.buildBitcast(BitcastDst, SubByReg).getReg(0);
4650 }
4651
4652 auto SignMask = MIRBuilder.buildConstant(
4653 TyInt, APInt::getSignMask(TyInt.getScalarSizeInBits()));
4654
4655 if (ResTy != TyInt) {
4656 Register NewDst =
4657 MIRBuilder.buildXor(TyInt, CastedSubByReg, SignMask).getReg(0);
4658 MIRBuilder.buildBitcast(Res, NewDst);
4659 } else
4660 MIRBuilder.buildXor(Res, CastedSubByReg, SignMask).getReg(0);
4661
4662 MI.eraseFromParent();
4663 return Legalized;
4664 }
4665 case TargetOpcode::G_FSUB:
4666 case TargetOpcode::G_STRICT_FSUB: {
4667 auto [Res, LHS, RHS] = MI.getFirst3Regs();
4668 LLT Ty = MRI.getType(Res);
4669
4670 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4671 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4672
4673 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4674 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4675 else
4676 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4677
4678 MI.eraseFromParent();
4679 return Legalized;
4680 }
4681 case TargetOpcode::G_FMAD:
4682 return lowerFMad(MI);
4683 case TargetOpcode::G_FFLOOR:
4684 return lowerFFloor(MI);
4685 case TargetOpcode::G_LROUND:
4686 case TargetOpcode::G_LLROUND: {
4687 Register DstReg = MI.getOperand(0).getReg();
4688 Register SrcReg = MI.getOperand(1).getReg();
4689 LLT SrcTy = MRI.getType(SrcReg);
4690 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4691 {SrcReg});
4692 MIRBuilder.buildFPTOSI(DstReg, Round);
4693 MI.eraseFromParent();
4694 return Legalized;
4695 }
4696 case TargetOpcode::G_INTRINSIC_ROUND:
4697 return lowerIntrinsicRound(MI);
4698 case TargetOpcode::G_FRINT: {
4699 // Since round even is the assumed rounding mode for unconstrained FP
4700 // operations, rint and roundeven are the same operation.
4701 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4702 return Legalized;
4703 }
4704 case TargetOpcode::G_INTRINSIC_LRINT:
4705 case TargetOpcode::G_INTRINSIC_LLRINT: {
4706 Register DstReg = MI.getOperand(0).getReg();
4707 Register SrcReg = MI.getOperand(1).getReg();
4708 LLT SrcTy = MRI.getType(SrcReg);
4709 auto Round =
4710 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4711 MIRBuilder.buildFPTOSI(DstReg, Round);
4712 MI.eraseFromParent();
4713 return Legalized;
4714 }
4715 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4716 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
4717 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4718 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4719 **MI.memoperands_begin());
4720 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4721 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4722 MI.eraseFromParent();
4723 return Legalized;
4724 }
4725 case TargetOpcode::G_LOAD:
4726 case TargetOpcode::G_SEXTLOAD:
4727 case TargetOpcode::G_ZEXTLOAD:
4728 return lowerLoad(cast<GAnyLoad>(MI));
4729 case TargetOpcode::G_STORE:
4730 return lowerStore(cast<GStore>(MI));
4731 case TargetOpcode::G_CTLZ_ZERO_POISON:
4732 case TargetOpcode::G_CTTZ_ZERO_POISON:
4733 case TargetOpcode::G_CTLZ:
4734 case TargetOpcode::G_CTTZ:
4735 case TargetOpcode::G_CTPOP:
4736 case TargetOpcode::G_CTLS:
4737 return lowerBitCount(MI);
4738 case G_UADDO: {
4739 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
4740
4741 Register NewRes = MRI.cloneVirtualRegister(Res);
4742
4743 MIRBuilder.buildAdd(NewRes, LHS, RHS);
4744 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4745
4746 MIRBuilder.buildCopy(Res, NewRes);
4747
4748 MI.eraseFromParent();
4749 return Legalized;
4750 }
4751 case G_UADDE: {
4752 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
4753 const LLT CondTy = MRI.getType(CarryOut);
4754 const LLT Ty = MRI.getType(Res);
4755
4756 Register NewRes = MRI.cloneVirtualRegister(Res);
4757
4758 // Initial add of the two operands.
4759 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
4760
4761 // Initial check for carry.
4762 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4763
4764 // Add the sum and the carry.
4765 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
4766 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4767
4768 // Second check for carry. We can only carry if the initial sum is all 1s
4769 // and the carry is set, resulting in a new sum of 0.
4770 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4771 auto ResEqZero =
4772 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
4773 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4774 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
4775
4776 MIRBuilder.buildCopy(Res, NewRes);
4777
4778 MI.eraseFromParent();
4779 return Legalized;
4780 }
4781 case G_USUBO: {
4782 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
4783
4784 MIRBuilder.buildSub(Res, LHS, RHS);
4785 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
4786
4787 MI.eraseFromParent();
4788 return Legalized;
4789 }
4790 case G_USUBE: {
4791 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
4792 const LLT CondTy = MRI.getType(BorrowOut);
4793 const LLT Ty = MRI.getType(Res);
4794
4795 // Initial subtract of the two operands.
4796 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
4797
4798 // Initial check for borrow.
4799 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4800
4801 // Subtract the borrow from the first subtract.
4802 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
4803 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4804
4805 // Second check for borrow. We can only borrow if the initial difference is
4806 // 0 and the borrow is set, resulting in a new difference of all 1s.
4807 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4808 auto TmpResEqZero =
4809 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4810 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4811 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4812
4813 MI.eraseFromParent();
4814 return Legalized;
4815 }
4816 case G_UITOFP:
4817 return lowerUITOFP(MI);
4818 case G_SITOFP:
4819 return lowerSITOFP(MI);
4820 case G_FPTOUI:
4821 return lowerFPTOUI(MI);
4822 case G_FPTOSI:
4823 return lowerFPTOSI(MI);
4824 case G_FPTOUI_SAT:
4825 case G_FPTOSI_SAT:
4826 return lowerFPTOINT_SAT(MI);
4827 case G_FPEXT:
4828 return lowerFPExtAndTruncMem(MI);
4829 case G_FPTRUNC:
4830 return lowerFPTRUNC(MI);
4831 case G_FPOWI:
4832 return lowerFPOWI(MI);
4833 case G_FMODF:
4834 return lowerFMODF(MI);
4835 case G_SMIN:
4836 case G_SMAX:
4837 case G_UMIN:
4838 case G_UMAX:
4839 return lowerMinMax(MI);
4840 case G_SCMP:
4841 case G_UCMP:
4842 return lowerThreewayCompare(MI);
4843 case G_FCOPYSIGN:
4844 return lowerFCopySign(MI);
4845 case G_FMINNUM:
4846 case G_FMAXNUM:
4847 case G_FMINIMUMNUM:
4848 case G_FMAXIMUMNUM:
4849 return lowerFMinNumMaxNum(MI);
4850 case G_FMINIMUM:
4851 case G_FMAXIMUM:
4852 return lowerFMinimumMaximum(MI);
4853 case G_MERGE_VALUES:
4854 return lowerMergeValues(MI);
4855 case G_UNMERGE_VALUES:
4856 return lowerUnmergeValues(MI);
4857 case TargetOpcode::G_SEXT_INREG: {
4858 assert(MI.getOperand(2).isImm() && "Expected immediate");
4859 int64_t SizeInBits = MI.getOperand(2).getImm();
4860
4861 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4862 LLT DstTy = MRI.getType(DstReg);
4863 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4864
4865 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4866 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4867 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4868 MI.eraseFromParent();
4869 return Legalized;
4870 }
4871 case G_EXTRACT_VECTOR_ELT:
4872 case G_INSERT_VECTOR_ELT:
4874 case G_SHUFFLE_VECTOR:
4875 return lowerShuffleVector(MI);
4876 case G_VECTOR_COMPRESS:
4877 return lowerVECTOR_COMPRESS(MI);
4878 case G_DYN_STACKALLOC:
4879 return lowerDynStackAlloc(MI);
4880 case G_STACKSAVE:
4881 return lowerStackSave(MI);
4882 case G_STACKRESTORE:
4883 return lowerStackRestore(MI);
4884 case G_EXTRACT:
4885 return lowerExtract(MI);
4886 case G_INSERT:
4887 return lowerInsert(MI);
4888 case G_BSWAP:
4889 return lowerBswap(MI);
4890 case G_BITREVERSE:
4891 return lowerBitreverse(MI);
4892 case G_READ_REGISTER:
4893 case G_WRITE_REGISTER:
4894 return lowerReadWriteRegister(MI);
4895 case G_UADDSAT:
4896 case G_USUBSAT: {
4897 // Try to make a reasonable guess about which lowering strategy to use. The
4898 // target can override this with custom lowering and calling the
4899 // implementation functions.
4900 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4901 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4902 return lowerAddSubSatToMinMax(MI);
4904 }
4905 case G_SADDSAT:
4906 case G_SSUBSAT: {
4907 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4908
4909 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4910 // since it's a shorter expansion. However, we would need to figure out the
4911 // preferred boolean type for the carry out for the query.
4912 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4913 return lowerAddSubSatToMinMax(MI);
4915 }
4916 case G_SSHLSAT:
4917 case G_USHLSAT:
4918 return lowerShlSat(MI);
4919 case G_ABS:
4920 return lowerAbsToAddXor(MI);
4921 case G_ABDS:
4922 case G_ABDU: {
4923 bool IsSigned = MI.getOpcode() == G_ABDS;
4924 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4925 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4926 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4927 return lowerAbsDiffToMinMax(MI);
4928 }
4929 return lowerAbsDiffToSelect(MI);
4930 }
4931 case G_FABS:
4932 return lowerFAbs(MI);
4933 case G_SELECT:
4934 return lowerSelect(MI);
4935 case G_IS_FPCLASS:
4936 return lowerISFPCLASS(MI);
4937 case G_SDIVREM:
4938 case G_UDIVREM:
4939 return lowerDIVREM(MI);
4940 case G_FSHL:
4941 case G_FSHR:
4942 return lowerFunnelShift(MI);
4943 case G_ROTL:
4944 case G_ROTR:
4945 return lowerRotate(MI);
4946 case G_MEMSET:
4947 case G_MEMCPY:
4948 case G_MEMMOVE:
4949 return lowerMemCpyFamily(MI);
4950 case G_MEMCPY_INLINE:
4951 return lowerMemcpyInline(MI);
4952 case G_ZEXT:
4953 case G_SEXT:
4954 case G_ANYEXT:
4955 return lowerEXT(MI);
4956 case G_TRUNC:
4957 return lowerTRUNC(MI);
4959 return lowerVectorReduction(MI);
4960 case G_VAARG:
4961 return lowerVAArg(MI);
4962 case G_ATOMICRMW_SUB: {
4963 auto [Ret, Mem, Val] = MI.getFirst3Regs();
4964 const LLT ValTy = MRI.getType(Val);
4965 MachineMemOperand *MMO = *MI.memoperands_begin();
4966
4967 auto VNeg = MIRBuilder.buildNeg(ValTy, Val);
4968 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4969 MI.eraseFromParent();
4970 return Legalized;
4971 }
4972 case G_SMULFIX:
4973 case G_UMULFIX:
4974 return lowerMulfix(MI);
4975 }
4976}
4977
4979 Align MinAlign) const {
     // Chooses an alignment for a stack temporary of type Ty: the byte size of
     // Ty rounded up to a power of two, but never less than MinAlign.
     // NOTE(review): the first line of this signature is absent from this copy
     // of the listing, so only the trailing parameter is visible here.
4980 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4981 // datalayout for the preferred alignment. Also there should be a target hook
4982 // for this to allow targets to reduce the alignment and ignore the
4983 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4984 // the type.
4985 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4986}
4987
4990 MachinePointerInfo &PtrInfo) {
     // Creates a new stack object of Bytes bytes with the given Alignment,
     // stores the matching fixed-stack pointer info into the out-parameter
     // PtrInfo, and returns the instruction built for the frame index.
     // NOTE(review): the leading lines of this signature are absent from this
     // copy of the listing; Bytes and Alignment are only visible through their
     // use below.
4991 MachineFunction &MF = MIRBuilder.getMF();
4992 const DataLayout &DL = MIRBuilder.getDataLayout();
4993 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4994
     // The frame pointer type lives in the alloca address space and uses that
     // address space's pointer width.
4995 unsigned AddrSpace = DL.getAllocaAddrSpace();
4996 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4997
4998 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4999 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
5000}
5001
5003 const SrcOp &Val) {
     // Round-trips Val through memory: stores it into a freshly created stack
     // temporary sized for Val's type and loads it back into Res, returning the
     // load that was built.
     // NOTE(review): the first line of this signature and the second operand of
     // the std::max call below (listing line 5007) are absent from this copy.
5004 LLT SrcTy = Val.getLLTTy(MRI);
5005 Align StackTypeAlign =
5006 std::max(getStackTemporaryAlignment(SrcTy),
5008 MachinePointerInfo PtrInfo;
5009 auto StackTemp =
5010 createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo);
5011
     // The store and the load use the same pointer info and alignment.
5012 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
5013 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
5014}
5015
5017 LLT VecTy) {
     // Returns a register holding an index that is in range for VecTy. A
     // constant index that is already in range is returned unchanged; otherwise
     // instructions that clamp IdxReg are built with B.
     // NOTE(review): the first line of this signature is absent from this copy
     // of the listing.
5018 LLT IdxTy = B.getMRI()->getType(IdxReg);
5019 unsigned NElts = VecTy.getNumElements();
5020
5021 int64_t IdxVal;
     // NOTE(review): IdxVal is signed. Depending on the return type of
     // getNumElements(), a negative constant may satisfy this comparison and be
     // returned unclamped - confirm whether such an index can reach this point.
5022 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
5023 if (IdxVal < VecTy.getNumElements())
5024 return IdxReg;
5025 // If a constant index would be out of bounds, clamp it as well.
5026 }
5027
     // Power-of-two element count: mask the index down to its low
     // Log2_32(NElts) bits.
5028 if (isPowerOf2_32(NElts)) {
5029 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
5030 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
5031 }
5032
     // Any other element count: take the unsigned minimum of the index and
     // NElts - 1.
5033 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
5034 .getReg(0);
5035}
5036
5038 Register Index) {
     // Builds the address of element Index of a vector of type VecTy located at
     // VecPtr and returns the register holding that pointer. The index is
     // clamped first, so the computed address is based on an in-range index.
     // NOTE(review): the first line of this signature is absent from this copy
     // of the listing.
5039 LLT EltTy = VecTy.getElementType();
5040
5041 // Calculate the element offset and add it to the pointer.
5042 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
5043 assert(EltSize * 8 == EltTy.getSizeInBits() &&
5044 "Converting bits to bytes lost precision");
5045
5046 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
5047
5048 // Convert index to the correct size for the address space.
5049 const DataLayout &DL = MIRBuilder.getDataLayout();
5050 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
5051 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
5052 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
     // Only emit a sign-extend/truncate when the index width actually differs.
5053 if (IdxTy != MRI.getType(Index))
5054 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
5055
     // Byte offset = index * element size in bytes.
5056 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
5057 MIRBuilder.buildConstant(IdxTy, EltSize));
5058
5059 LLT PtrTy = MRI.getType(VecPtr);
5060 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
5061}
5062
5063#ifndef NDEBUG
5064/// Check that all vector operands have same number of elements. Other operands
5065/// should be listed in NonVecOp.
5068 std::initializer_list<unsigned> NonVecOpIndices) {
     // Returns true only if MI has no memory operands, the register at index 0
     // has a vector type, and every later operand is either a vector register
     // with that same element count or has its index listed in NonVecOpIndices.
     // NOTE(review): the leading lines of this signature are absent from this
     // copy of the listing.
5069 if (MI.getNumMemOperands() != 0)
5070 return false;
5071
5072 LLT VecTy = MRI.getType(MI.getReg(0));
5073 if (!VecTy.isVector())
5074 return false;
5075 unsigned NumElts = VecTy.getNumElements();
5076
5077 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
5078 MachineOperand &Op = MI.getOperand(OpIdx);
     // Non-register operands are acceptable only when explicitly listed.
5079 if (!Op.isReg()) {
5080 if (!is_contained(NonVecOpIndices, OpIdx))
5081 return false;
5082 continue;
5083 }
5084
     // Likewise for register operands whose type is not a vector.
5085 LLT Ty = MRI.getType(Op.getReg());
5086 if (!Ty.isVector()) {
5087 if (!is_contained(NonVecOpIndices, OpIdx))
5088 return false;
5089 continue;
5090 }
5091
5092 if (Ty.getNumElements() != NumElts)
5093 return false;
5094 }
5095
5096 return true;
5097}
5098#endif
5099
5100/// Fill \p DstOps with DstOps that have same number of elements combined as
5101/// the Ty. These DstOps have either scalar type when \p NumElts = 1 or are
5102/// vectors with \p NumElts elements. When Ty.getNumElements() is not multiple
5103/// of \p NumElts last DstOp (leftover) has fewer then \p NumElts elements.
5104static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
5105 unsigned NumElts) {
5106 LLT LeftoverTy;
5107 assert(Ty.isVector() && "Expected vector type");
5108 LLT NarrowTy = Ty.changeElementCount(ElementCount::getFixed(NumElts));
5109 int NumParts, NumLeftover;
5110 std::tie(NumParts, NumLeftover) =
5111 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
5112
5113 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
5114 for (int i = 0; i < NumParts; ++i) {
5115 DstOps.push_back(NarrowTy);
5116 }
5117
5118 if (LeftoverTy.isValid()) {
5119 assert(NumLeftover == 1 && "expected exactly one leftover");
5120 DstOps.push_back(LeftoverTy);
5121 }
5122}
5123
5124/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
5125/// made from \p Op depending on operand type.
5127 MachineOperand &Op) {
     // Appends N source operands built from Op to Ops. Op may be a register, an
     // immediate or a compare predicate; any other operand kind is treated as
     // unreachable.
     // NOTE(review): the first line of this signature is absent from this copy
     // of the listing; Ops and N are only visible through their use below.
5128 for (unsigned i = 0; i < N; ++i) {
5129 if (Op.isReg())
5130 Ops.push_back(Op.getReg());
5131 else if (Op.isImm())
5132 Ops.push_back(Op.getImm());
5133 else if (Op.isPredicate())
5134 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
5135 else
5136 llvm_unreachable("Unsupported type");
5137 }
5138}
5139
5140// Handle splitting vector operations which need to have the same number of
5141// elements in each type index, but each type index may have a different element
5142// type.
5143//
5144// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
5145// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5146// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5147//
5148// Also handles some irregular breakdown cases, e.g.
5149// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
5150// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5151// s64 = G_SHL s64, s32
5154 GenericMachineInstr &MI, unsigned NumElts,
5155 std::initializer_list<unsigned> NonVecOpIndices) {
     // Splits MI into several instructions of the same opcode that each work on
     // NumElts elements (plus one leftover piece when the original element
     // count is not a multiple of NumElts), merges the partial results back
     // into MI's original defs, erases MI and returns Legalized.
     // NOTE(review): the leading lines of this signature and the declarations
     // of Defs and Uses (listing lines 5196 and 5200) are absent from this copy.
5156 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
5157 "Non-compatible opcode or not specified non-vector operands");
5158 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5159
5160 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5161 unsigned NumDefs = MI.getNumDefs();
5162
5163 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
5164 // Build instructions with DstOps to use instruction found by CSE directly.
5165 // CSE copies found instruction into given vreg when building with vreg dest.
5166 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
5167 // Output registers will be taken from created instructions.
5168 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
5169 for (unsigned i = 0; i < NumDefs; ++i) {
5170 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
5171 }
5172
5173 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
5174 // Operands listed in NonVecOpIndices will be used as is without splitting;
5175 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
5176 // scalar condition (op 1), immediate in sext_inreg (op 2).
5177 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
5178 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5179 ++UseIdx, ++UseNo) {
     // A non-vector operand is repeated once per output piece of the first def.
5180 if (is_contained(NonVecOpIndices, UseIdx)) {
5181 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
5182 MI.getOperand(UseIdx));
5183 } else {
5184 SmallVector<Register, 8> SplitPieces;
5185 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
5186 MRI);
5187 llvm::append_range(InputOpsPieces[UseNo], SplitPieces);
5188 }
5189 }
5190
5191 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5192
5193 // Take i-th piece of each input operand split and build sub-vector/scalar
5194 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
5195 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5197 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5198 Defs.push_back(OutputOpsPieces[DstNo][i]);
5199
5201 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5202 Uses.push_back(InputOpsPieces[InputNo][i]);
5203
     // The new instruction keeps MI's opcode and flags.
5204 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
5205 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5206 OutputRegs[DstNo].push_back(I.getReg(DstNo));
5207 }
5208
5209 // Merge small outputs into MI's output for each def operand.
5210 if (NumLeftovers) {
5211 for (unsigned i = 0; i < NumDefs; ++i)
5212 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
5213 } else {
5214 for (unsigned i = 0; i < NumDefs; ++i)
5215 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
5216 }
5217
5218 MI.eraseFromParent();
5219 return Legalized;
5220}
5221
5224 unsigned NumElts) {
     // Splits a vector PHI into PHIs with NumElts elements each (plus one
     // leftover PHI when the element count is not a multiple of NumElts),
     // merges their results into MI's original def after the PHIs of the block,
     // erases MI and returns Legalized.
     // NOTE(review): the leading lines of this signature are absent from this
     // copy of the listing.
5225 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5226
5227 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5228 unsigned NumDefs = MI.getNumDefs();
5229
5230 SmallVector<DstOp, 8> OutputOpsPieces;
5231 SmallVector<Register, 8> OutputRegs;
5232 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
5233
5234 // Instructions that perform register split will be inserted in basic block
5235 // where register is defined (basic block is in the next operand).
     // Inputs come in (value, block) pairs, hence NumInputs / 2 incoming values
     // and the stride of 2 in the loop below.
5236 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
5237 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5238 UseIdx += 2, ++UseNo) {
5239 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
5240 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5241 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
5242 MIRBuilder, MRI);
5243 }
5244
5245 // Build PHIs with fewer elements.
5246 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5247 MIRBuilder.setInsertPt(*MI.getParent(), MI);
5248 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5249 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5250 Phi.addDef(
5251 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5252 OutputRegs.push_back(Phi.getReg(0));
5253
     // Pair the i-th piece of each incoming value with the original
     // block operand of that incoming value.
5254 for (unsigned j = 0; j < NumInputs / 2; ++j) {
5255 Phi.addUse(InputOpsPieces[j][i]);
5256 Phi.add(MI.getOperand(1 + j * 2 + 1));
5257 }
5258 }
5259
5260 // Set the insert point after the existing PHIs
5261 MachineBasicBlock &MBB = *MI.getParent();
5262 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
5263
5264 // Merge small outputs into MI's def.
5265 if (NumLeftovers) {
5266 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
5267 } else {
5268 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
5269 }
5270
5271 MI.eraseFromParent();
5272 return Legalized;
5273}
5274
5277 unsigned TypeIdx,
5278 LLT NarrowTy) {
     // Rewrites one unmerge of a wide source into a two-level unmerge: first
     // into NarrowTy pieces, then each NarrowTy piece into the original
     // destination registers. Returns UnableToLegalize unless TypeIdx is 1,
     // NarrowTy differs from the destination type, and the sizes divide evenly.
     // NOTE(review): the leading lines of this signature are absent from this
     // copy of the listing.
5279 const int NumDst = MI.getNumOperands() - 1;
5280 const Register SrcReg = MI.getOperand(NumDst).getReg();
5281 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5282 LLT SrcTy = MRI.getType(SrcReg);
5283
5284 if (TypeIdx != 1 || NarrowTy == DstTy)
5285 return UnableToLegalize;
5286
5287 // Requires compatible types. Otherwise SrcReg should have been defined by
5288 // merge-like instruction that would get artifact combined. Most likely
5289 // instruction that defines SrcReg has to perform more/fewer elements
5290 // legalization compatible with NarrowTy.
5291 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5292 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5293
5294 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5295 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
5296 return UnableToLegalize;
5297
5298 // This is most likely DstTy (smaller than register size) packed in SrcTy
5299 // (larger than register size) and since unmerge was not combined it will be
5300 // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
5301 // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.
5302
5303 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
5304 //
5305 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5306 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5307 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5308 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5309 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5310 const int PartsPerUnmerge = NumDst / NumUnmerge;
5311
     // Each second-level unmerge defines a consecutive run of PartsPerUnmerge
     // of MI's original destination registers.
5312 for (int I = 0; I != NumUnmerge; ++I) {
5313 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5314
5315 for (int J = 0; J != PartsPerUnmerge; ++J)
5316 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
5317 MIB.addUse(Unmerge.getReg(I));
5318 }
5319
5320 MI.eraseFromParent();
5321 return Legalized;
5322}
5323
5326 LLT NarrowTy) {
     // Rewrites a merge-like instruction so that it goes through NarrowTy
     // pieces. With TypeIdx == 1 the sources are unmerged into scalars and
     // regrouped into NarrowTy vectors; with TypeIdx == 0 consecutive sources
     // are merged into NarrowTy pieces first. In both cases the pieces are then
     // merged into the original destination, MI is erased and Legalized is
     // returned. Returns UnableToLegalize when the types do not divide evenly.
     // NOTE(review): the leading lines of this signature and the declarations
     // of Elts and Sources (listing lines 5355 and 5398) are absent from this
     // copy.
5327 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5328 // Requires compatible types. Otherwise user of DstReg did not perform unmerge
5329 // that should have been artifact combined. Most likely instruction that uses
5330 // DstReg has to do more/fewer elements legalization compatible with NarrowTy.
5331 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5332 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5333 if (NarrowTy == SrcTy)
5334 return UnableToLegalize;
5335
5336 // This attempts to lower part of LCMTy merge/unmerge sequence. Intended use
5337 // is for old mir tests. Since the changes to more/fewer elements it should no
5338 // longer be possible to generate MIR like this when starting from llvm-ir
5339 // because LCMTy approach was replaced with merge/unmerge to vector elements.
5340 if (TypeIdx == 1) {
5341 assert(SrcTy.isVector() && "Expected vector types");
5342 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5343 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5344 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
5345 return UnableToLegalize;
5346 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
5347 //
5348 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
5349 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
5350 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
5351 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
5352 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
5353 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
5354
     // Collect every scalar element of every source operand, in order.
5356 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
5357 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
5358 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
5359 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5360 Elts.push_back(Unmerge.getReg(j));
5361 }
5362
     // Regroup the scalars into consecutive NarrowTy-sized vectors.
5363 SmallVector<Register, 8> NarrowTyElts;
5364 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5365 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5366 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
5367 ++i, Offset += NumNarrowTyElts) {
5368 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
5369 NarrowTyElts.push_back(
5370 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5371 }
5372
5373 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5374 MI.eraseFromParent();
5375 return Legalized;
5376 }
5377
5378 assert(TypeIdx == 0 && "Bad type index");
5379 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5380 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5381 return UnableToLegalize;
5382
5383 // This is most likely SrcTy (smaller than register size) packed in DstTy
5384 // (larger than register size) and since merge was not combined it will be
5385 // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy
5386 // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy.
5387
5388 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
5389 //
5390 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5391 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5392 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
5393 SmallVector<Register, 8> NarrowTyElts;
5394 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5395 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5396 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
     // Part i is built from NumElts consecutive source operands; the 1 + skips
     // the def operand.
5397 for (unsigned i = 0; i < NumParts; ++i) {
5399 for (unsigned j = 0; j < NumElts; ++j)
5400 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
5401 NarrowTyElts.push_back(
5402 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5403 }
5404
5405 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5406 MI.eraseFromParent();
5407 return Legalized;
5408}
5409
5412 unsigned TypeIdx,
5413 LLT NarrowVecTy) {
     // Narrows a vector element extract or insert. With a constant index the
     // source vector is split into pieces and the operation is redirected to
     // the piece that holds the element; an out-of-bounds constant index makes
     // the result undef.
     // NOTE(review): the leading lines of this signature and the final
     // statement of the function (listing line 5487, the variable-index
     // fallback) are absent from this copy.
5414 auto [DstReg, SrcVec] = MI.getFirst2Regs();
5415 Register InsertVal;
5416 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5417
5418 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
5419 if (IsInsert)
5420 InsertVal = MI.getOperand(2).getReg();
5421
     // The index is always the last operand, for both extract and insert.
5422 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
5423 LLT VecTy = MRI.getType(SrcVec);
5424
5425 // If the index is a constant, we can really break this down as you would
5426 // expect, and index into the target size pieces.
5427 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
5428 if (MaybeCst) {
5429 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5430 // Avoid out of bounds indexing the pieces.
5431 if (IdxVal >= VecTy.getNumElements()) {
5432 MIRBuilder.buildUndef(DstReg);
5433 MI.eraseFromParent();
5434 return Legalized;
5435 }
5436
     // Scalar NarrowVecTy: split into one piece per element, then either
     // swap in the inserted value and re-merge, or copy out the chosen piece.
5437 if (!NarrowVecTy.isVector()) {
5438 SmallVector<Register, 8> SplitPieces;
5439 extractParts(MI.getOperand(1).getReg(), NarrowVecTy,
5440 VecTy.getNumElements(), SplitPieces, MIRBuilder, MRI);
5441 if (IsInsert) {
5442 SplitPieces[IdxVal] = InsertVal;
5443 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), SplitPieces);
5444 } else {
5445 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5446 }
5447 } else {
5448 SmallVector<Register, 8> VecParts;
5449 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5450
5451 // Build a sequence of NarrowTy pieces in VecParts for this operand.
5452 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5453 TargetOpcode::G_ANYEXT);
5454
5455 unsigned NewNumElts = NarrowVecTy.getNumElements();
5456
     // PartIdx selects the sub-vector; NewIdx is the element index
     // within that sub-vector.
5457 LLT IdxTy = MRI.getType(Idx);
5458 int64_t PartIdx = IdxVal / NewNumElts;
5459 auto NewIdx =
5460 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5461
5462 if (IsInsert) {
5463 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5464
5465 // Use the adjusted index to insert into one of the subvectors.
5466 auto InsertPart = MIRBuilder.buildInsertVectorElement(
5467 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5468 VecParts[PartIdx] = InsertPart.getReg(0);
5469
5470 // Recombine the inserted subvector with the others to reform the result
5471 // vector.
5472 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5473 } else {
5474 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5475 }
5476 }
5477
5478 MI.eraseFromParent();
5479 return Legalized;
5480 }
5481
5482 // With a variable index, we can't perform the operation in a smaller type, so
5483 // we're forced to expand this.
5484 //
5485 // TODO: We could emit a chain of compare/select to figure out which piece to
5486 // index.
5489}
5489
5492 LLT NarrowTy) {
5493 // FIXME: Don't know how to handle secondary types yet.
5494 if (TypeIdx != 0)
5495 return UnableToLegalize;
5496
5497 if (!NarrowTy.isByteSized()) {
5498 LLVM_DEBUG(dbgs() << "Can't narrow load/store to non-byte-sized type\n");
5499 return UnableToLegalize;
5500 }
5501
5502 // This implementation doesn't work for atomics. Give up instead of doing
5503 // something invalid.
5504 if (LdStMI.isAtomic())
5505 return UnableToLegalize;
5506
5507 bool IsLoad = isa<GLoad>(LdStMI);
5508 Register ValReg = LdStMI.getReg(0);
5509 Register AddrReg = LdStMI.getPointerReg();
5510 LLT ValTy = MRI.getType(ValReg);
5511
5512 // FIXME: Do we need a distinct NarrowMemory legalize action?
5513 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
5514 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
5515 return UnableToLegalize;
5516 }
5517
5518 int NumParts = -1;
5519 int NumLeftover = -1;
5520 LLT LeftoverTy;
5521 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
5522 if (IsLoad) {
5523 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
5524 } else {
5525 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5526 NarrowLeftoverRegs, MIRBuilder, MRI)) {
5527 NumParts = NarrowRegs.size();
5528 NumLeftover = NarrowLeftoverRegs.size();
5529 }
5530 }
5531
5532 if (NumParts == -1)
5533 return UnableToLegalize;
5534
5535 LLT PtrTy = MRI.getType(AddrReg);
5536 const LLT OffsetTy = LLT::integer(PtrTy.getSizeInBits());
5537
5538 unsigned TotalSize = ValTy.getSizeInBits();
5539
5540 // Split the load/store into PartTy sized pieces starting at Offset. If this
5541 // is a load, return the new registers in ValRegs. For a store, each element
5542 // of ValRegs should be PartTy. Returns the next offset that needs to be
5543 // handled.
5544 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
5545 auto MMO = LdStMI.getMMO();
5546 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
5547 unsigned NumParts, unsigned Offset) -> unsigned {
5548 MachineFunction &MF = MIRBuilder.getMF();
5549 unsigned PartSize = PartTy.getSizeInBits();
5550 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
5551 ++Idx) {
5552 unsigned ByteOffset = Offset / 8;
5553 Register NewAddrReg;
5554
5555 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5556 ByteOffset);
5557
5558 MachineMemOperand *NewMMO =
5559 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
5560
5561 if (IsLoad) {
5562 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5563 ValRegs.push_back(Dst);
5564 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5565 } else {
5566 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5567 }
5568 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
5569 }
5570
5571 return Offset;
5572 };
5573
5574 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
5575 unsigned HandledOffset =
5576 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
5577
5578 // Handle the rest of the register if this isn't an even type breakdown.
5579 if (LeftoverTy.isValid())
5580 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5581
5582 if (IsLoad) {
5583 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5584 LeftoverTy, NarrowLeftoverRegs);
5585 }
5586
5587 LdStMI.eraseFromParent();
5588 return Legalized;
5589}
5590
5593 LLT NarrowTy) {
5594 using namespace TargetOpcode;
5596 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5597
5598 switch (MI.getOpcode()) {
5599 case G_IMPLICIT_DEF:
5600 case G_TRUNC:
5601 case G_AND:
5602 case G_OR:
5603 case G_XOR:
5604 case G_ADD:
5605 case G_SUB:
5606 case G_MUL:
5607 case G_PTR_ADD:
5608 case G_SMULH:
5609 case G_UMULH:
5610 case G_FADD:
5611 case G_FMUL:
5612 case G_FSUB:
5613 case G_FNEG:
5614 case G_FABS:
5615 case G_FCANONICALIZE:
5616 case G_FDIV:
5617 case G_FREM:
5618 case G_FMA:
5619 case G_FMAD:
5620 case G_FPOW:
5621 case G_FEXP:
5622 case G_FEXP2:
5623 case G_FEXP10:
5624 case G_FLOG:
5625 case G_FLOG2:
5626 case G_FLOG10:
5627 case G_FLDEXP:
5628 case G_FNEARBYINT:
5629 case G_FCEIL:
5630 case G_FFLOOR:
5631 case G_FRINT:
5632 case G_INTRINSIC_LRINT:
5633 case G_INTRINSIC_LLRINT:
5634 case G_INTRINSIC_ROUND:
5635 case G_INTRINSIC_ROUNDEVEN:
5636 case G_LROUND:
5637 case G_LLROUND:
5638 case G_INTRINSIC_TRUNC:
5639 case G_FMODF:
5640 case G_FCOS:
5641 case G_FSIN:
5642 case G_FTAN:
5643 case G_FACOS:
5644 case G_FASIN:
5645 case G_FATAN:
5646 case G_FATAN2:
5647 case G_FCOSH:
5648 case G_FSINH:
5649 case G_FTANH:
5650 case G_FSQRT:
5651 case G_BSWAP:
5652 case G_BITREVERSE:
5653 case G_SDIV:
5654 case G_UDIV:
5655 case G_SREM:
5656 case G_UREM:
5657 case G_SDIVREM:
5658 case G_UDIVREM:
5659 case G_SMIN:
5660 case G_SMAX:
5661 case G_UMIN:
5662 case G_UMAX:
5663 case G_ABS:
5664 case G_FMINNUM:
5665 case G_FMAXNUM:
5666 case G_FMINNUM_IEEE:
5667 case G_FMAXNUM_IEEE:
5668 case G_FMINIMUM:
5669 case G_FMAXIMUM:
5670 case G_FMINIMUMNUM:
5671 case G_FMAXIMUMNUM:
5672 case G_FSHL:
5673 case G_FSHR:
5674 case G_ROTL:
5675 case G_ROTR:
5676 case G_FREEZE:
5677 case G_SADDSAT:
5678 case G_SSUBSAT:
5679 case G_UADDSAT:
5680 case G_USUBSAT:
5681 case G_UMULO:
5682 case G_SMULO:
5683 case G_SHL:
5684 case G_LSHR:
5685 case G_ASHR:
5686 case G_SSHLSAT:
5687 case G_USHLSAT:
5688 case G_CTLZ:
5689 case G_CTLZ_ZERO_POISON:
5690 case G_CTTZ:
5691 case G_CTTZ_ZERO_POISON:
5692 case G_CTPOP:
5693 case G_CTLS:
5694 case G_FCOPYSIGN:
5695 case G_ZEXT:
5696 case G_SEXT:
5697 case G_ANYEXT:
5698 case G_FPEXT:
5699 case G_FPTRUNC:
5700 case G_SITOFP:
5701 case G_UITOFP:
5702 case G_FPTOSI:
5703 case G_FPTOUI:
5704 case G_FPTOSI_SAT:
5705 case G_FPTOUI_SAT:
5706 case G_INTTOPTR:
5707 case G_PTRTOINT:
5708 case G_ADDRSPACE_CAST:
5709 case G_UADDO:
5710 case G_USUBO:
5711 case G_UADDE:
5712 case G_USUBE:
5713 case G_SADDO:
5714 case G_SSUBO:
5715 case G_SADDE:
5716 case G_SSUBE:
5717 case G_STRICT_FADD:
5718 case G_STRICT_FSUB:
5719 case G_STRICT_FMUL:
5720 case G_STRICT_FMA:
5721 case G_STRICT_FLDEXP:
5722 case G_FFREXP:
5723 case G_TRUNC_SSAT_S:
5724 case G_TRUNC_SSAT_U:
5725 case G_TRUNC_USAT_U:
5726 return fewerElementsVectorMultiEltType(GMI, NumElts);
5727 case G_ICMP:
5728 case G_FCMP:
5729 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cpm predicate*/});
5730 case G_IS_FPCLASS:
5731 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
5732 case G_SELECT:
5733 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
5734 return fewerElementsVectorMultiEltType(GMI, NumElts);
5735 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
5736 case G_PHI:
5737 return fewerElementsVectorPhi(GMI, NumElts);
5738 case G_UNMERGE_VALUES:
5739 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
5740 case G_BUILD_VECTOR:
5741 assert(TypeIdx == 0 && "not a vector type index");
5742 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5743 case G_CONCAT_VECTORS:
5744 if (TypeIdx != 1) // TODO: This probably does work as expected already.
5745 return UnableToLegalize;
5746 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5747 case G_EXTRACT_VECTOR_ELT:
5748 case G_INSERT_VECTOR_ELT:
5749 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
5750 case G_LOAD:
5751 case G_STORE:
5752 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
5753 case G_SEXT_INREG:
5754 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
5756 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
5757 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5758 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5759 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
5760 case G_SHUFFLE_VECTOR:
5761 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
5762 case G_FPOWI:
5763 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
5764 case G_BITCAST:
5765 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
5766 case G_INTRINSIC_FPTRUNC_ROUND:
5767 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
5768 default:
5769 return UnableToLegalize;
5770 }
5771}
5772
5775 LLT NarrowTy) {
5776 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5777 "Not a bitcast operation");
5778
5779 if (TypeIdx != 0)
5780 return UnableToLegalize;
5781
5782 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5783
5784 unsigned NewElemCount =
5785 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5786 SmallVector<Register> SrcVRegs, BitcastVRegs;
5787 if (NewElemCount == 1) {
5788 LLT SrcNarrowTy = SrcTy.getElementType();
5789
5790 auto Unmerge = MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5791 getUnmergeResults(SrcVRegs, *Unmerge);
5792 } else {
5793 LLT SrcNarrowTy =
5795
5796 // Split the Src and Dst Reg into smaller registers
5797 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5798 return UnableToLegalize;
5799 }
5800
5801 // Build new smaller bitcast instructions
5802 // Leftover types are not supported for now, but will have to be.
5803 for (Register Reg : SrcVRegs)
5804 BitcastVRegs.push_back(MIRBuilder.buildBitcast(NarrowTy, Reg).getReg(0));
5805
5806 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5807 MI.eraseFromParent();
5808 return Legalized;
5809}
5810
5812 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5813 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5814 if (TypeIdx != 0)
5815 return UnableToLegalize;
5816
5817 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5818 MI.getFirst3RegLLTs();
5819 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5820 // The shuffle should be canonicalized by now.
5821 if (DstTy != Src1Ty)
5822 return UnableToLegalize;
5823 if (DstTy != Src2Ty)
5824 return UnableToLegalize;
5825
5826 if (!isPowerOf2_32(DstTy.getNumElements()))
5827 return UnableToLegalize;
5828
5829 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5830 // Further legalization attempts will be needed to split further.
5831 NarrowTy =
5832 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
5833 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5834
5835 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
5836 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
5837 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
5838 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5839 SplitSrc2Regs[1]};
5840
5841 Register Hi, Lo;
5842
5843 // If Lo or Hi uses elements from at most two of the four input vectors, then
5844 // express it as a vector shuffle of those two inputs. Otherwise extract the
5845 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
5847 for (unsigned High = 0; High < 2; ++High) {
5848 Register &Output = High ? Hi : Lo;
5849
5850 // Build a shuffle mask for the output, discovering on the fly which
5851 // input vectors to use as shuffle operands (recorded in InputUsed).
5852 // If building a suitable shuffle vector proves too hard, then bail
5853 // out with UseBuildVector set.
5854 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
5855 unsigned FirstMaskIdx = High * NewElts;
5856 bool UseBuildVector = false;
5857 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5858 // The mask element. This indexes into the input.
5859 int Idx = Mask[FirstMaskIdx + MaskOffset];
5860
5861 // The input vector this mask element indexes into.
5862 unsigned Input = (unsigned)Idx / NewElts;
5863
5864 if (Input >= std::size(Inputs)) {
5865 // The mask element does not index into any input vector.
5866 Ops.push_back(-1);
5867 continue;
5868 }
5869
5870 // Turn the index into an offset from the start of the input vector.
5871 Idx -= Input * NewElts;
5872
5873 // Find or create a shuffle vector operand to hold this input.
5874 unsigned OpNo;
5875 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5876 if (InputUsed[OpNo] == Input) {
5877 // This input vector is already an operand.
5878 break;
5879 } else if (InputUsed[OpNo] == -1U) {
5880 // Create a new operand for this input vector.
5881 InputUsed[OpNo] = Input;
5882 break;
5883 }
5884 }
5885
5886 if (OpNo >= std::size(InputUsed)) {
5887 // More than two input vectors used! Give up on trying to create a
5888 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5889 UseBuildVector = true;
5890 break;
5891 }
5892
5893 // Add the mask index for the new shuffle vector.
5894 Ops.push_back(Idx + OpNo * NewElts);
5895 }
5896
5897 if (UseBuildVector) {
5898 LLT EltTy = NarrowTy.getElementType();
5900
5901 // Extract the input elements by hand.
5902 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5903 // The mask element. This indexes into the input.
5904 int Idx = Mask[FirstMaskIdx + MaskOffset];
5905
5906 // The input vector this mask element indexes into.
5907 unsigned Input = (unsigned)Idx / NewElts;
5908
5909 if (Input >= std::size(Inputs)) {
5910 // The mask element is "undef" or indexes off the end of the input.
5911 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5912 continue;
5913 }
5914
5915 // Turn the index into an offset from the start of the input vector.
5916 Idx -= Input * NewElts;
5917
5918 // Extract the vector element by hand.
5919 SVOps.push_back(MIRBuilder
5920 .buildExtractVectorElement(
5921 EltTy, Inputs[Input],
5922 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5923 .getReg(0));
5924 }
5925
5926 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5927 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5928 } else if (InputUsed[0] == -1U) {
5929 // No input vectors were used! The result is undefined.
5930 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5931 } else if (NewElts == 1) {
5932 Output = MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);
5933 } else {
5934 Register Op0 = Inputs[InputUsed[0]];
5935 // If only one input was used, use an undefined vector for the other.
5936 Register Op1 = InputUsed[1] == -1U
5937 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5938 : Inputs[InputUsed[1]];
5939 // At least one input vector was used. Create a new shuffle vector.
5940 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5941 }
5942
5943 Ops.clear();
5944 }
5945
5946 MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
5947 MI.eraseFromParent();
5948 return Legalized;
5949}
5950
5952 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5953 auto &RdxMI = cast<GVecReduce>(MI);
5954
5955 if (TypeIdx != 1)
5956 return UnableToLegalize;
5957
5958 // The semantics of the normal non-sequential reductions allow us to freely
5959 // re-associate the operation.
5960 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5961
5962 if (NarrowTy.isVector() &&
5963 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5964 return UnableToLegalize;
5965
5966 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5967 SmallVector<Register> SplitSrcs;
5968 // If NarrowTy is a scalar then we're being asked to scalarize.
5969 const unsigned NumParts =
5970 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5971 : SrcTy.getNumElements();
5972
5973 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5974 if (NarrowTy.isScalar()) {
5975 if (DstTy != NarrowTy)
5976 return UnableToLegalize; // FIXME: handle implicit extensions.
5977
5978 if (isPowerOf2_32(NumParts)) {
5979 // Generate a tree of scalar operations to reduce the critical path.
5980 SmallVector<Register> PartialResults;
5981 unsigned NumPartsLeft = NumParts;
5982 while (NumPartsLeft > 1) {
5983 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5984 PartialResults.emplace_back(
5986 .buildInstr(ScalarOpc, {NarrowTy},
5987 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5988 .getReg(0));
5989 }
5990 SplitSrcs = PartialResults;
5991 PartialResults.clear();
5992 NumPartsLeft = SplitSrcs.size();
5993 }
5994 assert(SplitSrcs.size() == 1);
5995 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5996 MI.eraseFromParent();
5997 return Legalized;
5998 }
5999 // If we can't generate a tree, then just do sequential operations.
6000 Register Acc = SplitSrcs[0];
6001 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
6002 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
6003 .getReg(0);
6004 MIRBuilder.buildCopy(DstReg, Acc);
6005 MI.eraseFromParent();
6006 return Legalized;
6007 }
6008 SmallVector<Register> PartialReductions;
6009 for (unsigned Part = 0; Part < NumParts; ++Part) {
6010 PartialReductions.push_back(
6011 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
6012 .getReg(0));
6013 }
6014
6015 // If the types involved are powers of 2, we can generate intermediate vector
6016 // ops, before generating a final reduction operation.
6017 if (isPowerOf2_32(SrcTy.getNumElements()) &&
6018 isPowerOf2_32(NarrowTy.getNumElements())) {
6019 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
6020 }
6021
6022 Register Acc = PartialReductions[0];
6023 for (unsigned Part = 1; Part < NumParts; ++Part) {
6024 if (Part == NumParts - 1) {
6025 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
6026 {Acc, PartialReductions[Part]});
6027 } else {
6028 Acc = MIRBuilder
6029 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
6030 .getReg(0);
6031 }
6032 }
6033 MI.eraseFromParent();
6034 return Legalized;
6035}
6036
6039 unsigned int TypeIdx,
6040 LLT NarrowTy) {
6041 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
6042 MI.getFirst3RegLLTs();
6043 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
6044 DstTy != NarrowTy)
6045 return UnableToLegalize;
6046
6047 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
6048 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
6049 "Unexpected vecreduce opcode");
6050 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
6051 ? TargetOpcode::G_FADD
6052 : TargetOpcode::G_FMUL;
6053
6054 SmallVector<Register> SplitSrcs;
6055 unsigned NumParts = SrcTy.getNumElements();
6056 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
6057 Register Acc = ScalarReg;
6058 for (unsigned i = 0; i < NumParts; i++)
6059 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
6060 .getReg(0);
6061
6062 MIRBuilder.buildCopy(DstReg, Acc);
6063 MI.eraseFromParent();
6064 return Legalized;
6065}
6066
6068LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
6069 LLT SrcTy, LLT NarrowTy,
6070 unsigned ScalarOpc) {
6071 SmallVector<Register> SplitSrcs;
6072 // Split the sources into NarrowTy size pieces.
6073 extractParts(SrcReg, NarrowTy,
6074 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
6075 MIRBuilder, MRI);
6076 // We're going to do a tree reduction using vector operations until we have
6077 // one NarrowTy size value left.
6078 while (SplitSrcs.size() > 1) {
6079 SmallVector<Register> PartialRdxs;
6080 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
6081 Register LHS = SplitSrcs[Idx];
6082 Register RHS = SplitSrcs[Idx + 1];
6083 // Create the intermediate vector op.
6084 Register Res =
6085 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
6086 PartialRdxs.push_back(Res);
6087 }
6088 SplitSrcs = std::move(PartialRdxs);
6089 }
6090 // Finally generate the requested NarrowTy based reduction.
6091 Observer.changingInstr(MI);
6092 MI.getOperand(1).setReg(SplitSrcs[0]);
6093 Observer.changedInstr(MI);
6094 return Legalized;
6095}
6096
6099 const LLT HalfTy, const LLT AmtTy) {
6100
6101 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6102 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6103 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6104
6105 if (Amt.isZero()) {
6106 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
6107 MI.eraseFromParent();
6108 return Legalized;
6109 }
6110
6111 LLT NVT = HalfTy;
6112 unsigned NVTBits = HalfTy.getSizeInBits();
6113 unsigned VTBits = 2 * NVTBits;
6114
6115 SrcOp Lo(Register(0)), Hi(Register(0));
6116 if (MI.getOpcode() == TargetOpcode::G_SHL) {
6117 if (Amt.ugt(VTBits)) {
6118 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6119 } else if (Amt.ugt(NVTBits)) {
6120 Lo = MIRBuilder.buildConstant(NVT, 0);
6121 Hi = MIRBuilder.buildShl(NVT, InL,
6122 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6123 } else if (Amt == NVTBits) {
6124 Lo = MIRBuilder.buildConstant(NVT, 0);
6125 Hi = InL;
6126 } else {
6127 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
6128 auto OrLHS =
6129 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
6130 auto OrRHS = MIRBuilder.buildLShr(
6131 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6132 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6133 }
6134 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6135 if (Amt.ugt(VTBits)) {
6136 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6137 } else if (Amt.ugt(NVTBits)) {
6138 Lo = MIRBuilder.buildLShr(NVT, InH,
6139 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6140 Hi = MIRBuilder.buildConstant(NVT, 0);
6141 } else if (Amt == NVTBits) {
6142 Lo = InH;
6143 Hi = MIRBuilder.buildConstant(NVT, 0);
6144 } else {
6145 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6146
6147 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6148 auto OrRHS = MIRBuilder.buildShl(
6149 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6150
6151 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6152 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
6153 }
6154 } else {
6155 if (Amt.ugt(VTBits)) {
6156 Hi = Lo = MIRBuilder.buildAShr(
6157 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6158 } else if (Amt.ugt(NVTBits)) {
6159 Lo = MIRBuilder.buildAShr(NVT, InH,
6160 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6161 Hi = MIRBuilder.buildAShr(NVT, InH,
6162 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6163 } else if (Amt == NVTBits) {
6164 Lo = InH;
6165 Hi = MIRBuilder.buildAShr(NVT, InH,
6166 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6167 } else {
6168 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6169
6170 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6171 auto OrRHS = MIRBuilder.buildShl(
6172 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6173
6174 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6175 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
6176 }
6177 }
6178
6179 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
6180 MI.eraseFromParent();
6181
6182 return Legalized;
6183}
6184
6187 LLT RequestedTy) {
6188 if (TypeIdx == 1) {
6189 Observer.changingInstr(MI);
6190 narrowScalarSrc(MI, RequestedTy, 2);
6191 Observer.changedInstr(MI);
6192 return Legalized;
6193 }
6194
6195 Register DstReg = MI.getOperand(0).getReg();
6196 LLT DstTy = MRI.getType(DstReg);
6197 if (DstTy.isVector())
6198 return UnableToLegalize;
6199
6200 Register Amt = MI.getOperand(2).getReg();
6201 LLT ShiftAmtTy = MRI.getType(Amt);
6202 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
6203 if (DstEltSize % 2 != 0)
6204 return UnableToLegalize;
6205
6206 // Check if we should use multi-way splitting instead of recursive binary
6207 // splitting.
6208 //
6209 // Multi-way splitting directly decomposes wide shifts (e.g., 128-bit ->
6210 // 4×32-bit) in a single legalization step, avoiding the recursive overhead
6211 // and dependency chains created by the usual binary splitting approach
6212 // (128->64->32).
6213 //
6214 // The >= 8 parts threshold ensures we only use this optimization when binary
6215 // splitting would require multiple recursive passes, avoiding overhead for
6216 // simple 2-way splits where binary approach is sufficient.
6217 if (RequestedTy.isValid() && RequestedTy.isScalar() &&
6218 DstEltSize % RequestedTy.getSizeInBits() == 0) {
6219 const unsigned NumParts = DstEltSize / RequestedTy.getSizeInBits();
6220 // Use multiway if we have 8 or more parts (i.e., would need 3+ recursive
6221 // steps).
6222 if (NumParts >= 8)
6223 return narrowScalarShiftMultiway(MI, RequestedTy);
6224 }
6225
6226 // Fall back to binary splitting:
6227 // Ignore the input type. We can only go to exactly half the size of the
6228 // input. If that isn't small enough, the resulting pieces will be further
6229 // legalized.
6230 const unsigned NewBitSize = DstEltSize / 2;
6231 const LLT HalfTy = DstTy.getScalarType().changeElementSize(NewBitSize);
6232 const LLT CondTy = LLT::integer(1);
6233
6234 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
6235 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
6236 ShiftAmtTy);
6237 }
6238
6239 // TODO: Expand with known bits.
6240
6241 // Handle the fully general expansion by an unknown amount.
6242 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6243
6244 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6245 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6246 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6247
6248 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6249 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6250
6251 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6252 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
6253 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
6254
6255 Register ResultRegs[2];
6256 switch (MI.getOpcode()) {
6257 case TargetOpcode::G_SHL: {
6258 // Short: ShAmt < NewBitSize
6259 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
6260
6261 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6262 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
6263 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6264
6265 // Long: ShAmt >= NewBitSize
6266 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
6267 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
6268
6269 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6270 auto Hi = MIRBuilder.buildSelect(
6271 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6272
6273 ResultRegs[0] = Lo.getReg(0);
6274 ResultRegs[1] = Hi.getReg(0);
6275 break;
6276 }
6277 case TargetOpcode::G_LSHR:
6278 case TargetOpcode::G_ASHR: {
6279 // Short: ShAmt < NewBitSize
6280 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
6281
6282 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
6283 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6284 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6285
6286 // Long: ShAmt >= NewBitSize
6288 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6289 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
6290 } else {
6291 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6292 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
6293 }
6294 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
6295 {InH, AmtExcess}); // Lo from Hi part.
6296
6297 auto Lo = MIRBuilder.buildSelect(
6298 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6299
6300 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6301
6302 ResultRegs[0] = Lo.getReg(0);
6303 ResultRegs[1] = Hi.getReg(0);
6304 break;
6305 }
6306 default:
6307 llvm_unreachable("not a shift");
6308 }
6309
6310 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6311 MI.eraseFromParent();
6312 return Legalized;
6313}
6314
6316 unsigned PartIdx,
6317 unsigned NumParts,
6318 ArrayRef<Register> SrcParts,
6319 const ShiftParams &Params,
6320 LLT TargetTy, LLT ShiftAmtTy) {
6321 auto WordShiftConst = getIConstantVRegVal(Params.WordShift, MRI);
6322 auto BitShiftConst = getIConstantVRegVal(Params.BitShift, MRI);
6323 assert(WordShiftConst && BitShiftConst && "Expected constants");
6324
6325 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6326 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6327 const bool NeedsInterWordShift = ShiftBits != 0;
6328
6329 switch (Opcode) {
6330 case TargetOpcode::G_SHL: {
6331 // Data moves from lower indices to higher indices
6332 // If this part would come from a source beyond our range, it's zero
6333 if (PartIdx < ShiftWords)
6334 return Params.Zero;
6335
6336 unsigned SrcIdx = PartIdx - ShiftWords;
6337 if (!NeedsInterWordShift)
6338 return SrcParts[SrcIdx];
6339
6340 // Combine shifted main part with carry from previous part
6341 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6342 if (SrcIdx > 0) {
6343 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6344 Params.InvBitShift);
6345 return MIRBuilder.buildOr(TargetTy, Hi, Lo).getReg(0);
6346 }
6347 return Hi.getReg(0);
6348 }
6349
6350 case TargetOpcode::G_LSHR: {
6351 unsigned SrcIdx = PartIdx + ShiftWords;
6352 if (SrcIdx >= NumParts)
6353 return Params.Zero;
6354 if (!NeedsInterWordShift)
6355 return SrcParts[SrcIdx];
6356
6357 // Combine shifted main part with carry from next part
6358 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6359 if (SrcIdx + 1 < NumParts) {
6360 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6361 Params.InvBitShift);
6362 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6363 }
6364 return Lo.getReg(0);
6365 }
6366
6367 case TargetOpcode::G_ASHR: {
6368 // Like LSHR but preserves sign bit
6369 unsigned SrcIdx = PartIdx + ShiftWords;
6370 if (SrcIdx >= NumParts)
6371 return Params.SignBit;
6372 if (!NeedsInterWordShift)
6373 return SrcParts[SrcIdx];
6374
6375 // Only the original MSB part uses arithmetic shift to preserve sign. All
6376 // other parts use logical shift since they're just moving data bits.
6377 auto Lo =
6378 (SrcIdx == NumParts - 1)
6379 ? MIRBuilder.buildAShr(TargetTy, SrcParts[SrcIdx], Params.BitShift)
6380 : MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6381 Register HiSrc =
6382 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.SignBit;
6383 auto Hi = MIRBuilder.buildShl(TargetTy, HiSrc, Params.InvBitShift);
6384 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6385 }
6386
6387 default:
6388 llvm_unreachable("not a shift");
6389 }
6390}
6391
6393 Register MainOperand,
6394 Register ShiftAmt,
6395 LLT TargetTy,
6396 Register CarryOperand) {
6397 // This helper generates a single output part for variable shifts by combining
6398 // the main operand (shifted by BitShift) with carry bits from an adjacent
6399 // part.
6400
6401 // For G_ASHR, individual parts don't have their own sign bit, only the
6402 // complete value does. So we use LSHR for the main operand shift in ASHR
6403 // context.
6404 unsigned MainOpcode = (Opcode == TargetOpcode::G_ASHR)
6405 ? static_cast<unsigned>(TargetOpcode::G_LSHR)
6406 : Opcode;
6407
6408 // Perform the primary shift on the main operand
6409 Register MainShifted =
6410 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6411 .getReg(0);
6412
6413 // No carry operand available
6414 if (!CarryOperand.isValid())
6415 return MainShifted;
6416
6417 // If BitShift is 0 (word-aligned shift), no inter-word bit movement occurs,
6418 // so carry bits aren't needed.
6419 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6420 auto ZeroConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6421 LLT BoolTy = LLT::scalar(1);
6422 auto IsZeroBitShift =
6423 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, ShiftAmt, ZeroConst);
6424
6425 // Extract bits from the adjacent part that will "carry over" into this part.
6426 // The carry direction is opposite to the main shift direction, so we can
6427 // align the two shifted values before combining them with OR.
6428
6429 // Determine the carry shift opcode (opposite direction)
6430 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6431 : TargetOpcode::G_SHL;
6432
6433 // Calculate inverse shift amount: BitWidth - ShiftAmt
6434 auto TargetBitsConst =
6435 MIRBuilder.buildConstant(ShiftAmtTy, TargetTy.getScalarSizeInBits());
6436 auto InvShiftAmt = MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6437
6438 // Shift the carry operand
6439 Register CarryBits =
6441 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6442 .getReg(0);
6443
6444 // If BitShift is 0, don't include carry bits (InvShiftAmt would equal
6445 // TargetBits which would be poison for the individual carry shift operation).
6446 auto ZeroReg = MIRBuilder.buildConstant(TargetTy, 0);
6447 Register SafeCarryBits =
6448 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6449 .getReg(0);
6450
6451 // Combine the main shifted part with the carry bits
6452 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6453}
6454
6457 const APInt &Amt,
6458 LLT TargetTy,
6459 LLT ShiftAmtTy) {
// Purpose: lower a wide scalar shift by the constant Amt into TargetTy-sized
// pieces. Operand 1 is split into parts, every destination part is produced
// by buildConstantShiftPart, and the parts are merged into operand 0. MI is
// erased and Legalized is returned on every path.
// NOTE(review): the first lines of this signature are missing from this
// listing, so the leading parameter(s) (MI is used below) are not visible.
6460 // Any wide shift can be decomposed into WordShift + BitShift components.
6461 // When shift amount is known constant, directly compute the decomposition
6462 // values and generate constant registers.
6463 Register DstReg = MI.getOperand(0).getReg();
6464 Register SrcReg = MI.getOperand(1).getReg();
6465 LLT DstTy = MRI.getType(DstReg);
6466
6467 const unsigned DstBits = DstTy.getScalarSizeInBits();
6468 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6469 const unsigned NumParts = DstBits / TargetBits;
6470
6471 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6472
6473 // When the shift amount is known at compile time, we just calculate which
6474 // source parts contribute to each output part.
6475
6476 SmallVector<Register, 8> SrcParts;
6477 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6478
6479 if (Amt.isZero()) {
6480 // No shift needed, just copy
6481 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6482 MI.eraseFromParent();
6483 return Legalized;
6484 }
6485
6486 ShiftParams Params;
// Whole-word and intra-word components of the constant shift amount.
6487 const unsigned ShiftWords = Amt.getZExtValue() / TargetBits;
6488 const unsigned ShiftBits = Amt.getZExtValue() % TargetBits;
6489
6490 // Generate constants and values needed by all shift types
6491 Params.WordShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftWords).getReg(0);
6492 Params.BitShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftBits).getReg(0);
// When ShiftBits is 0 this constant equals TargetBits.
// NOTE(review): presumably buildConstantShiftPart does not shift by
// InvBitShift in that case - confirm against its definition.
6493 Params.InvBitShift =
6494 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6495 Params.Zero = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6496
6497 // For ASHR, we need the sign-extended value to fill shifted-out positions
// An arithmetic shift of the most significant part by TargetBits - 1 leaves
// a part in which every bit is a copy of that part's top bit.
6498 if (MI.getOpcode() == TargetOpcode::G_ASHR)
6499 Params.SignBit =
6501 .buildAShr(TargetTy, SrcParts[SrcParts.size() - 1],
6502 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6503 .getReg(0);
6504
// Build each destination part independently, least significant index first.
6505 SmallVector<Register, 8> DstParts(NumParts);
6506 for (unsigned I = 0; I < NumParts; ++I)
6507 DstParts[I] = buildConstantShiftPart(MI.getOpcode(), I, NumParts, SrcParts,
6508 Params, TargetTy, ShiftAmtTy);
6509
6510 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6511 MI.eraseFromParent();
6512 return Legalized;
6513}
6514
// Purpose: lower a wide scalar shift (G_SHL / G_LSHR / G_ASHR, per the switch
// below) into operations on TargetTy-sized parts. A constant shift amount is
// delegated to narrowScalarShiftByConstantMultiway; otherwise every output
// part is computed with a chain of selects keyed on the runtime word shift.
// MI is erased and Legalized is returned.
// NOTE(review): the signature lines of this definition are missing from this
// listing; MI and TargetTy are used below but their declarations are not
// visible here.
6517 Register DstReg = MI.getOperand(0).getReg();
6518 Register SrcReg = MI.getOperand(1).getReg();
6519 Register AmtReg = MI.getOperand(2).getReg();
6520 LLT DstTy = MRI.getType(DstReg);
6521 LLT ShiftAmtTy = MRI.getType(AmtReg);
6522
6523 const unsigned DstBits = DstTy.getScalarSizeInBits();
6524 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6525 const unsigned NumParts = DstBits / TargetBits;
6526
6527 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6528 assert(isPowerOf2_32(TargetBits) && "Target bit width must be power of 2");
6529
6530 // If the shift amount is known at compile time, we can use direct indexing
6531 // instead of generating select chains in the general case.
6532 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI))
6533 return narrowScalarShiftByConstantMultiway(MI, VRegAndVal->Value, TargetTy,
6534 ShiftAmtTy);
6535
6536 // For runtime-variable shift amounts, we must generate a more complex
6537 // sequence that handles all possible shift values using select chains.
6538
6539 // Split the input into target-sized pieces
6540 SmallVector<Register, 8> SrcParts;
6541 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6542
6543 // Shifting by zero should be a no-op.
6544 auto ZeroAmtConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6545 LLT BoolTy = LLT::scalar(1);
6546 auto IsZeroShift =
6547 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, AmtReg, ZeroAmtConst);
6548
6549 // Any wide shift can be decomposed into two components:
6550 // 1. WordShift: number of complete target-sized words to shift
6551 // 2. BitShift: number of bits to shift within each word
6552 //
6553 // Example: 128-bit >> 50 with 32-bit target:
6554 // WordShift = 50 / 32 = 1 (shift right by 1 complete word)
6555 // BitShift = 50 % 32 = 18 (shift each word right by 18 bits)
// TargetBits is asserted to be a power of two above, so the division and the
// remainder are emitted as a right shift by log2(TargetBits) and a mask with
// TargetBits - 1.
6556 unsigned TargetBitsLog2 = Log2_32(TargetBits);
6557 auto TargetBitsLog2Const =
6558 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6559 auto TargetBitsMask = MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6560
6561 Register WordShift =
6562 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6563 Register BitShift =
6564 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6565
6566 // Fill values:
6567 // - SHL/LSHR: fill with zeros
6568 // - ASHR: fill with sign-extended MSB
6569 Register ZeroReg = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6570
6571 Register FillValue;
6572 if (MI.getOpcode() == TargetOpcode::G_ASHR) {
6573 auto TargetBitsMinusOneConst =
6574 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6575 FillValue = MIRBuilder
6576 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6577 TargetBitsMinusOneConst)
6578 .getReg(0);
6579 } else {
6580 FillValue = ZeroReg;
6581 }
6582
6583 SmallVector<Register, 8> DstParts(NumParts);
6584
6585 // For each output part, generate a select chain that chooses the correct
6586 // result based on the runtime WordShift value. This handles all possible
6587 // word shift amounts by pre-calculating what each would produce.
6588 for (unsigned I = 0; I < NumParts; ++I) {
6589 // Initialize with appropriate default value for this shift type
// If WordShift matches no K in [0, NumParts), none of the selects built below
// replaces this value, so the part ends up as FillValue.
6590 Register InBoundsResult = FillValue;
6591
6592 // clang-format off
6593 // Build a branchless select chain by pre-computing results for all possible
6594 // WordShift values (0 to NumParts-1). Each iteration nests a new select:
6595 //
6596 // K=0: select(WordShift==0, result0, FillValue)
6597 // K=1: select(WordShift==1, result1, select(WordShift==0, result0, FillValue))
6598 // K=2: select(WordShift==2, result2, select(WordShift==1, result1, select(...)))
6599 // clang-format on
6600 for (unsigned K = 0; K < NumParts; ++K) {
6601 auto WordShiftKConst = MIRBuilder.buildConstant(ShiftAmtTy, K);
6602 auto IsWordShiftK = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy,
6603 WordShift, WordShiftKConst);
6604
6605 // Calculate source indices for this word shift
6606 //
6607 // For 4-part 128-bit value with K=1 word shift:
6608 // SHL: [3][2][1][0] << K => [2][1][0][Z]
6609 // -> (MainIdx = I-K, CarryIdx = I-K-1)
6610 // LSHR: [3][2][1][0] >> K => [Z][3][2][1]
6611 // -> (MainIdx = I+K, CarryIdx = I+K+1)
6612 int MainSrcIdx;
6613 int CarrySrcIdx; // Index for the word that provides the carried-in bits.
6614
6615 switch (MI.getOpcode()) {
6616 case TargetOpcode::G_SHL:
6617 MainSrcIdx = (int)I - (int)K;
6618 CarrySrcIdx = MainSrcIdx - 1;
6619 break;
6620 case TargetOpcode::G_LSHR:
6621 case TargetOpcode::G_ASHR:
6622 MainSrcIdx = (int)I + (int)K;
6623 CarrySrcIdx = MainSrcIdx + 1;
6624 break;
6625 default:
6626 llvm_unreachable("Not a shift");
6627 }
6628
6629 // Check bounds and build the result for this word shift
6630 Register ResultForK;
6631 if (MainSrcIdx >= 0 && MainSrcIdx < (int)NumParts) {
6632 Register MainOp = SrcParts[MainSrcIdx];
// CarryOp is left default-constructed when there is no neighbouring source
// part and the ASHR sign-fill case below does not apply.
6633 Register CarryOp;
6634
6635 // Determine carry operand with bounds checking
6636 if (CarrySrcIdx >= 0 && CarrySrcIdx < (int)NumParts)
6637 CarryOp = SrcParts[CarrySrcIdx];
6638 else if (MI.getOpcode() == TargetOpcode::G_ASHR &&
6639 CarrySrcIdx >= (int)NumParts)
6640 CarryOp = FillValue; // Use sign extension
6641
6642 ResultForK = buildVariableShiftPart(MI.getOpcode(), MainOp, BitShift,
6643 TargetTy, CarryOp);
6644 } else {
6645 // Out of bounds - use fill value for this K
6646 ResultForK = FillValue;
6647 }
6648
6649 // Select this result if WordShift equals K
6650 InBoundsResult =
6652 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6653 .getReg(0);
6654 }
6655
6656 // Handle zero-shift special case: if shift is 0, use original input
6657 DstParts[I] =
6659 .buildSelect(TargetTy, IsZeroShift, SrcParts[I], InBoundsResult)
6660 .getReg(0);
6661 }
6662
6663 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6664 MI.eraseFromParent();
6665 return Legalized;
6666}
6667
6670 LLT MoreTy) {
// Purpose: widen a PHI to MoreTy. Only type index 0 is accepted (asserted).
// Every incoming value is widened in its own predecessor block and the
// result is widened in the PHI's block; MI is updated in place and the
// observer is notified around the change.
// NOTE(review): the first lines of this signature are missing from this
// listing; MI and TypeIdx are used below but not declared in view.
6671 assert(TypeIdx == 0 && "Expecting only Idx 0");
6672
6673 Observer.changingInstr(MI);
// Operands after the def come in (value, block) pairs: operand I is the
// incoming value and operand I + 1 is the block it comes from. New
// instructions for that value go in front of that block's first terminator.
6674 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
6675 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
6676 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
6677 moreElementsVectorSrc(MI, MoreTy, I);
6678 }
6679
6680 MachineBasicBlock &MBB = *MI.getParent();
// The insert point is set one position before the first non-PHI instruction.
// NOTE(review): presumably moreElementsVectorDst advances the insert point
// by one before emitting, so the new code lands after the PHIs - confirm.
6681 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
6682 moreElementsVectorDst(MI, MoreTy, 0);
6683 Observer.changedInstr(MI);
6684 return Legalized;
6685}
6686
/// Build a constant of scalar type \p Ty selected by the G_VECREDUCE_*
/// \p Opcode: 0 for ADD/OR/XOR/UMAX, 1 for MUL, -0.0 for FADD and 1.0 for
/// FMUL. For AND/UMIN, SMAX and SMIN an all-ones, signed-minimum and
/// signed-maximum APInt respectively is passed together with \p Ty.
/// \p Ty must be a scalar (asserted). FMINIMUM/FMAXIMUM are not implemented
/// and any other opcode is rejected.
/// NOTE(review): the opening lines of the calls that receive the APInt values
/// and the invalid-opcode message are missing from this listing.
6687MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
6688 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
6689 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
6690
6691 switch (Opcode) {
6692 default:
6694 "getNeutralElementForVecReduce called with invalid opcode!");
6695 case TargetOpcode::G_VECREDUCE_ADD:
6696 case TargetOpcode::G_VECREDUCE_OR:
6697 case TargetOpcode::G_VECREDUCE_XOR:
6698 case TargetOpcode::G_VECREDUCE_UMAX:
6699 return MIRBuilder.buildConstant(Ty, 0);
6700 case TargetOpcode::G_VECREDUCE_MUL:
6701 return MIRBuilder.buildConstant(Ty, 1);
// Ty is asserted scalar above, so getScalarSizeInBits() here and
// getSizeInBits() in the two cases below describe the same width.
6702 case TargetOpcode::G_VECREDUCE_AND:
6703 case TargetOpcode::G_VECREDUCE_UMIN:
6705 Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
6706 case TargetOpcode::G_VECREDUCE_SMAX:
6708 Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
6709 case TargetOpcode::G_VECREDUCE_SMIN:
6711 Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
// -0.0 rather than +0.0: under IEEE-754 addition x + (-0.0) yields x for
// every x, including x == -0.0, whereas (-0.0) + (+0.0) yields +0.0.
6712 case TargetOpcode::G_VECREDUCE_FADD:
6713 return MIRBuilder.buildFConstant(Ty, -0.0);
6714 case TargetOpcode::G_VECREDUCE_FMUL:
6715 return MIRBuilder.buildFConstant(Ty, 1.0);
// Not implemented: asserts in +Asserts builds, then falls out of the switch
// into the llvm_unreachable below.
6716 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6717 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6718 assert(false && "getNeutralElementForVecReduce unimplemented for "
6719 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6720 }
6721 llvm_unreachable("switch expected to return!");
6722}
6723
6726 LLT MoreTy) {
// Purpose: widen the vector operands of MI to MoreTy, dispatching on the
// opcode. Most cases rewrite MI in place: the affected sources are widened
// with moreElementsVectorSrc and the result with moreElementsVectorDst,
// bracketed by Observer.changingInstr / changedInstr. Opcodes that are not
// listed, and listed opcodes asked to widen an unsupported TypeIdx, return
// UnableToLegalize.
// NOTE(review): the first lines of this signature are missing from this
// listing; MI and TypeIdx are used below but not declared in view.
6727 unsigned Opc = MI.getOpcode();
6728 switch (Opc) {
6729 case TargetOpcode::G_IMPLICIT_DEF:
6730 case TargetOpcode::G_LOAD: {
6731 if (TypeIdx != 0)
6732 return UnableToLegalize;
6733 Observer.changingInstr(MI);
6734 moreElementsVectorDst(MI, MoreTy, 0);
6735 Observer.changedInstr(MI);
6736 return Legalized;
6737 }
6738 case TargetOpcode::G_STORE:
6739 if (TypeIdx != 0)
6740 return UnableToLegalize;
6741 Observer.changingInstr(MI);
6742 moreElementsVectorSrc(MI, MoreTy, 0);
6743 Observer.changedInstr(MI);
6744 return Legalized;
// Two-source operations whose sources and result all take MoreTy.
6745 case TargetOpcode::G_AND:
6746 case TargetOpcode::G_OR:
6747 case TargetOpcode::G_XOR:
6748 case TargetOpcode::G_ADD:
6749 case TargetOpcode::G_SUB:
6750 case TargetOpcode::G_MUL:
6751 case TargetOpcode::G_FADD:
6752 case TargetOpcode::G_FSUB:
6753 case TargetOpcode::G_FMUL:
6754 case TargetOpcode::G_FDIV:
6755 case TargetOpcode::G_FCOPYSIGN:
6756 case TargetOpcode::G_UADDSAT:
6757 case TargetOpcode::G_USUBSAT:
6758 case TargetOpcode::G_SADDSAT:
6759 case TargetOpcode::G_SSUBSAT:
6760 case TargetOpcode::G_SMIN:
6761 case TargetOpcode::G_SMAX:
6762 case TargetOpcode::G_UMIN:
6763 case TargetOpcode::G_UMAX:
6764 case TargetOpcode::G_FMINNUM:
6765 case TargetOpcode::G_FMAXNUM:
6766 case TargetOpcode::G_FMINNUM_IEEE:
6767 case TargetOpcode::G_FMAXNUM_IEEE:
6768 case TargetOpcode::G_FMINIMUM:
6769 case TargetOpcode::G_FMAXIMUM:
6770 case TargetOpcode::G_FMINIMUMNUM:
6771 case TargetOpcode::G_FMAXIMUMNUM:
6772 case TargetOpcode::G_STRICT_FADD:
6773 case TargetOpcode::G_STRICT_FSUB:
6774 case TargetOpcode::G_STRICT_FMUL: {
6775 Observer.changingInstr(MI);
6776 moreElementsVectorSrc(MI, MoreTy, 1);
6777 moreElementsVectorSrc(MI, MoreTy, 2);
6778 moreElementsVectorDst(MI, MoreTy, 0);
6779 Observer.changedInstr(MI);
6780 return Legalized;
6781 }
6782 case TargetOpcode::G_SHL:
6783 case TargetOpcode::G_ASHR:
6784 case TargetOpcode::G_LSHR: {
6785 Observer.changingInstr(MI);
6786 moreElementsVectorSrc(MI, MoreTy, 1);
6787 // The shift operand may have a different scalar type from the source and
6788 // destination operands.
6789 LLT ShiftMoreTy = MoreTy.changeElementType(
6790 MRI.getType(MI.getOperand(2).getReg()).getElementType());
6791 moreElementsVectorSrc(MI, ShiftMoreTy, 2);
6792 moreElementsVectorDst(MI, MoreTy, 0);
6793 Observer.changedInstr(MI);
6794 return Legalized;
6795 }
// Three-source operations whose sources and result all take MoreTy.
6796 case TargetOpcode::G_FMA:
6797 case TargetOpcode::G_STRICT_FMA:
6798 case TargetOpcode::G_FSHR:
6799 case TargetOpcode::G_FSHL: {
6800 Observer.changingInstr(MI);
6801 moreElementsVectorSrc(MI, MoreTy, 1);
6802 moreElementsVectorSrc(MI, MoreTy, 2);
6803 moreElementsVectorSrc(MI, MoreTy, 3);
6804 moreElementsVectorDst(MI, MoreTy, 0);
6805 Observer.changedInstr(MI);
6806 return Legalized;
6807 }
// Only the source (type index 1) is widened; the result is left untouched.
6808 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6809 case TargetOpcode::G_EXTRACT:
6810 if (TypeIdx != 1)
6811 return UnableToLegalize;
6812 Observer.changingInstr(MI);
6813 moreElementsVectorSrc(MI, MoreTy, 1);
6814 Observer.changedInstr(MI);
6815 return Legalized;
// Operand 1 and the result are widened; any further operands are left as-is.
6816 case TargetOpcode::G_INSERT:
6817 case TargetOpcode::G_INSERT_VECTOR_ELT:
6818 case TargetOpcode::G_FREEZE:
6819 case TargetOpcode::G_FNEG:
6820 case TargetOpcode::G_FABS:
6821 case TargetOpcode::G_FSQRT:
6822 case TargetOpcode::G_FCEIL:
6823 case TargetOpcode::G_FFLOOR:
6824 case TargetOpcode::G_FNEARBYINT:
6825 case TargetOpcode::G_FRINT:
6826 case TargetOpcode::G_INTRINSIC_ROUND:
6827 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6828 case TargetOpcode::G_INTRINSIC_TRUNC:
6829 case TargetOpcode::G_BITREVERSE:
6830 case TargetOpcode::G_BSWAP:
6831 case TargetOpcode::G_FCANONICALIZE:
6832 case TargetOpcode::G_SEXT_INREG:
6833 case TargetOpcode::G_ABS:
6834 case TargetOpcode::G_CTLZ:
6835 case TargetOpcode::G_CTPOP:
6836 if (TypeIdx != 0)
6837 return UnableToLegalize;
6838 Observer.changingInstr(MI);
6839 moreElementsVectorSrc(MI, MoreTy, 1);
6840 moreElementsVectorDst(MI, MoreTy, 0);
6841 Observer.changedInstr(MI);
6842 return Legalized;
6843 case TargetOpcode::G_SELECT: {
6844 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
6845 if (TypeIdx == 1) {
6846 if (!CondTy.isScalar() ||
6847 DstTy.getElementCount() != MoreTy.getElementCount())
6848 return UnableToLegalize;
6849
6850 // This is turning a scalar select of vectors into a vector
6851 // select. Broadcast the select condition.
6852 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6853 Observer.changingInstr(MI);
6854 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6855 Observer.changedInstr(MI);
6856 return Legalized;
6857 }
6858
// Past this point the values are being widened; a vector condition would
// need widening as well, which this path does not do.
6859 if (CondTy.isVector())
6860 return UnableToLegalize;
6861
6862 Observer.changingInstr(MI);
6863 moreElementsVectorSrc(MI, MoreTy, 2);
6864 moreElementsVectorSrc(MI, MoreTy, 3);
6865 moreElementsVectorDst(MI, MoreTy, 0);
6866 Observer.changedInstr(MI);
6867 return Legalized;
6868 }
6869 case TargetOpcode::G_UNMERGE_VALUES:
6870 return UnableToLegalize;
6871 case TargetOpcode::G_PHI:
6872 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
6873 case TargetOpcode::G_SHUFFLE_VECTOR:
6874 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
6875 case TargetOpcode::G_BUILD_VECTOR: {
// NOTE(review): the declaration of Elts is on a line missing from this
// listing. The existing element operands are collected, undef scalars are
// appended up to MoreTy's element count, a MoreTy-wide instruction of the
// same opcode is built, and its trailing elements are dropped into the
// original destination register. MI is replaced rather than updated.
6877 for (auto Op : MI.uses()) {
6878 Elts.push_back(Op.getReg());
6879 }
6880
6881 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
6882 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
6883 }
6884
6885 MIRBuilder.buildDeleteTrailingVectorElements(
6886 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
6887 MI.eraseFromParent();
6888 return Legalized;
6889 }
6890 case TargetOpcode::G_SEXT:
6891 case TargetOpcode::G_ZEXT:
6892 case TargetOpcode::G_ANYEXT:
6893 case TargetOpcode::G_TRUNC:
6894 case TargetOpcode::G_FPTRUNC:
6895 case TargetOpcode::G_FPEXT:
6896 case TargetOpcode::G_FPTOSI:
6897 case TargetOpcode::G_FPTOUI:
6898 case TargetOpcode::G_FPTOSI_SAT:
6899 case TargetOpcode::G_FPTOUI_SAT:
6900 case TargetOpcode::G_SITOFP:
6901 case TargetOpcode::G_UITOFP: {
// Source and result have different element types. Whichever side TypeIdx
// names takes MoreTy directly; the other side takes MoreTy's element count
// combined with its own existing element type.
6902 Observer.changingInstr(MI);
6903 LLT SrcExtTy;
6904 LLT DstExtTy;
6905 if (TypeIdx == 0) {
6906 DstExtTy = MoreTy;
6907 SrcExtTy = MoreTy.changeElementType(
6908 MRI.getType(MI.getOperand(1).getReg()).getElementType());
6909 } else {
6910 DstExtTy = MoreTy.changeElementType(
6911 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6912 SrcExtTy = MoreTy;
6913 }
6914 moreElementsVectorSrc(MI, SrcExtTy, 1);
6915 moreElementsVectorDst(MI, DstExtTy, 0);
6916 Observer.changedInstr(MI);
6917 return Legalized;
6918 }
6919 case TargetOpcode::G_ICMP:
6920 case TargetOpcode::G_FCMP: {
6921 if (TypeIdx != 1)
6922 return UnableToLegalize;
6923
// Operands 2 and 3 are the compared values (operand 1 is not touched here).
// The result keeps its own element type but takes MoreTy's element count.
6924 Observer.changingInstr(MI);
6925 moreElementsVectorSrc(MI, MoreTy, 2);
6926 moreElementsVectorSrc(MI, MoreTy, 3);
6927 LLT CondTy = MoreTy.changeVectorElementType(
6928 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6929 moreElementsVectorDst(MI, CondTy, 0);
6930 Observer.changedInstr(MI);
6931 return Legalized;
6932 }
6933 case TargetOpcode::G_BITCAST: {
6934 if (TypeIdx != 0)
6935 return UnableToLegalize;
6936
6937 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
6938 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6939
// The source element count is scaled by the same ratio as the destination
// (MoreElts / DstElts); bail out when that does not give a whole number.
6940 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
6941 if (coefficient % DstTy.getNumElements() != 0)
6942 return UnableToLegalize;
6943
6944 coefficient = coefficient / DstTy.getNumElements();
6945
6946 LLT NewTy = SrcTy.changeElementCount(
6947 ElementCount::get(coefficient, MoreTy.isScalable()));
6948 Observer.changingInstr(MI);
6949 moreElementsVectorSrc(MI, NewTy, 1);
6950 moreElementsVectorDst(MI, MoreTy, 0);
6951 Observer.changedInstr(MI);
6952 return Legalized;
6953 }
6954 case TargetOpcode::G_VECREDUCE_FADD:
6955 case TargetOpcode::G_VECREDUCE_FMUL:
6956 case TargetOpcode::G_VECREDUCE_ADD:
6957 case TargetOpcode::G_VECREDUCE_MUL:
6958 case TargetOpcode::G_VECREDUCE_AND:
6959 case TargetOpcode::G_VECREDUCE_OR:
6960 case TargetOpcode::G_VECREDUCE_XOR:
6961 case TargetOpcode::G_VECREDUCE_SMAX:
6962 case TargetOpcode::G_VECREDUCE_SMIN:
6963 case TargetOpcode::G_VECREDUCE_UMAX:
6964 case TargetOpcode::G_VECREDUCE_UMIN: {
// The source is padded to MoreTy, then every added lane (indices from the
// original element count up to MoreTy's) is overwritten with the value from
// getNeutralElementForVecReduce. Only the source operand of MI is replaced.
6965 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
6966 MachineOperand &MO = MI.getOperand(1);
6967 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6968 auto NeutralElement = getNeutralElementForVecReduce(
6969 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
6970
6971 LLT IdxTy(TLI.getVectorIdxLLT(MIRBuilder.getDataLayout()));
6972 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
6973 i != e; i++) {
6974 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
6975 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6976 NeutralElement, Idx);
6977 }
6978
6979 Observer.changingInstr(MI);
6980 MO.setReg(NewVec.getReg(0));
6981 Observer.changedInstr(MI);
6982 return Legalized;
6983 }
6984
6985 default:
6986 return UnableToLegalize;
6987 }
6988}
6989
// Purpose: rewrite a shuffle whose mask length differs from the element count
// of its source vectors so that the two agree. A shorter mask is padded with
// undef (-1) entries and the destination is widened to the source type. A
// longer mask causes both sources to be concatenated with undef vectors up
// to a multiple of the source length, with the mask re-indexed to match.
// NOTE(review): the signature lines and the final return statement of this
// definition are missing from this listing.
6992 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6993 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6994 unsigned MaskNumElts = Mask.size();
6995 unsigned SrcNumElts = SrcTy.getNumElements();
6996 LLT DestEltTy = DstTy.getElementType();
6997
// Lengths already agree: nothing to rewrite.
6998 if (MaskNumElts == SrcNumElts)
6999 return Legalized;
7000
7001 if (MaskNumElts < SrcNumElts) {
7002 // Extend mask to match new destination vector size with
7003 // undef values.
7004 SmallVector<int, 16> NewMask(SrcNumElts, -1);
7005 llvm::copy(Mask, NewMask.begin());
7006
7007 moreElementsVectorDst(MI, SrcTy, 0);
7008 MIRBuilder.setInstrAndDebugLoc(MI);
7009 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
7010 MI.getOperand(1).getReg(),
7011 MI.getOperand(2).getReg(), NewMask);
7012 MI.eraseFromParent();
7013
7014 return Legalized;
7015 }
7016
// Mask is longer than the sources: round its length up to a multiple of the
// source length so each padded source is a whole number of SrcTy vectors.
7017 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
7018 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
7019 LLT PaddedTy =
7020 DstTy.changeVectorElementCount(ElementCount::getFixed(PaddedMaskNumElts));
7021
7022 // Create new source vectors by concatenating the initial
7023 // source vectors with undefined vectors of the same size.
7024 auto Undef = MIRBuilder.buildUndef(SrcTy);
7025 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
7026 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
7027 MOps1[0] = MI.getOperand(1).getReg();
7028 MOps2[0] = MI.getOperand(2).getReg();
7029
7030 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
7031 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
7032
7033 // Readjust mask for new input vector length.
// Indices >= SrcNumElts select from the second source; they are moved up by
// the amount the first source grew. Negative (undef) indices pass through
// unchanged, and the padded tail of MappedOps stays -1.
7034 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
7035 for (unsigned I = 0; I != MaskNumElts; ++I) {
7036 int Idx = Mask[I];
7037 if (Idx >= static_cast<int>(SrcNumElts))
7038 Idx += PaddedMaskNumElts - SrcNumElts;
7039 MappedOps[I] = Idx;
7040 }
7041
7042 // If we got more elements than required, extract subvector.
// The first MaskNumElts elements of the padded shuffle are extracted one at a
// time and rebuilt into DstReg.
7043 if (MaskNumElts != PaddedMaskNumElts) {
7044 auto Shuffle =
7045 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
7046
7047 SmallVector<Register, 16> Elts(MaskNumElts);
7048 for (unsigned I = 0; I < MaskNumElts; ++I) {
7049 Elts[I] =
7050 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
7051 .getReg(0);
7052 }
7053 MIRBuilder.buildBuildVector(DstReg, Elts);
7054 } else {
7055 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
7056 }
7057
7058 MI.eraseFromParent();
7060}
7061
7064 unsigned int TypeIdx, LLT MoreTy) {
// Purpose: widen a shuffle to MoreTy. Both sources and the result are
// widened, the mask is extended with undef (-1) entries, and indices that
// referred to the second source are re-based onto its new position. MI is
// replaced by a freshly built shuffle and erased. Only type index 0 with
// identical source and destination types is handled; otherwise
// UnableToLegalize is returned.
// NOTE(review): the first lines of this signature are missing from this
// listing; MI is used below but not declared in view.
7065 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
7066 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
7067 unsigned NumElts = DstTy.getNumElements();
7068 unsigned WidenNumElts = MoreTy.getNumElements();
7069
// NOTE(review): the body of this branch is on a line missing from this
// listing; it handles a destination whose length differs from the sources'.
7070 if (DstTy.isVector() && Src1Ty.isVector() &&
7071 DstTy.getNumElements() != Src1Ty.getNumElements()) {
7073 }
7074
7075 if (TypeIdx != 0)
7076 return UnableToLegalize;
7077
7078 // Expect a canonicalized shuffle.
7079 if (DstTy != Src1Ty || DstTy != Src2Ty)
7080 return UnableToLegalize;
7081
7082 moreElementsVectorSrc(MI, MoreTy, 1);
7083 moreElementsVectorSrc(MI, MoreTy, 2);
7084
7085 // Adjust mask based on new input vector length.
// Indices below NumElts (including negative, undef ones) are kept as they
// are. Indices >= NumElts pointed into the second source, which now starts
// at WidenNumElts instead of NumElts.
7086 SmallVector<int, 16> NewMask(WidenNumElts, -1);
7087 for (unsigned I = 0; I != NumElts; ++I) {
7088 int Idx = Mask[I];
7089 if (Idx < static_cast<int>(NumElts))
7090 NewMask[I] = Idx;
7091 else
7092 NewMask[I] = Idx - NumElts + WidenNumElts;
7093 }
7094 moreElementsVectorDst(MI, MoreTy, 0);
7095 MIRBuilder.setInstrAndDebugLoc(MI);
7096 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
7097 MI.getOperand(1).getReg(),
7098 MI.getOperand(2).getReg(), NewMask);
7099 MI.eraseFromParent();
7100 return Legalized;
7101}
7102
/// Multiply two multi-part values given as \p NarrowTy pieces in \p Src1Regs
/// and \p Src2Regs (index 0 holds the low bits) and write the low
/// DstRegs.size() pieces of the product into \p DstRegs, which must already
/// be sized by the caller.
///
/// Each result piece DstIdx is the sum of: the high halves (G_UMULH) of the
/// partial products feeding piece DstIdx - 1, the low halves (G_MUL) of the
/// partial products whose source indices add up to DstIdx, and the carries
/// accumulated while summing piece DstIdx - 1.
/// NOTE(review): the declarations of B, Factors and Mul are on lines missing
/// from this listing.
7103void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
7104 ArrayRef<Register> Src1Regs,
7105 ArrayRef<Register> Src2Regs,
7106 LLT NarrowTy) {
7108 unsigned SrcParts = Src1Regs.size();
7109 unsigned DstParts = DstRegs.size();
7110
7111 unsigned DstIdx = 0; // Low bits of the result.
7112 Register FactorSum =
7113 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
7114 DstRegs[DstIdx] = FactorSum;
7115
7116 Register CarrySumPrevDstIdx;
7118
7119 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
7120 // Collect high parts of muls from previous DstIdx.
// The bounds keep both source indices (DstIdx - 1 - i and i) within
// [0, SrcParts).
7121 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
7122 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
7123 MachineInstrBuilder Umulh =
7124 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
7125 Factors.push_back(Umulh.getReg(0));
7126 }
7127 // Collect low parts of muls for DstIdx. Visit the diagonal starting with
7128 // the low Src1 part, so multiply-add selectors can use it as the first
7129 // accumulated cross product.
7130 unsigned LowStart = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
7131 unsigned LowEnd = std::min(DstIdx, SrcParts - 1);
// RevI runs one above the real index so the unsigned countdown can reach
// index LowStart == 0 without wrapping.
7132 for (unsigned RevI = LowEnd + 1; RevI != LowStart; --RevI) {
7133 unsigned i = RevI - 1;
7135 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
7136 Factors.push_back(Mul.getReg(0));
7137 }
7138 // Add CarrySum from additions calculated for previous DstIdx.
// No carry exists for DstIdx == 1 because piece 0 is a single product.
7139 if (DstIdx != 1) {
7140 Factors.push_back(CarrySumPrevDstIdx);
7141 }
7142
7143 Register CarrySum;
7144 // Add all factors and accumulate all carries into CarrySum.
// Factors[0] and Factors[1] are read unconditionally in both branches.
// NOTE(review): this relies on at least two factors having been collected;
// with a single source part and DstIdx == 1 only one G_UMULH is pushed -
// confirm callers never reach that configuration.
7145 if (DstIdx != DstParts - 1) {
7146 MachineInstrBuilder Uaddo =
7147 B.buildUAddo(NarrowTy, LLT::integer(1), Factors[0], Factors[1]);
7148 FactorSum = Uaddo.getReg(0);
7149 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
7150 for (unsigned i = 2; i < Factors.size(); ++i) {
7151 MachineInstrBuilder Uaddo =
7152 B.buildUAddo(NarrowTy, LLT::integer(1), FactorSum, Factors[i]);
7153 FactorSum = Uaddo.getReg(0);
7154 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
7155 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
7156 }
7157 } else {
7158 // Since value for the next index is not calculated, neither is CarrySum.
7159 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
7160 for (unsigned i = 2; i < Factors.size(); ++i)
7161 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
7162 }
7163
7164 CarrySumPrevDstIdx = CarrySum;
7165 DstRegs[DstIdx] = FactorSum;
7166 Factors.clear();
7167 }
7168}
7169
7172 LLT NarrowTy) {
// Purpose: split a scalar add/sub (plain, with carry-out, or with carry-in
// and carry-out; signed or unsigned overflow variants) into a chain of
// NarrowTy-sized operations linked through their carry bits, plus an
// optional leftover piece. The pieces are reassembled into operand 0 and MI
// is erased. Only type index 0 and non-vector results are handled.
// NOTE(review): the first lines of this signature are missing from this
// listing; MI and TypeIdx are used below but not declared in view.
7173 if (TypeIdx != 0)
7174 return UnableToLegalize;
7175
7176 Register DstReg = MI.getOperand(0).getReg();
7177 LLT DstType = MRI.getType(DstReg);
7178 // FIXME: add support for vector types
7179 if (DstType.isVector())
7180 return UnableToLegalize;
7181
// OpO: opcode for a piece with no incoming carry.
// OpE: opcode for a non-final piece that consumes a carry.
// OpF: opcode for the final piece; the signed variant is used when the
//      original operation reports signed overflow.
7182 unsigned Opcode = MI.getOpcode();
7183 unsigned OpO, OpE, OpF;
7184 switch (Opcode) {
7185 case TargetOpcode::G_SADDO:
7186 case TargetOpcode::G_SADDE:
7187 case TargetOpcode::G_UADDO:
7188 case TargetOpcode::G_UADDE:
7189 case TargetOpcode::G_ADD:
7190 OpO = TargetOpcode::G_UADDO;
7191 OpE = TargetOpcode::G_UADDE;
7192 OpF = TargetOpcode::G_UADDE;
7193 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7194 OpF = TargetOpcode::G_SADDE;
7195 break;
7196 case TargetOpcode::G_SSUBO:
7197 case TargetOpcode::G_SSUBE:
7198 case TargetOpcode::G_USUBO:
7199 case TargetOpcode::G_USUBE:
7200 case TargetOpcode::G_SUB:
7201 OpO = TargetOpcode::G_USUBO;
7202 OpE = TargetOpcode::G_USUBE;
7203 OpF = TargetOpcode::G_USUBE;
7204 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7205 OpF = TargetOpcode::G_SSUBE;
7206 break;
7207 default:
7208 llvm_unreachable("Unexpected add/sub opcode!");
7209 }
7210
7211 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
7212 unsigned NumDefs = MI.getNumExplicitDefs();
7213 Register Src1 = MI.getOperand(NumDefs).getReg();
7214 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
// CarryDst / CarryIn stay invalid unless the instruction has a second def /
// a third source operand, respectively.
7215 Register CarryDst, CarryIn;
7216 if (NumDefs == 2)
7217 CarryDst = MI.getOperand(1).getReg();
7218 if (MI.getNumOperands() == NumDefs + 3)
7219 CarryIn = MI.getOperand(NumDefs + 2).getReg();
7220
7221 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7222 LLT LeftoverTy, DummyTy;
7223 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
7224 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7225 MIRBuilder, MRI);
7226 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
7227 MRI);
7228
// Remember how many full NarrowTy pieces there are, then process the
// leftover pieces in the same loop by appending them.
7229 int NarrowParts = Src1Regs.size();
7230 Src1Regs.append(Src1Left);
7231 Src2Regs.append(Src2Left);
7232 DstRegs.reserve(Src1Regs.size());
7233
7234 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
// Note: this loop-local DstReg shadows the function-level DstReg above.
7235 Register DstReg =
7236 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7237 Register CarryOut;
7238 // Forward the final carry-out to the destination register
7239 if (i == e - 1 && CarryDst)
7240 CarryOut = CarryDst;
7241 else
7242 CarryOut = MRI.createGenericVirtualRegister(LLT::integer(1));
7243
// CarryIn is only invalid on the first iteration of an operation that has no
// carry-in operand; afterwards it holds the previous piece's carry-out.
7244 if (!CarryIn) {
7245 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7246 {Src1Regs[i], Src2Regs[i]});
7247 } else if (i == e - 1) {
7248 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7249 {Src1Regs[i], Src2Regs[i], CarryIn});
7250 } else {
7251 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7252 {Src1Regs[i], Src2Regs[i], CarryIn});
7253 }
7254
7255 DstRegs.push_back(DstReg);
7256 CarryIn = CarryOut;
7257 }
// The first NarrowParts results are NarrowTy pieces; the rest are leftovers.
7258 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
7259 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7260 ArrayRef(DstRegs).drop_front(NarrowParts));
7261
7262 MI.eraseFromParent();
7263 return Legalized;
7264}
7265
// Purpose: narrow a scalar multiply, or G_UMULH, into NarrowTy-sized pieces
// using multiplyRegisters and merge the selected pieces into the destination.
// Vector types and sizes that are not a multiple of NarrowTy's size return
// UnableToLegalize. MI is erased on success.
// NOTE(review): the signature lines of this definition are missing from this
// listing; MI and NarrowTy are used below but not declared in view.
7268 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
7269
7270 LLT Ty = MRI.getType(DstReg);
7271 if (Ty.isVector())
7272 return UnableToLegalize;
7273
7274 unsigned Size = Ty.getSizeInBits();
7275 unsigned NarrowSize = NarrowTy.getSizeInBits();
7276 if (Size % NarrowSize != 0)
7277 return UnableToLegalize;
7278
7279 unsigned NumParts = Size / NarrowSize;
7280 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
// For a high multiply the full double-width product (2 * NumParts pieces) is
// computed; otherwise only the low NumParts pieces are needed.
7281 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7282
7283 SmallVector<Register, 2> Src1Parts, Src2Parts;
7284 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
7285 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
7286 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
7287 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7288
7289 // Take only high half of registers if this is high mul.
// DstTmpParts - NumParts is 0 for a plain multiply (all pieces are used) and
// NumParts for a high multiply (the upper half is used).
7290 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
7291 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7292 MI.eraseFromParent();
7293 return Legalized;
7294}
7295
7298 LLT NarrowTy) {
// Purpose: narrow the integer result (type index 0) of a floating-point to
// integer conversion by changing the result type to NarrowTy and extending
// back to the original type: sign-extend for G_FPTOSI, zero-extend
// otherwise. Only a 16-bit scalar source element type is accepted, and
// NarrowTy must have at least 17 bits for the signed case or 16 for the
// unsigned case; anything else returns UnableToLegalize.
// NOTE(review): the first lines of this signature are missing from this
// listing; MI and TypeIdx are used below but not declared in view.
7299 if (TypeIdx != 0)
7300 return UnableToLegalize;
7301
7302 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
7303
7304 Register Src = MI.getOperand(1).getReg();
7305 LLT SrcTy = MRI.getType(Src);
7306
7307 // If all finite floats fit into the narrowed integer type, we can just swap
7308 // out the result type. This is practically only useful for conversions from
7309 // half to at least 16-bits, so just handle the one case.
7310 if (SrcTy.getScalarType() != LLT::scalar(16) ||
7311 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
7312 return UnableToLegalize;
7313
7314 Observer.changingInstr(MI);
7315 narrowScalarDst(MI, NarrowTy, 0,
7316 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7317 Observer.changedInstr(MI);
7318 return Legalized;
7319}
7320
7323 LLT NarrowTy) {
// Purpose: narrow the source (type index 1) of an extract. The source is
// split into NarrowTy-sized parts; for every part that overlaps the
// extracted bit range [OpStart, OpStart + OpSize) the overlapping segment is
// forwarded or extracted, and the segments are combined into the
// destination. MI is erased on success.
// NOTE(review): the first lines of this signature are missing from this
// listing; MI and TypeIdx are used below but not declared in view.
7324 if (TypeIdx != 1)
7325 return UnableToLegalize;
7326
7327 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7328
7329 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7330 // FIXME: add support for when SizeOp1 isn't an exact multiple of
7331 // NarrowSize.
7332 if (SizeOp1 % NarrowSize != 0)
7333 return UnableToLegalize;
7334 int NumParts = SizeOp1 / NarrowSize;
7335
7336 SmallVector<Register, 2> SrcRegs, DstRegs;
7337 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7338 MIRBuilder, MRI);
7339
// OpStart is the bit offset of the extract within the source (operand 2) and
// OpSize is the width of the extracted value.
7340 Register OpReg = MI.getOperand(0).getReg();
7341 uint64_t OpStart = MI.getOperand(2).getImm();
7342 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7343 for (int i = 0; i < NumParts; ++i) {
7344 unsigned SrcStart = i * NarrowSize;
7345
7346 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7347 // No part of the extract uses this subregister, ignore it.
7348 continue;
7349 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7350 // The entire subregister is extracted, forward the value.
7351 DstRegs.push_back(SrcRegs[i]);
7352 continue;
7353 }
7354
7355 // ExtractOffset is the bit offset inside this source part at which the
7356 // overlapping segment begins; SegSize is that segment's width in bits.
7357 int64_t ExtractOffset;
7358 uint64_t SegSize;
7359 if (OpStart < SrcStart) {
7360 ExtractOffset = 0;
7361 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7362 } else {
7363 ExtractOffset = OpStart - SrcStart;
7364 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7365 }
7366
7367 Register SegReg = SrcRegs[i];
7368 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7369 // A genuine extract is needed.
7370 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7371 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7372 }
7373
7374 DstRegs.push_back(SegReg);
7375 }
7376
// Combine the collected segments: build_vector for a vector destination, a
// merge for several scalar segments, or a plain copy for a single segment.
7377 Register DstReg = MI.getOperand(0).getReg();
7378 if (MRI.getType(DstReg).isVector())
7379 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7380 else if (DstRegs.size() > 1)
7381 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7382 else
7383 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
7384 MI.eraseFromParent();
7385 return Legalized;
7386}
7387
7390 LLT NarrowTy) {
  // Narrows the result (type index 0) of an insert: operand 0 is the result,
  // operand 1 the wide value being inserted into, operand 2 the inserted value
  // and operand 3 the immediate bit offset. The wide value is split into
  // NarrowTy pieces (plus leftover pieces); each piece overlapped by the
  // inserted value receives the matching slice of it, and the pieces are
  // merged back into the result.
7391 // FIXME: Don't know how to handle secondary types yet.
7392 if (TypeIdx != 0)
7393 return UnableToLegalize;
7394
7395 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
7396 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7397 LLT LeftoverTy;
7398 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7399 LeftoverRegs, MIRBuilder, MRI);
7400
7401 SrcRegs.append(LeftoverRegs);
7402
  // [OpStart, OpStart + OpSize) is the bit range of the result overwritten by
  // the inserted value OpReg.
7403 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7404 Register OpReg = MI.getOperand(2).getReg();
7405 uint64_t OpStart = MI.getOperand(3).getImm();
7406 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7407 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
  // Bit offset of piece I within the wide value.
7408 unsigned DstStart = I * NarrowSize;
7409
7410 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7411 // The entire subregister is defined by this insert, forward the new
7412 // value.
7413 DstRegs.push_back(OpReg);
7414 continue;
7415 }
7416
7417 Register SrcReg = SrcRegs[I];
7418 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
7419 // The leftover reg is smaller than NarrowTy, so we need to extend it.
7420 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7421 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
7422 }
7423
7424 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7425 // No part of the insert affects this subregister, forward the original.
7426 DstRegs.push_back(SrcReg);
7427 continue;
7428 }
7429
7430 // Compute the overlap between this piece and the inserted value: where it
7431 // starts in the piece (InsertOffset), in OpReg (ExtractOffset), and its size.
7432 int64_t ExtractOffset, InsertOffset;
7433 uint64_t SegSize;
7434 if (OpStart < DstStart) {
  // The inserted value began in an earlier piece; its remaining bits land at
  // the start of this piece.
7435 InsertOffset = 0;
7436 ExtractOffset = DstStart - OpStart;
7437 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7438 } else {
  // The inserted value begins inside this piece. The overlap runs from
  // OpStart to the end of the piece or the end of the value, whichever comes
  // first, so it can never be wider than OpSize.
7439 InsertOffset = OpStart - DstStart;
7440 ExtractOffset = 0;
7441 SegSize =
7442 std::min(NarrowSize - InsertOffset, OpSize);
7443 }
7444
7445 Register SegReg = OpReg;
7446 if (ExtractOffset != 0 || SegSize != OpSize) {
7447 // A genuine extract is needed.
7448 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7449 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7450 }
7451
7452 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7453 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7454 DstRegs.push_back(DstReg);
7455 }
7456
  // Leftover pieces were any-extended to NarrowTy above, so the merged value
  // may be wider than the result; truncate in that case.
7457 uint64_t WideSize = DstRegs.size() * NarrowSize;
7458 Register DstReg = MI.getOperand(0).getReg();
7459 if (WideSize > RegTy.getSizeInBits()) {
7460 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
7461 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7462 MIRBuilder.buildTrunc(DstReg, MergeReg);
7463 } else
7464 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7465
7466 MI.eraseFromParent();
7467 return Legalized;
7468}
7469
7472 LLT NarrowTy) {
  // Splits a two-source instruction (three operands, type index 0) into
  // NarrowTy-sized operations plus leftover-typed operations. MI's opcode is
  // applied piecewise to matching parts of both sources and the partial
  // results are recombined into the destination with insertParts.
7473 Register DstReg = MI.getOperand(0).getReg();
7474 LLT DstTy = MRI.getType(DstReg);
7475
7476 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
7477
7478 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7479 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
7480 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7481 LLT LeftoverTy;
7482 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7483 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
7484 return UnableToLegalize;
7485
  // The second source is split with the same DstTy/NarrowTy, so a failure here
  // after the first split succeeded is treated as impossible.
7486 LLT Unused;
7487 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7488 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7489 llvm_unreachable("inconsistent extractParts result");
7490
  // Apply the operation to each pair of NarrowTy parts.
7491 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7492 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
7493 {Src0Regs[I], Src1Regs[I]});
7494 DstRegs.push_back(Inst.getReg(0));
7495 }
7496
  // Apply the operation to each pair of leftover parts.
7497 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7498 auto Inst = MIRBuilder.buildInstr(
7499 MI.getOpcode(),
7500 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7501 DstLeftoverRegs.push_back(Inst.getReg(0));
7502 }
7503
7504 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7505 LeftoverTy, DstLeftoverRegs);
7506
7507 MI.eraseFromParent();
7508 return Legalized;
7509}
7510
7513 LLT NarrowTy) {
  // Narrows the scalar result (type index 0) of a two-register instruction by
  // splitting the source into pieces, building LCM-sized merge pieces with
  // MI's opcode, and re-merging them into the destination. Vector results are
  // rejected.
7514 if (TypeIdx != 0)
7515 return UnableToLegalize;
7516
7517 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7518
7519 LLT DstTy = MRI.getType(DstReg);
7520 if (DstTy.isVector())
7521 return UnableToLegalize;
7522
  // NOTE(review): `Parts` has no visible declaration in this copy of the file;
  // presumably a SmallVector<Register> declared on a line missing here.
  // NOTE(review): extractGCDType / buildLCMMergePieces / buildWidenedRemergeToDst
  // are defined elsewhere; presumably MI's opcode selects how the pieces are
  // padded -- confirm against their definitions.
7524 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7525 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
7526 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7527
7528 MI.eraseFromParent();
7529 return Legalized;
7530}
7531
7534 LLT NarrowTy) {
  // Narrows the value type (type index 0) of a select with a scalar condition:
  // both value operands are split into NarrowTy and leftover parts, one select
  // on the shared condition is built per part pair, and the partial results
  // are recombined with insertParts.
7535 if (TypeIdx != 0)
7536 return UnableToLegalize;
7537
7538 Register CondReg = MI.getOperand(1).getReg();
7539 LLT CondTy = MRI.getType(CondReg);
7540 if (CondTy.isVector()) // TODO: Handle vselect
7541 return UnableToLegalize;
7542
7543 Register DstReg = MI.getOperand(0).getReg();
7544 LLT DstTy = MRI.getType(DstReg);
7545
7546 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7547 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7548 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
7549 LLT LeftoverTy;
7550 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7551 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7552 return UnableToLegalize;
7553
  // Same DstTy/NarrowTy as the first split, so failure here is treated as
  // impossible.
7554 LLT Unused;
7555 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7556 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
7557 llvm_unreachable("inconsistent extractParts result");
7558
  // One select per pair of NarrowTy parts, all using the original condition.
7559 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7560 auto Select = MIRBuilder.buildSelect(NarrowTy,
7561 CondReg, Src1Regs[I], Src2Regs[I]);
7562 DstRegs.push_back(Select.getReg(0));
7563 }
7564
  // Likewise for the leftover parts.
7565 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7566 auto Select = MIRBuilder.buildSelect(
7567 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
7568 DstLeftoverRegs.push_back(Select.getReg(0));
7569 }
7570
7571 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7572 LeftoverTy, DstLeftoverRegs);
7573
7574 MI.eraseFromParent();
7575 return Legalized;
7576}
7577
7580 LLT NarrowTy) {
  // Narrows the source (type index 1) of a count-leading-zeros instruction.
  // Only a scalar source exactly twice as wide as NarrowTy is handled; the
  // count is then computed from the two halves. Everything else returns
  // UnableToLegalize.
7581 if (TypeIdx != 1)
7582 return UnableToLegalize;
7583
7584 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7585 unsigned NarrowSize = NarrowTy.getSizeInBits();
7586
7587 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
  // For the zero-poison opcode the low-half count may use the zero-poison
  // form as well.
7588 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_POISON;
7589
  // NOTE(review): `B` has no visible declaration in this copy of the file;
  // presumably an alias for MIRBuilder declared on a line missing here.
  // The unmerge yields the low half as result 0 and the high half as result 1.
7591 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7592 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
7593 auto C_0 = B.buildConstant(NarrowTy, 0);
7594 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::integer(1),
7595 UnmergeSrc.getReg(1), C_0);
7596 auto LoCTLZ = IsUndef ? B.buildCTLZ_ZERO_POISON(DstTy, UnmergeSrc.getReg(0))
7597 : B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7598 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7599 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
  // The high-half count is only selected when Hi != 0, so the zero-poison
  // form is used for it unconditionally.
7600 auto HiCTLZ = B.buildCTLZ_ZERO_POISON(DstTy, UnmergeSrc.getReg(1));
7601 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7602
7603 MI.eraseFromParent();
7604 return Legalized;
7605 }
7606
7607 return UnableToLegalize;
7608}
7609
7612 LLT NarrowTy) {
  // Narrows the source (type index 1) of a count-trailing-zeros instruction.
  // Only a scalar source exactly twice as wide as NarrowTy is handled; the
  // count is then computed from the two halves. Everything else returns
  // UnableToLegalize.
7613 if (TypeIdx != 1)
7614 return UnableToLegalize;
7615
7616 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7617 unsigned NarrowSize = NarrowTy.getSizeInBits();
7618
7619 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
  // For the zero-poison opcode the high-half count may use the zero-poison
  // form as well.
7620 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_POISON;
7621
  // NOTE(review): `B` has no visible declaration in this copy of the file;
  // presumably an alias for MIRBuilder declared on a line missing here.
  // The unmerge yields the low half as result 0 and the high half as result 1.
7623 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7624 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
7625 auto C_0 = B.buildConstant(NarrowTy, 0);
7626 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7627 UnmergeSrc.getReg(0), C_0);
7628 auto HiCTTZ = IsUndef ? B.buildCTTZ_ZERO_POISON(DstTy, UnmergeSrc.getReg(1))
7629 : B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7630 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7631 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
  // The low-half count is only selected when Lo != 0, so the zero-poison form
  // is used for it unconditionally.
7632 auto LoCTTZ = B.buildCTTZ_ZERO_POISON(DstTy, UnmergeSrc.getReg(0));
7633 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7634
7635 MI.eraseFromParent();
7636 return Legalized;
7637 }
7638
7639 return UnableToLegalize;
7640}
7641
7644 LLT NarrowTy) {
  // Narrows the source (type index 1) of a count-leading-sign-bits
  // instruction. Only a scalar source exactly twice as wide as NarrowTy is
  // handled: if the high half consists solely of copies of its sign bit the
  // count continues into the low half, otherwise it is the count of the high
  // half alone.
7645 if (TypeIdx != 1)
7646 return UnableToLegalize;
7647
7648 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7649 unsigned NarrowSize = NarrowTy.getSizeInBits();
7650
7651 if (!SrcTy.isScalar() || SrcTy.getSizeInBits() != 2 * NarrowSize)
7652 return UnableToLegalize;
7653
  // NOTE(review): `B` has no visible declaration in this copy of the file;
  // presumably an alias for MIRBuilder declared on a line missing here.
7655
7656 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7657 Register Lo = UnmergeSrc.getReg(0);
7658 Register Hi = UnmergeSrc.getReg(1);
7659
  // Sign is the sign bit of Hi replicated across all NarrowSize bits.
7660 auto ShAmt = B.buildConstant(NarrowTy, NarrowSize - 1);
7661 auto Sign = B.buildAShr(NarrowTy, Hi, ShAmt);
7662
  // Hi == Sign exactly when every bit of Hi equals its sign bit.
7663 auto HiIsSign = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), Hi, Sign);
7664
7665 // Invert Lo if Hi is negative. Then count the leading zeros. If there are no
7666 // leading zeros, then the MSB of Lo is different than the MSB of Hi.
7667 // Otherwise the leading zeros represent additional sign bits of the original
7668 // value.
  // Lo and Sign are both NarrowTy, so the XOR must produce NarrowTy as well;
  // only the count instructions produce DstTy.
7669 auto LoInv = B.buildXor(NarrowTy, Lo, Sign);
7670 auto LoCTLZ = B.buildCTLZ(DstTy, LoInv);
7671
7672 // Add NarrowSize-1 to LoCTLZ. This is the full CTLS if Hi is all sign bits.
7673 auto C_NarrowSizeM1 = B.buildConstant(DstTy, NarrowSize - 1);
7674 auto HiIsSignCTLS = B.buildAdd(DstTy, LoCTLZ, C_NarrowSizeM1);
7675
7676 auto HiCTLS = B.buildCTLS(DstTy, Hi);
7677
7678 B.buildSelect(DstReg, HiIsSign, HiIsSignCTLS, HiCTLS);
7679
7680 MI.eraseFromParent();
7681 return Legalized;
7682}
7683
7686 LLT NarrowTy) {
  // Narrows the source (type index 1) of a population count. Only a scalar
  // source exactly twice as wide as NarrowTy is handled: the result is the sum
  // of the population counts of the two halves. Everything else returns
  // UnableToLegalize.
7687 if (TypeIdx != 1)
7688 return UnableToLegalize;
7689
7690 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7691 unsigned NarrowSize = NarrowTy.getSizeInBits();
7692
7693 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7694 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
7695
  // Each half is counted directly into DstTy, then the counts are added.
7696 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7697 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7698 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7699
7700 MI.eraseFromParent();
7701 return Legalized;
7702 }
7703
7704 return UnableToLegalize;
7705}
7706
7709 LLT NarrowTy) {
  // Narrows the exponent operand (operand 2, type index 1): the exponent is
  // clamped to the signed range representable in NarrowTy's scalar width,
  // truncated to NarrowTy, and MI is updated in place to use the truncated
  // value.
7710 if (TypeIdx != 1)
7711 return UnableToLegalize;
7712
  // NOTE(review): `B` has no visible declaration in this copy of the file;
  // presumably an alias for MIRBuilder declared on a line missing here.
7714 Register ExpReg = MI.getOperand(2).getReg();
7715 LLT ExpTy = MRI.getType(ExpReg);
7716
7717 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
7718
7719 // Clamp the exponent to the range of the target type.
7720 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
7721 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
7722 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
7723 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
7724
  // After clamping, the truncation cannot change the value.
7725 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
7726 Observer.changingInstr(MI);
7727 MI.getOperand(2).setReg(Trunc.getReg(0));
7728 Observer.changedInstr(MI);
7729 return Legalized;
7730}
7731
// Lowers the bit-counting opcodes handled by the switch below (G_CTLZ, G_CTTZ,
// their zero-poison forms, G_CTPOP and G_CTLS) into other bit-counting opcodes
// or plain shift/mask arithmetic. Any other opcode returns UnableToLegalize.
7734 unsigned Opc = MI.getOpcode();
7735 const auto &TII = MIRBuilder.getTII();
  // An operation counts as usable when the target reports it as legal, as a
  // libcall, or as custom-legalized.
7736 auto isSupported = [this](const LegalityQuery &Q) {
7737 auto QAction = LI.getAction(Q).Action;
7738 return QAction == Legal || QAction == Libcall || QAction == Custom;
7739 };
7740 switch (Opc) {
7741 default:
7742 return UnableToLegalize;
7743 case TargetOpcode::G_CTLZ_ZERO_POISON: {
7744 // This trivially expands to CTLZ.
7745 Observer.changingInstr(MI);
7746 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
7747 Observer.changedInstr(MI);
7748 return Legalized;
7749 }
7750 case TargetOpcode::G_CTLZ: {
7751 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7752 unsigned Len = SrcTy.getScalarSizeInBits();
7753
7754 if (isSupported({TargetOpcode::G_CTLZ_ZERO_POISON, {DstTy, SrcTy}})) {
7755 // If CTLZ_ZERO_POISON is supported, emit that and a select for zero.
7756 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_POISON(DstTy, SrcReg);
7757 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
7758 auto ICmp = MIRBuilder.buildICmp(
7759 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
7760 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7761 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7762 MI.eraseFromParent();
7763 return Legalized;
7764 }
7765 // for now, we do this:
7766 // NewLen = NextPowerOf2(Len);
7767 // x = x | (x >> 1);
7768 // x = x | (x >> 2);
7769 // ...
7770 // x = x | (x >>16);
7771 // x = x | (x >>32); // for 64-bit input
7772 // Up to NewLen/2
7773 // return Len - popcount(x);
7774 //
7775 // Ref: "Hacker's Delight" by Henry Warren
7776 Register Op = SrcReg;
7777 unsigned NewLen = PowerOf2Ceil(Len);
7778 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7779 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7780 auto MIBOp = MIRBuilder.buildOr(
7781 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
7782 Op = MIBOp.getReg(0);
7783 }
7784 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
7785 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
7786 MIBPop);
7787 MI.eraseFromParent();
7788 return Legalized;
7789 }
7790 case TargetOpcode::G_CTTZ_ZERO_POISON: {
7791 // This trivially expands to CTTZ.
7792 Observer.changingInstr(MI);
7793 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
7794 Observer.changedInstr(MI);
7795 return Legalized;
7796 }
7797 case TargetOpcode::G_CTTZ: {
7798 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7799
7800 unsigned Len = SrcTy.getScalarSizeInBits();
7801 if (isSupported({TargetOpcode::G_CTTZ_ZERO_POISON, {DstTy, SrcTy}})) {
7802 // If CTTZ_ZERO_POISON is legal or custom, emit that and a select with
7803 // zero.
7804 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_POISON(DstTy, SrcReg);
7805 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
  // The compare operates on the source, so its boolean type is derived from
  // SrcTy, matching the G_CTLZ case above.
7806 auto ICmp = MIRBuilder.buildICmp(
7807 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, Zero);
7808 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7809 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7810 MI.eraseFromParent();
7811 return Legalized;
7812 }
7813 // for now, we use: { return popcount(~x & (x - 1)); }
7814 // unless the target has ctlz but not ctpop, in which case we use:
7815 // { return 32 - nlz(~x & (x-1)); }
7816 // Ref: "Hacker's Delight" by Henry Warren
7817 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
7818 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7819 auto MIBTmp = MIRBuilder.buildAnd(
7820 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
  // The replacement count instruction takes a SrcTy input and must produce the
  // DstTy result, so both the legality queries and the emitted instructions
  // use {DstTy, SrcTy}; this keeps the G_SUB below well-typed when the result
  // type differs from the source type.
7821 if (!isSupported({TargetOpcode::G_CTPOP, {DstTy, SrcTy}}) &&
7822 isSupported({TargetOpcode::G_CTLZ, {DstTy, SrcTy}})) {
7823 auto MIBCstLen = MIRBuilder.buildConstant(DstTy, Len);
7824 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
7825 MIRBuilder.buildCTLZ(DstTy, MIBTmp));
7826 MI.eraseFromParent();
7827 return Legalized;
7828 }
  // Otherwise turn MI itself into a G_CTPOP of the masked value.
7829 Observer.changingInstr(MI);
7830 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
7831 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7832 Observer.changedInstr(MI);
7833 return Legalized;
7834 }
7835 case TargetOpcode::G_CTPOP: {
7836 Register SrcReg = MI.getOperand(1).getReg();
7837 LLT Ty = MRI.getType(SrcReg);
7838 unsigned Size = Ty.getScalarSizeInBits();
  // NOTE(review): `B` has no visible declaration in this copy of the file;
  // presumably an alias for MIRBuilder declared on a line missing here.
7840
7841 // Bail out on irregular type lengths.
7842 if (Size > 128 || Size % 8 != 0)
7843 return UnableToLegalize;
7844
7845 // Count set bits in blocks of 2 bits. Default approach would be
7846 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
7847 // We use following formula instead:
7848 // B2Count = val - { (val >> 1) & 0x55555555 }
7849 // since it gives same result in blocks of 2 with one instruction less.
7850 auto C_1 = B.buildConstant(Ty, 1);
7851 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
7852 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
7853 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
7854 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7855 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
7856
7857 // In order to get count in blocks of 4 add values from adjacent block of 2.
7858 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
7859 auto C_2 = B.buildConstant(Ty, 2);
7860 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
7861 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
7862 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
7863 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7864 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7865 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7866
7867 // For count in blocks of 8 bits we don't have to mask high 4 bits before
7868 // addition since count value sits in range {0,...,8} and 4 bits are enough
7869 // to hold such binary values. After addition high 4 bits still hold count
7870 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
7871 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
7872 auto C_4 = B.buildConstant(Ty, 4);
7873 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
7874 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
7875 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
7876 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
7877 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7878
7879 assert(Size <= 128 && "Scalar size is too large for CTPOP lower algorithm");
7880
7881 // Avoid the multiply when shift-add is cheaper.
7882 if (Size == 16 && !Ty.isVector()) {
7883 // v = (v + (v >> 8)) & 0xFF;
7884 auto C_8 = B.buildConstant(Ty, 8);
7885 auto HighSum = B.buildLShr(Ty, B8Count, C_8);
7886 auto Res = B.buildAdd(Ty, B8Count, HighSum);
7887 B.buildAnd(MI.getOperand(0).getReg(), Res, B.buildConstant(Ty, 0xFF));
7888 MI.eraseFromParent();
7889 return Legalized;
7890 }
7891
7892 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
7893 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
7894 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
7895
7896 // Shift count result from 8 high bits to low bits.
7897 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
7898
  // A multiply is used when G_MUL on Ty is legal, widened or custom.
7899 auto IsMulSupported = [this](const LLT Ty) {
7900 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7901 return Action == Legal || Action == WidenScalar || Action == Custom;
7902 };
7903 if (IsMulSupported(Ty)) {
7904 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
7905 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7906 } else {
  // Without a usable multiply, accumulate the byte counts into the top byte
  // with a doubling sequence of shift-left + add steps.
7907 auto ResTmp = B8Count;
7908 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7909 auto ShiftC = B.buildConstant(Ty, Shift);
7910 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
7911 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
7912 }
7913 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7914 }
7915 MI.eraseFromParent();
7916 return Legalized;
7917 }
7918 case TargetOpcode::G_CTLS: {
7919 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7920
7921 // ctls(x) -> ctlz(x ^ (x >> (N - 1))) - 1
7922 auto SignIdxC =
7923 MIRBuilder.buildConstant(SrcTy, SrcTy.getScalarSizeInBits() - 1);
7924 auto OneC = MIRBuilder.buildConstant(DstTy, 1);
7925
7926 auto Shr = MIRBuilder.buildAShr(SrcTy, SrcReg, SignIdxC);
7927
7928 auto Xor = MIRBuilder.buildXor(SrcTy, SrcReg, Shr);
7929 auto Ctlz = MIRBuilder.buildCTLZ(DstTy, Xor);
7930
7931 MIRBuilder.buildSub(DstReg, Ctlz, OneC);
7932 MI.eraseFromParent();
7933 return Legalized;
7934 }
7935 }
7936}
7937
7938 // Check that (every element of) Reg is undef or not an exact multiple of BW.
// The predicate is evaluated through matchUnaryPredicate with undefs allowed.
// NOTE(review): `CI` has no visible declaration in this copy of the file;
// presumably the constant `C` cast to ConstantInt on a line missing here.
7940 Register Reg, unsigned BW) {
7941 return matchUnaryPredicate(
7942 MRI, Reg,
7943 [=](const Constant *C) {
7944 // Null constant here means an undef.
  // Accept undef (no ConstantInt) or a value whose remainder modulo BW is
  // non-zero.
7946 return !CI || CI->getValue().urem(BW) != 0;
7947 },
7948 /*AllowUndefs*/ true);
7949}
7950
// Lowers G_FSHL/G_FSHR to the funnel shift in the opposite direction. Only
// power-of-2 element widths are supported; otherwise UnableToLegalize is
// returned and nothing is emitted.
7953 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7954 LLT Ty = MRI.getType(Dst);
7955 LLT ShTy = MRI.getType(Z);
7956
7957 unsigned BW = Ty.getScalarSizeInBits();
7958
7959 if (!isPowerOf2_32(BW))
7960 return UnableToLegalize;
7961
7962 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7963 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7964
7965 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7966 // fshl X, Y, Z -> fshr X, Y, -Z
7967 // fshr X, Y, Z -> fshl X, Y, -Z
  // The negation operates on the shift amount, so it is built in the amount
  // type ShTy (which may differ from the value type Ty).
7968 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
7969 Z = MIRBuilder.buildSub(ShTy, Zero, Z).getReg(0);
7970 } else {
7971 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7972 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7973 auto One = MIRBuilder.buildConstant(ShTy, 1);
7974 if (IsFSHL) {
  // Y must be computed first: it needs the original X.
7975 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7976 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
7977 } else {
  // X must be computed first: it needs the original Y.
7978 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7979 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
7980 }
7981
7982 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
7983 }
7984
7985 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
7986 MI.eraseFromParent();
7987 return Legalized;
7988}
7989
// Lowers G_FSHL/G_FSHR into a left shift of X and a logical right shift of Y
// whose results are OR'ed (flagged disjoint) into Dst.
7992 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7993 LLT Ty = MRI.getType(Dst);
7994 LLT ShTy = MRI.getType(Z);
7995
7996 const unsigned BW = Ty.getScalarSizeInBits();
7997 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7998
7999 Register ShX, ShY;
8000 Register ShAmt, InvShAmt;
8001
8002 // FIXME: Emit optimized urem by constant instead of letting it expand later.
8003 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
8004 // fshl: X << C | Y >> (BW - C)
8005 // fshr: X << (BW - C) | Y >> C
8006 // where C = Z % BW is not zero
8007 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
8008 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
8009 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
8010 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
8011 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
8012 } else {
  // Z % BW may be zero here, so the inverse shift is split into a shift by 1
  // and a shift by (BW - 1 - Z % BW); neither amount ever reaches BW.
8013 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8014 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8015 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
8016 if (isPowerOf2_32(BW)) {
8017 // Z % BW -> Z & (BW - 1)
8018 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
8019 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8020 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
8021 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
8022 } else {
8023 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
8024 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
8025 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
8026 }
8027
8028 auto One = MIRBuilder.buildConstant(ShTy, 1);
8029 if (IsFSHL) {
8030 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
8031 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
8032 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
8033 } else {
8034 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
8035 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
8036 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
8037 }
8038 }
8039
8040 MIRBuilder.buildOr(Dst, ShX, ShY, MachineInstr::Disjoint);
8041 MI.eraseFromParent();
8042 return Legalized;
8043}
8044
8047 // These operations approximately do the following (while avoiding undefined
8048 // shifts by BW):
8049 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
8050 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
  // Strategy: expand to plain shifts when the opposite-direction funnel shift
  // would itself be lowered; otherwise try the opposite-direction funnel shift
  // and fall back to plain shifts if that is not possible.
8051 Register Dst = MI.getOperand(0).getReg();
8052 LLT Ty = MRI.getType(Dst);
8053 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
8054
8055 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
8056 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
8057
8058 // TODO: Use smarter heuristic that accounts for vector legalization.
8059 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
8060 return lowerFunnelShiftAsShifts(MI);
8061
8062 // This only works for powers of 2, fallback to shifts if it fails.
8063 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
8064 if (Result == UnableToLegalize)
8065 return lowerFunnelShiftAsShifts(MI);
8066 return Result;
8067}
8068
// Lowers an extend whose destination element is more than twice as wide as the
// source element: the source is first extended to double its element width,
// split in two, each half is extended to the destination element type, and the
// halves are merged into Dst. Returns UnableToLegalize when the sizes involved
// are not powers of 2 or when the step is not larger than a doubling.
8070 auto [Dst, Src] = MI.getFirst2Regs();
8071 LLT DstTy = MRI.getType(Dst);
8072 LLT SrcTy = MRI.getType(Src);
8073
8074 uint32_t DstTySize = DstTy.getSizeInBits();
8075 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
8076 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
8077
8078 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
8079 !isPowerOf2_32(SrcTyScalarSize))
8080 return UnableToLegalize;
8081
8082 // The step between extend is too large, split it by creating an intermediate
8083 // extend instruction
8084 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
8085 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
8086 // If the destination type is illegal, split it into multiple statements
8087 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
8088 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
8089 // Unmerge the vector
  // NOTE(review): the argument of this changeElementCount call is on a line
  // missing from this copy of the file; presumably half of MidTy's element
  // count, since exactly two unmerge results are used below -- confirm.
8090 LLT EltTy = MidTy.changeElementCount(
8092 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
8093
8094 // ZExt the vectors
  // NOTE(review): the argument of this changeElementCount call is likewise
  // missing here -- confirm against the original file.
8095 LLT ZExtResTy = DstTy.changeElementCount(
8097 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
8098 {UnmergeSrc.getReg(0)});
8099 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
8100 {UnmergeSrc.getReg(1)});
8101
8102 // Merge the ending vectors
8103 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
8104
8105 MI.eraseFromParent();
8106 return Legalized;
8107 }
8108 return UnableToLegalize;
8109}
8110
8112 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
8113 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
8114 // Similar to how operand splitting is done in SelectionDAG, we can handle
8115 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
8116 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
8117 // %lo16(<4 x s16>) = G_TRUNC %inlo
8118 // %hi16(<4 x s16>) = G_TRUNC %inhi
8119 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
8120 // %res(<8 x s8>) = G_TRUNC %in16
8121
8122 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
8123
8124 Register DstReg = MI.getOperand(0).getReg();
8125 Register SrcReg = MI.getOperand(1).getReg();
8126 LLT DstTy = MRI.getType(DstReg);
8127 LLT SrcTy = MRI.getType(SrcReg);
8128
  // Only vectors with power-of-2 element counts and element sizes are handled.
  // NOTE(review): one line of this condition is missing from this copy of the
  // file; presumably a power-of-2 check on the destination element size.
8129 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
8131 isPowerOf2_32(SrcTy.getNumElements()) &&
8132 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
8133 // Split input type.
8134 LLT SplitSrcTy = SrcTy.changeElementCount(
8135 SrcTy.getElementCount().divideCoefficientBy(2));
8136
8137 // First, split the source into two smaller vectors.
8138 SmallVector<Register, 2> SplitSrcs;
8139 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
8140
8141 // Truncate the splits into intermediate narrower elements.
  // If the source element is more than twice the destination element, stop at
  // twice the destination element size; otherwise truncate straight to the
  // destination element size.
8142 LLT InterTy;
8143 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
8144 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
8145 else
8146 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
8147 for (Register &Src : SplitSrcs)
8148 Src = MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
8149
8150 // Combine the new truncates into one vector
8151 auto Merge = MIRBuilder.buildMergeLikeInstr(
8152 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
8153
8154 // Truncate the new vector to the final result type
  // When the intermediate element size already equals the destination's, the
  // merged vector is simply copied.
8155 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
8156 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
8157 else
8158 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
8159
8160 MI.eraseFromParent();
8161
8162 return Legalized;
8163 }
8164 return UnableToLegalize;
8165}
8166
// Rewrites a rotate as the rotate in the opposite direction by the negated
// amount (0 - Amt, computed in the amount type), then erases MI.
8169 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
8170 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
8171 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
8172 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8173 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
8174 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
8175 MI.eraseFromParent();
8176 return Legalized;
8177}
8178
// Lowers G_ROTL/G_ROTR. In order of preference: the opposite-direction rotate
// (power-of-2 widths only), a funnel shift with both value inputs set to Src,
// or an explicit pair of shifts OR'ed together.
8180 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
8181
8182 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
8183 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
8184
8185 MIRBuilder.setInstrAndDebugLoc(MI);
8186
8187 // If a rotate in the other direction is supported, use it.
  // The reverse rotate that would be emitted takes the amount in AmtTy, so the
  // legality query uses {DstTy, AmtTy}, like the funnel-shift queries below.
8188 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8189 if (LI.isLegalOrCustom({RevRot, {DstTy, AmtTy}}) &&
8190 isPowerOf2_32(EltSizeInBits))
8191 return lowerRotateWithReverseRotate(MI);
8192
8193 // If a funnel shift is supported, use it.
8194 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8195 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8196 bool IsFShLegal = false;
8197 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
8198 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
  // A rotate is a funnel shift with the same register for both value inputs.
8199 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
8200 Register R3) {
8201 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
8202 MI.eraseFromParent();
8203 return Legalized;
8204 };
8205 // Prefer the same-direction funnel shift; otherwise use the opposite one
  // with a negated amount, which is only done for power-of-2 widths.
8206 if (IsFShLegal) {
8207 return buildFunnelShift(FShOpc, Dst, Src, Amt);
8208 } else if (isPowerOf2_32(EltSizeInBits)) {
  // The negation operates on the amount, so it is built in AmtTy.
8209 Amt = MIRBuilder.buildNeg(AmtTy, Amt).getReg(0);
8210 return buildFunnelShift(RevFsh, Dst, Src, Amt);
8211 }
8212 }
8213
8214 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
8215 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
8216 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
8217 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
8218 Register ShVal;
8219 Register RevShiftVal;
8220 if (isPowerOf2_32(EltSizeInBits)) {
8221 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8222 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8223 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
8224 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
8225 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8226 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
8227 RevShiftVal =
8228 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
8229 } else {
8230 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8231 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8232 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
8233 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
8234 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8235 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
8236 auto One = MIRBuilder.buildConstant(AmtTy, 1);
8237 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
8238 RevShiftVal =
8239 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
8240 }
8241 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal, MachineInstr::Disjoint);
8242 MI.eraseFromParent();
8243 return Legalized;
8244}
8245
8246// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
8247// representation.
8250 auto [Dst, Src] = MI.getFirst2Regs();
8251 const LLT S64 = LLT::scalar(64);
8252 const LLT S32 = LLT::scalar(32);
8253 const LLT S1 = LLT::scalar(1);
8254
8255 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8256
8257 // unsigned cul2f(ulong u) {
8258 // uint lz = clz(u);
8259 // uint e = (u != 0) ? 127U + 63U - lz : 0;
8260 // u = (u << lz) & 0x7fffffffffffffffUL;
8261 // ulong t = u & 0xffffffffffUL;
8262 // uint v = (e << 23) | (uint)(u >> 40);
8263 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
8264 // return as_float(v + r);
8265 // }
8266
8267 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
8268 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
8269
8270 auto LZ = MIRBuilder.buildCTLZ_ZERO_POISON(S32, Src);
8271
8272 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
8273 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
8274
8275 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
8276 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
8277
8278 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
8279 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
8280
8281 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
8282
8283 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
8284 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
8285
8286 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
8287 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
8288 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
8289
8290 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
8291 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
8292 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
8293 auto One = MIRBuilder.buildConstant(S32, 1);
8294
8295 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
8296 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
8297 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
8298 MIRBuilder.buildAdd(Dst, V, R);
8299
8300 MI.eraseFromParent();
8301 return Legalized;
8302}
8303
8304 // Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
8305 // operations and G_SITOFP
8308 auto [Dst, Src] = MI.getFirst2Regs();
8309 const LLT S64 = LLT::scalar(64);
8310 const LLT S32 = LLT::scalar(32);
8311 const LLT S1 = LLT::scalar(1);
8312
8313 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8314
8315 // If the sign bit of Src is clear (value <= INT64_MAX) we simply reuse SITOFP.
8316 // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
8317 // saved before division, convert to float by SITOFP, multiply the result
8318 // by 2 (done below by adding the halved result to itself).
8319 auto One = MIRBuilder.buildConstant(S64, 1);
8320 auto Zero = MIRBuilder.buildConstant(S64, 0);
8321 // Result if the sign bit of Src is clear (Src <= INT64_MAX)
8322 auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
8323 // Result if the sign bit of Src is set (Src > INT64_MAX)
8324 auto Halved = MIRBuilder.buildLShr(S64, Src, One);
8325 auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
8326 auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
8327 auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
8328 auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
8329 // Check if the original value is larger than INT64_MAX by comparing it, as a
8330 // signed value, with zero to pick one of the two conversions.
8331 auto IsLarge =
8332 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
8333 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8334
8335 // Both candidate results were emitted above; the select picks one, so the
8336 // original instruction can be removed.
8335 MI.eraseFromParent();
8336 return Legalized;
8337}
8338
8339// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
8340// IEEE double representation.
8343 auto [Dst, Src] = MI.getFirst2Regs();
8344 const LLT S64 = LLT::scalar(64);
8345 const LLT S32 = LLT::scalar(32);
8346
8347 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
8348
8349 // We create double value from 32 bit parts with 32 exponent difference.
8350 // Note that + and - are float operations that adjust the implicit leading
8351 // one, the bases 2^52 and 2^84 are for illustrative purposes.
8352 //
8353 // X = 2^52 * 1.0...LowBits
8354 // Y = 2^84 * 1.0...HighBits
8355 // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
8356 // = - 2^52 * 1.0...HighBits
8357 // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
8358 auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
8359 auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
8360 auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
8361 auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
8362 auto HalfWidth = MIRBuilder.buildConstant(S64, 32);
8363
8364 auto LowBits = MIRBuilder.buildTrunc(S32, Src);
8365 LowBits = MIRBuilder.buildZExt(S64, LowBits);
8366 auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
8367 auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
8368 auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
8369 auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
8370 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
8371
8372 MI.eraseFromParent();
8373 return Legalized;
8374}
8375
8376/// i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16. We cannot
8377/// convert fpround f64->f16 without double-rounding, so we manually perform the
8378/// lowering here where we know it is valid.
8381 LLT SrcTy, MachineIRBuilder &MIRBuilder) {
8382 auto DstFpTy =
8383 SrcTy.changeElementType(LLT::floatIEEE(SrcTy.getScalarSizeInBits()));
8384 auto M1 = MI.getOpcode() == TargetOpcode::G_UITOFP
8385 ? MIRBuilder.buildUITOFP(DstFpTy, Src)
8386 : MIRBuilder.buildSITOFP(DstFpTy, Src);
8387 LLT F32Ty = DstFpTy.changeElementSize(32);
8388 auto M2 = MIRBuilder.buildFPTrunc(F32Ty, M1);
8389 MIRBuilder.buildFPTrunc(Dst, M2);
8390 MI.eraseFromParent();
8392}
8393
8395 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8396
8397 if (SrcTy == LLT::scalar(1)) {
8398 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
8399 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8400 MIRBuilder.buildSelect(Dst, Src, True, False);
8401 MI.eraseFromParent();
8402 return Legalized;
8403 }
8404
8405 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8406 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8407
8408 if (SrcTy != LLT::scalar(64))
8409 return UnableToLegalize;
8410
8411 if (DstTy == LLT::scalar(32))
8412 // TODO: SelectionDAG has several alternative expansions to port which may
8413 // be more reasonable depending on the available instructions. We also need
8414 // a more advanced mechanism to choose an optimal version depending on
8415 // target features such as sitofp or CTLZ availability.
8417
8418 if (DstTy == LLT::scalar(64))
8420
8421 return UnableToLegalize;
8422}
8423
8425 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8426
8427 const LLT I64 = LLT::integer(64);
8428 const LLT I32 = LLT::integer(32);
8429 const LLT I1 = LLT::integer(1);
8430
8431 if (SrcTy == I1) {
8432 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
8433 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8434 MIRBuilder.buildSelect(Dst, Src, True, False);
8435 MI.eraseFromParent();
8436 return Legalized;
8437 }
8438
8439 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8440 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8441
8442 if (SrcTy != I64)
8443 return UnableToLegalize;
8444
8445 if (DstTy.getScalarSizeInBits() == 32) {
8446 // signed cl2f(long l) {
8447 // long s = l >> 63;
8448 // float r = cul2f((l + s) ^ s);
8449 // return s ? -r : r;
8450 // }
8451 Register L = Src;
8452 auto SignBit = MIRBuilder.buildConstant(I64, 63);
8453 auto S = MIRBuilder.buildAShr(I64, L, SignBit);
8454
8455 auto LPlusS = MIRBuilder.buildAdd(I64, L, S);
8456 auto Xor = MIRBuilder.buildXor(I64, LPlusS, S);
8457 auto R = MIRBuilder.buildUITOFP(I32, Xor);
8458
8459 auto RNeg = MIRBuilder.buildFNeg(I32, R);
8460 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, I1, S,
8461 MIRBuilder.buildConstant(I64, 0));
8462 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8463 MI.eraseFromParent();
8464 return Legalized;
8465 }
8466
8467 return UnableToLegalize;
8468}
8469
8471 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8472 const LLT S64 = LLT::scalar(64);
8473 const LLT S32 = LLT::scalar(32);
8474
8475 if (SrcTy != S64 && SrcTy != S32)
8476 return UnableToLegalize;
8477 if (DstTy != S32 && DstTy != S64)
8478 return UnableToLegalize;
8479
8480 // FPTOSI gives same result as FPTOUI for positive signed integers.
8481 // FPTOUI needs to deal with fp values that convert to unsigned integers
8482 // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
8483
8484 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
8485 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
8487 APInt::getZero(SrcTy.getSizeInBits()));
8488 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
8489
8490 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
8491
8492 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
8493 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
8494 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
8495 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
8496 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
8497 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
8498 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
8499
8500 const LLT S1 = LLT::scalar(1);
8501
8502 MachineInstrBuilder FCMP =
8503 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
8504 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8505
8506 MI.eraseFromParent();
8507 return Legalized;
8508}
8509
8511 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8512 const LLT S64 = LLT::scalar(64);
8513 const LLT S32 = LLT::scalar(32);
8514
8515 // FIXME: Only f32 to i64 conversions are supported.
8516 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
8517 return UnableToLegalize;
8518
8519 // Expand f32 -> i64 conversion
8520 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8521 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8522
8523 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8524
8525 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
8526 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
8527
8528 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
8529 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
8530
8531 auto SignMask = MIRBuilder.buildConstant(SrcTy,
8532 APInt::getSignMask(SrcEltBits));
8533 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
8534 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
8535 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
8536 Sign = MIRBuilder.buildSExt(DstTy, Sign);
8537
8538 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
8539 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
8540 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
8541
8542 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
8543 R = MIRBuilder.buildZExt(DstTy, R);
8544
8545 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
8546 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
8547 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
8548 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
8549
8550 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
8551 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
8552
8553 const LLT S1 = LLT::scalar(1);
8554 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
8555 S1, Exponent, ExponentLoBit);
8556
8557 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
8558
8559 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
8560 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
8561
8562 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
8563
8564 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
8565 S1, Exponent, ZeroSrcTy);
8566
8567 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
8568 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
8569
8570 MI.eraseFromParent();
8571 return Legalized;
8572}
8573
8576 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8577
8578 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8579 unsigned SatWidth = DstTy.getScalarSizeInBits();
8580
8581 // Determine minimum and maximum integer values and their corresponding
8582 // floating-point values.
8583 APInt MinInt, MaxInt;
8584 if (IsSigned) {
8585 MinInt = APInt::getSignedMinValue(SatWidth);
8586 MaxInt = APInt::getSignedMaxValue(SatWidth);
8587 } else {
8588 MinInt = APInt::getMinValue(SatWidth);
8589 MaxInt = APInt::getMaxValue(SatWidth);
8590 }
8591
8592 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8593 APFloat MinFloat(Semantics);
8594 APFloat MaxFloat(Semantics);
8595
8596 APFloat::opStatus MinStatus =
8597 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
8598 APFloat::opStatus MaxStatus =
8599 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
8600 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
8601 !(MaxStatus & APFloat::opStatus::opInexact);
8602
8603 // If the integer bounds are exactly representable as floats, emit a
8604 // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
8605 // and selects.
8606 if (AreExactFloatBounds) {
8607 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
8608 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
8609 auto MaxP =
8610 MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, LLT::integer(1), Src, MaxC);
8611 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8612 // Clamp by MaxFloat from above. NaN cannot occur.
8613 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8614 auto MinP = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, LLT::integer(1), Max,
8616 auto Min =
8617 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
8618 // Convert clamped value to integer. In the unsigned case we're done,
8619 // because we mapped NaN to MinFloat, which will cast to zero.
8620 if (!IsSigned) {
8621 MIRBuilder.buildFPTOUI(Dst, Min);
8622 MI.eraseFromParent();
8623 return Legalized;
8624 }
8625
8626 // Otherwise, select 0 if Src is NaN.
8627 auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
8628 auto IsZero =
8629 MIRBuilder.buildFCmp(CmpInst::FCMP_UNO, LLT::integer(1), Src, Src);
8630 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
8631 FpToInt);
8632 MI.eraseFromParent();
8633 return Legalized;
8634 }
8635
8636 // Result of direct conversion. The assumption here is that the operation is
8637 // non-trapping and it's fine to apply it to an out-of-range value if we
8638 // select it away later.
8639 auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
8640 : MIRBuilder.buildFPTOUI(DstTy, Src);
8641
8642 // If Src ULT MinFloat, select MinInt. In particular, this also selects
8643 // MinInt if Src is NaN.
8644 auto ULT = MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, LLT::integer(1), Src,
8645 MIRBuilder.buildFConstant(SrcTy, MinFloat));
8646 auto Max = MIRBuilder.buildSelect(
8647 DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8648 // If Src OGT MaxFloat, select MaxInt.
8649 auto OGT = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, LLT::integer(1), Src,
8650 MIRBuilder.buildFConstant(SrcTy, MaxFloat));
8651
8652 // In the unsigned case we are done, because we mapped NaN to MinInt, which
8653 // is already zero.
8654 if (!IsSigned) {
8655 MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
8656 Max);
8657 MI.eraseFromParent();
8658 return Legalized;
8659 }
8660
8661 // Otherwise, select 0 if Src is NaN.
8662 auto Min = MIRBuilder.buildSelect(
8663 DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8664 auto IsZero =
8665 MIRBuilder.buildFCmp(CmpInst::FCMP_UNO, LLT::integer(1), Src, Src);
8666 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
8667 MI.eraseFromParent();
8668 return Legalized;
8669}
8670
8671// Floating-point conversions using truncating and extending loads and stores.
8674 assert((MI.getOpcode() == TargetOpcode::G_FPEXT ||
8675 MI.getOpcode() == TargetOpcode::G_FPTRUNC) &&
8676 "Only G_FPEXT and G_FPTRUNC are expected");
8677
8678 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
8679 MachinePointerInfo PtrInfo;
8680 unsigned StoreOpc;
8681 unsigned LoadOpc;
8682 LLT StackTy;
8683 if (MI.getOpcode() == TargetOpcode::G_FPEXT) {
8684 StackTy = SrcTy;
8685 StoreOpc = TargetOpcode::G_STORE;
8686 LoadOpc = TargetOpcode::G_FPEXTLOAD;
8687 } else {
8688 StackTy = DstTy;
8689 StoreOpc = TargetOpcode::G_FPTRUNCSTORE;
8690 LoadOpc = TargetOpcode::G_LOAD;
8691 }
8692
8693 Align StackTyAlign = getStackTemporaryAlignment(StackTy);
8694 auto StackTemp =
8695 createStackTemporary(StackTy.getSizeInBytes(), StackTyAlign, PtrInfo);
8696
8697 MachineFunction &MF = MIRBuilder.getMF();
8698 auto *StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
8699 StackTy, StackTyAlign);
8700 MIRBuilder.buildStoreInstr(StoreOpc, SrcReg, StackTemp, *StoreMMO);
8701
8702 auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
8703 StackTy, StackTyAlign);
8704 MIRBuilder.buildLoadInstr(LoadOpc, DstReg, StackTemp, *LoadMMO);
8705
8706 MI.eraseFromParent();
8707 return Legalized;
8708}
8709
8710// f64 -> f16 conversion using round-to-nearest-even rounding mode.
8713 const LLT S1 = LLT::scalar(1);
8714 const LLT S32 = LLT::scalar(32);
8715
8716 auto [Dst, Src] = MI.getFirst2Regs();
8717 assert(MRI.getType(Dst).getScalarType() == LLT::float16() &&
8718 MRI.getType(Src).getScalarType() == LLT::float64());
8719
8720 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
8721 return UnableToLegalize;
8722
8723 if (MI.getFlag(MachineInstr::FmAfn)) {
8724 unsigned Flags = MI.getFlags();
8725 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
8726 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
8727 MI.eraseFromParent();
8728 return Legalized;
8729 }
8730
8731 const unsigned ExpMask = 0x7ff;
8732 const unsigned ExpBiasf64 = 1023;
8733 const unsigned ExpBiasf16 = 15;
8734
8735 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
8736 Register U = Unmerge.getReg(0);
8737 Register UH = Unmerge.getReg(1);
8738
8739 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
8740 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
8741
8742 // Subtract the fp64 exponent bias (1023) to get the real exponent and
8743 // add the f16 bias (15) to get the biased exponent for the f16 format.
8744 E = MIRBuilder.buildAdd(
8745 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
8746
8747 auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
8748 M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
8749
8750 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
8751 MIRBuilder.buildConstant(S32, 0x1ff));
8752 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
8753
8754 auto Zero = MIRBuilder.buildConstant(S32, 0);
8755 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
8756 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
8757 M = MIRBuilder.buildOr(S32, M, Lo40Set);
8758
8759 // (M != 0 ? 0x0200 : 0) | 0x7c00;
8760 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
8761 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
8762 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
8763
8764 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
8765 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
8766
8767 // N = M | (E << 12);
8768 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
8769 auto N = MIRBuilder.buildOr(S32, M, EShl12);
8770
8771 // B = clamp(1-E, 0, 13);
8772 auto One = MIRBuilder.buildConstant(S32, 1);
8773 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
8774 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
8775 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
8776
8777 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
8778 MIRBuilder.buildConstant(S32, 0x1000));
8779
8780 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
8781 auto D0 = MIRBuilder.buildShl(S32, D, B);
8782
8783 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
8784 D0, SigSetHigh);
8785 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
8786 D = MIRBuilder.buildOr(S32, D, D1);
8787
8788 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
8789 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
8790
8791 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
8792 V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
8793
8794 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
8795 MIRBuilder.buildConstant(S32, 3));
8796 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
8797
8798 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
8799 MIRBuilder.buildConstant(S32, 5));
8800 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
8801
8802 V1 = MIRBuilder.buildOr(S32, V0, V1);
8803 V = MIRBuilder.buildAdd(S32, V, V1);
8804
8805 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
8806 E, MIRBuilder.buildConstant(S32, 30));
8807 V = MIRBuilder.buildSelect(S32, CmpEGt30,
8808 MIRBuilder.buildConstant(S32, 0x7c00), V);
8809
8810 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
8811 E, MIRBuilder.buildConstant(S32, 1039));
8812 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
8813
8814 // Extract the sign bit.
8815 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
8816 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
8817
8818 // Insert the sign bit
8819 V = MIRBuilder.buildOr(S32, Sign, V);
8820
8821 MIRBuilder.buildTrunc(Dst, V);
8822 MI.eraseFromParent();
8823 return Legalized;
8824}
8825
8826// f32 -> bf16 conversion using round-to-nearest-even rounding mode.
8829 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
8830 assert(DstTy.getScalarType() == LLT::bfloat16() &&
8831 SrcTy.getScalarType() == LLT::float32());
8832
8833 LLT I1Ty = SrcTy.changeElementType(LLT::integer(1));
8834 LLT I16Ty = SrcTy.changeElementType(LLT::integer(16));
8835 LLT I32Ty = SrcTy.changeElementType(LLT::integer(32));
8836
8837 auto IsNaN = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO, I1Ty, SrcReg,
8838 MIRBuilder.buildFConstant(SrcTy, 0));
8839 auto SrcI = MIRBuilder.buildBitcast(I32Ty, SrcReg);
8840
8841 // Conversions should set NaN's quiet bit. This also prevents NaNs from
8842 // turning into infinities.
8843 auto NaN = MIRBuilder.buildOr(I32Ty, SrcI,
8844 MIRBuilder.buildConstant(I32Ty, 0x400000));
8845
8846 // Factor in the contribution of the low 16 bits.
8847 auto Lsb =
8848 MIRBuilder.buildLShr(I32Ty, SrcI, MIRBuilder.buildConstant(I32Ty, 16));
8849 Lsb = MIRBuilder.buildAnd(I32Ty, Lsb, MIRBuilder.buildConstant(I32Ty, 1));
8850 auto RoundingBias =
8851 MIRBuilder.buildAdd(I32Ty, Lsb, MIRBuilder.buildConstant(I32Ty, 0x7fff));
8852 auto Add = MIRBuilder.buildAdd(I32Ty, SrcI, RoundingBias);
8853
8854 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
8855 // 0x80000000.
8856 auto Sel = MIRBuilder.buildSelect(I32Ty, IsNaN, NaN, Add);
8857
8858 // Now that we have rounded, shift the bits into position.
8859 auto Srl =
8860 MIRBuilder.buildLShr(I32Ty, Sel, MIRBuilder.buildConstant(I32Ty, 16));
8861 auto Trunc = MIRBuilder.buildTrunc(I16Ty, Srl);
8862 MIRBuilder.buildBitcast(DstReg, Trunc);
8863 MI.eraseFromParent();
8864 return Legalized;
8865}
8866
8869 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
8870 if (DstTy.getScalarType().isFloat16() && SrcTy.getScalarType().isFloat64())
8872
8873 if (DstTy.getScalarType().isBFloat16() && SrcTy.getScalarType().isFloat32())
8875
8876 return lowerFPExtAndTruncMem(MI);
8877}
8878
8880 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8881 LLT Ty = MRI.getType(Dst);
8882
8883 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
8884 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
8885 MI.eraseFromParent();
8886 return Legalized;
8887}
8888
8890 auto [DstFrac, DstInt, Src] = MI.getFirst3Regs();
8891 LLT Ty = MRI.getType(Src);
8892 auto Flags = MI.getFlags();
8893 const LLT CondTy = Ty.changeElementType(LLT::integer(1));
8894
8895 auto IntPart = MIRBuilder.buildIntrinsicTrunc(Ty, Src, Flags);
8896 auto FracPart = MIRBuilder.buildFSub(Ty, Src, IntPart, Flags);
8897
8898 Register FracToUse;
8899 if (MI.getFlag(MachineInstr::FmNoInfs)) {
8900 FracToUse = FracPart.getReg(0);
8901 } else {
8902 auto Abs = MIRBuilder.buildFAbs(Ty, Src, Flags);
8903 const fltSemantics &Semantics = getFltSemanticForLLT(Ty.getScalarType());
8904 auto Inf = MIRBuilder.buildFConstant(Ty, APFloat::getInf(Semantics));
8905 auto IsInf = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CondTy, Abs, Inf);
8906 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8907 auto Select = MIRBuilder.buildSelect(Ty, IsInf, Zero, FracPart);
8908 FracToUse = Select.getReg(0);
8909 }
8910
8911 MIRBuilder.buildFCopysign(DstFrac, FracToUse, Src, Flags);
8912 MIRBuilder.buildCopy(DstInt, IntPart.getReg(0));
8913
8914 MI.eraseFromParent();
8915 return Legalized;
8916}
8917
8919 switch (Opc) {
8920 case TargetOpcode::G_SMIN:
8921 return CmpInst::ICMP_SLT;
8922 case TargetOpcode::G_SMAX:
8923 return CmpInst::ICMP_SGT;
8924 case TargetOpcode::G_UMIN:
8925 return CmpInst::ICMP_ULT;
8926 case TargetOpcode::G_UMAX:
8927 return CmpInst::ICMP_UGT;
8928 default:
8929 llvm_unreachable("not in integer min/max");
8930 }
8931}
8932
8934 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8935
8936 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
8937 LLT CmpType = MRI.getType(Dst).changeElementType(LLT::integer(1));
8938
8939 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8940 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8941
8942 MI.eraseFromParent();
8943 return Legalized;
8944}
8945
8948 GSUCmp *Cmp = cast<GSUCmp>(&MI);
8949
8950 Register Dst = Cmp->getReg(0);
8951 LLT DstTy = MRI.getType(Dst);
8952 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8953 LLT CmpTy = DstTy.changeElementSize(1);
8954
8955 CmpInst::Predicate LTPredicate = Cmp->isSigned()
8958 CmpInst::Predicate GTPredicate = Cmp->isSigned()
8961
8962 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
8963 auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8964 Cmp->getRHSReg());
8965 auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8966 Cmp->getRHSReg());
8967
8968 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
8969 auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
8970 if (TLI.preferSelectsOverBooleanArithmetic(
8971 getApproximateEVTForLLT(SrcTy, Ctx)) ||
8973 auto One = MIRBuilder.buildConstant(DstTy, 1);
8974 auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8975
8976 auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
8977 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8978 } else {
8980 std::swap(IsGT, IsLT);
8981 // Extend boolean results to DstTy, which is at least i2, before subtracting
8982 // them.
8983 unsigned BoolExtOp =
8984 MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
8985 IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8986 IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8987 MIRBuilder.buildSub(Dst, IsGT, IsLT);
8988 }
8989
8990 MI.eraseFromParent();
8991 return Legalized;
8992}
8993
// Builds Dst from the non-sign bits of Src0 and the sign bit of Src1 using
// integer bit operations. Sources whose scalar type is neither an any-scalar
// nor an integer are first bitcast to an integer type of the same width, and
// the result is bitcast back when DstTy is not already the integer type.
8996 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
8997 const int Src0Size = Src0Ty.getScalarSizeInBits();
8998 const int Src1Size = Src1Ty.getScalarSizeInBits();
8999
// Integer types with the same shape and element width as the operands.
9000 LLT DstIntTy =
9001 DstTy.changeElementType(LLT::integer(DstTy.getScalarSizeInBits()));
9002 LLT Src0IntTy = Src0Ty.changeElementType(LLT::integer(Src0Size));
9003 LLT Src1IntTy = Src1Ty.changeElementType(LLT::integer(Src1Size));
9004
9005 Register Src0Int = Src0;
9006 Register Src1Int = Src1;
9007
9008 if (!(Src0Ty.getScalarType().isAnyScalar() ||
9009 Src0Ty.getScalarType().isInteger()))
9010 Src0Int = MIRBuilder.buildBitcast(Src0IntTy, Src0).getReg(0);
9011
9012 if (!(Src1Ty.getScalarType().isAnyScalar() ||
9013 Src1Ty.getScalarType().isInteger()))
9014 Src1Int = MIRBuilder.buildBitcast(Src1IntTy, Src1).getReg(0);
9015
9016 auto SignBitMask =
9017 MIRBuilder.buildConstant(Src0IntTy, APInt::getSignMask(Src0Size));
9018
9019 auto NotSignBitMask = MIRBuilder.buildConstant(
9020 Src0IntTy, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
9021
// And0 holds Src0 with its sign bit cleared; And1 holds only the sign bit of
// Src1, moved to the sign-bit position of Src0's width when the widths differ.
9022 Register And0 =
9023 MIRBuilder.buildAnd(Src0IntTy, Src0Int, NotSignBitMask).getReg(0);
9024 Register And1;
9025 if (Src0Ty == Src1Ty) {
9026 And1 = MIRBuilder.buildAnd(Src1IntTy, Src1Int, SignBitMask).getReg(0);
9027 } else if (Src0Size > Src1Size) {
9028 auto ShiftAmt = MIRBuilder.buildConstant(Src0IntTy, Src0Size - Src1Size);
9029 auto Zext = MIRBuilder.buildZExt(Src0IntTy, Src1Int);
9030 auto Shift = MIRBuilder.buildShl(Src0IntTy, Zext, ShiftAmt);
// Both operands are Src0IntTy, so the AND must be built with that integer
// type too (not Src0Ty, which may be a floating-point type here).
9031 And1 = MIRBuilder.buildAnd(Src0IntTy, Shift, SignBitMask).getReg(0);
9032 } else {
9033 auto ShiftAmt = MIRBuilder.buildConstant(Src1IntTy, Src1Size - Src0Size);
9034 auto Shift = MIRBuilder.buildLShr(Src1IntTy, Src1Int, ShiftAmt);
9035 auto Trunc = MIRBuilder.buildTrunc(Src0IntTy, Shift);
9036 And1 = MIRBuilder.buildAnd(Src0IntTy, Trunc, SignBitMask).getReg(0);
9037 }
9038
9039 // Be careful about setting nsz/nnan/ninf on every instruction, since the
9040 // constants are a nan and -0.0, but the final result should preserve
9041 // everything.
9042 unsigned Flags = MI.getFlags();
9043
9044 // We masked the sign bit and the not-sign bit, so these are disjoint.
9045 Flags |= MachineInstr::Disjoint;
9046
9047 if (DstTy == DstIntTy)
9048 MIRBuilder.buildOr(Dst, And0, And1, Flags).getReg(0);
9049 else {
9050 Register NewDst = MIRBuilder.buildOr(DstIntTy, And0, And1, Flags).getReg(0);
9051 MIRBuilder.buildBitcast(Dst, NewDst);
9052 }
9053
9054 MI.eraseFromParent();
9055 return Legalized;
9056}
9057
// Rewrites a floating-point min/max instruction to the replacement opcode
// selected by the switch below. Unless the no-NaNs flag is set, each source
// that is not known to never be a signaling NaN is canonicalized first.
9060 // FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have
9061 // identical handling. fminimumnum/fmaximumnum also need a path that does not
9062 // depend on fminnum/fmaxnum.
9063
9064 unsigned NewOp;
9065 switch (MI.getOpcode()) {
9066 case TargetOpcode::G_FMINNUM:
9067 NewOp = TargetOpcode::G_FMINNUM_IEEE;
9068 break;
9069 case TargetOpcode::G_FMINIMUMNUM:
9070 NewOp = TargetOpcode::G_FMINNUM;
9071 break;
9072 case TargetOpcode::G_FMAXNUM:
9073 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
9074 break;
9075 case TargetOpcode::G_FMAXIMUMNUM:
9076 NewOp = TargetOpcode::G_FMAXNUM;
9077 break;
9078 default:
9079 llvm_unreachable("unexpected min/max opcode");
9080 }
9081
9082 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
9083 LLT Ty = MRI.getType(Dst);
9084
9085 if (!MI.getFlag(MachineInstr::FmNoNans)) {
9086 // Insert canonicalizes if it's possible we need to quiet to get correct
9087 // sNaN behavior.
9088
9089 // Note this must be done here, and not as an optimization combine, in the
9090 // absence of a dedicated quiet-snan instruction, as we're using an
9091 // omni-purpose G_FCANONICALIZE.
9092 if (!VT->isKnownNeverSNaN(Src0))
9093 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
9094
9095 if (!VT->isKnownNeverSNaN(Src1))
9096 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
9097 }
9098
9099 // Emit the replacement opcode chosen above on the (possibly canonicalized)
9100 // sources, carrying over the original instruction's flags.
9101 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
9102 MI.eraseFromParent();
9103 return Legalized;
9104}
9105
9108 unsigned Opc = MI.getOpcode();
9109 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
9110 LLT Ty = MRI.getType(Dst);
9111 const LLT CmpTy = Ty.changeElementType(LLT::integer(1));
9112
9113 bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM);
9114 unsigned OpcIeee =
9115 IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
9116 unsigned OpcNonIeee =
9117 IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
9118 bool MinMaxMustRespectOrderedZero = false;
9119 Register Res;
9120
9121 // IEEE variants don't need canonicalization
9122 if (LI.isLegalOrCustom({OpcIeee, Ty})) {
9123 Res = MIRBuilder.buildInstr(OpcIeee, {Ty}, {Src0, Src1}).getReg(0);
9124 MinMaxMustRespectOrderedZero = true;
9125 } else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
9126 Res = MIRBuilder.buildInstr(OpcNonIeee, {Ty}, {Src0, Src1}).getReg(0);
9127 } else {
9128 auto Compare = MIRBuilder.buildFCmp(
9129 IsMax ? CmpInst::FCMP_OGT : CmpInst::FCMP_OLT, CmpTy, Src0, Src1);
9130 Res = MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
9131 }
9132
9133 // Propagate any NaN of both operands
9134 if (!MI.getFlag(MachineInstr::FmNoNans) &&
9135 (!VT->isKnownNeverNaN(Src0) || !VT->isKnownNeverNaN(Src1))) {
9136 auto IsOrdered = MIRBuilder.buildFCmp(CmpInst::FCMP_ORD, CmpTy, Src0, Src1);
9137
9138 LLT ElementTy = Ty.isScalar() ? Ty : Ty.getElementType();
9139 APFloat NaNValue = APFloat::getNaN(getFltSemanticForLLT(ElementTy));
9140 Register NaN = MIRBuilder.buildFConstant(ElementTy, NaNValue).getReg(0);
9141 if (Ty.isVector())
9142 NaN = MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);
9143
9144 Res = MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
9145 }
9146
9147 // fminimum/fmaximum requires -0.0 less than +0.0
9148 if (!MinMaxMustRespectOrderedZero && !MI.getFlag(MachineInstr::FmNsz)) {
9149 GISelValueTracking VT(MIRBuilder.getMF());
9150 KnownFPClass Src0Info = VT.computeKnownFPClass(Src0, fcZero);
9151 KnownFPClass Src1Info = VT.computeKnownFPClass(Src1, fcZero);
9152
9153 if (!Src0Info.isKnownNeverZero() && !Src1Info.isKnownNeverZero()) {
9154 const unsigned Flags = MI.getFlags();
9155 Register Zero = MIRBuilder.buildFConstant(Ty, 0.0).getReg(0);
9156 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CmpTy, Res, Zero);
9157
9158 unsigned TestClass = IsMax ? fcPosZero : fcNegZero;
9159
9160 auto LHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
9161 auto LHSSelect =
9162 MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);
9163
9164 auto RHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
9165 auto RHSSelect =
9166 MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);
9167
9168 Res = MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
9169 }
9170 }
9171
9172 MIRBuilder.buildCopy(Dst, Res);
9173 MI.eraseFromParent();
9174 return Legalized;
9175}
9176
9178 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
9179 Register DstReg = MI.getOperand(0).getReg();
9180 LLT Ty = MRI.getType(DstReg);
9181 unsigned Flags = MI.getFlags();
9182
9183 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
9184 Flags);
9185 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
9186 MI.eraseFromParent();
9187 return Legalized;
9188}
9189
9192 auto [DstReg, X] = MI.getFirst2Regs();
9193 const unsigned Flags = MI.getFlags();
9194 const LLT Ty = MRI.getType(DstReg);
9195 const LLT CondTy = Ty.changeElementType(LLT::integer(1));
9196
9197 // round(x) =>
9198 // t = trunc(x);
9199 // d = fabs(x - t);
9200 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
9201 // return t + o;
9202
9203 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
9204
9205 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
9206 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
9207
9208 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
9209 auto Cmp =
9210 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
9211
9212 // Could emit G_UITOFP instead
9213 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
9214 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
9215 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
9216 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
9217
9218 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
9219
9220 MI.eraseFromParent();
9221 return Legalized;
9222}
9223
9225 auto [DstReg, SrcReg] = MI.getFirst2Regs();
9226 unsigned Flags = MI.getFlags();
9227 LLT Ty = MRI.getType(DstReg);
9228 const LLT CondTy = Ty.changeElementType(LLT::integer(1));
9229
9230 // result = trunc(src);
9231 // if (src < 0.0 && src != result)
9232 // result += -1.0.
9233
9234 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
9235 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
9236
9237 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
9238 SrcReg, Zero, Flags);
9239 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
9240 SrcReg, Trunc, Flags);
9241 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
9242 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
9243
9244 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
9245 MI.eraseFromParent();
9246 return Legalized;
9247}
9248
9251 const unsigned NumOps = MI.getNumOperands();
9252 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
9253 unsigned PartSize = Src0Ty.getSizeInBits();
9254
9255 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
9256 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
9257
9258 for (unsigned I = 2; I != NumOps; ++I) {
9259 const unsigned Offset = (I - 1) * PartSize;
9260
9261 Register SrcReg = MI.getOperand(I).getReg();
9262 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
9263
9264 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
9265 MRI.createGenericVirtualRegister(WideTy);
9266
9267 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
9268 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
9269 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
9270 ResultReg = NextResult;
9271 }
9272
9273 if (DstTy.isPointer()) {
9274 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
9275 DstTy.getAddressSpace())) {
9276 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
9277 return UnableToLegalize;
9278 }
9279
9280 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
9281 }
9282
9283 MI.eraseFromParent();
9284 return Legalized;
9285}
9286
9289 const unsigned NumDst = MI.getNumOperands() - 1;
9290 Register SrcReg = MI.getOperand(NumDst).getReg();
9291 Register Dst0Reg = MI.getOperand(0).getReg();
9292 LLT DstTy = MRI.getType(Dst0Reg);
9293 if (DstTy.isPointer())
9294 return UnableToLegalize; // TODO
9295
9296 SrcReg = coerceToScalar(SrcReg);
9297 if (!SrcReg)
9298 return UnableToLegalize;
9299
9300 // Expand scalarizing unmerge as bitcast to integer and shift.
9301 LLT IntTy = MRI.getType(SrcReg);
9302
9303 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
9304
9305 const unsigned DstSize = DstTy.getSizeInBits();
9306 unsigned Offset = DstSize;
9307 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
9308 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
9309 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
9310 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
9311 }
9312
9313 MI.eraseFromParent();
9314 return Legalized;
9315}
9316
9317/// Lower a vector extract or insert by writing the vector to a stack temporary
9318/// and reloading the element or vector.
9319///
9320/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
9321/// =>
9322/// %stack_temp = G_FRAME_INDEX
9323/// G_STORE %vec, %stack_temp
9324/// %idx = clamp(%idx, %vec.getNumElements())
9325/// %element_ptr = G_PTR_ADD %stack_temp, %idx
9326/// %dst = G_LOAD %element_ptr
9329 Register DstReg = MI.getOperand(0).getReg();
9330 Register SrcVec = MI.getOperand(1).getReg();
9331 Register InsertVal;
9332 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
9333 InsertVal = MI.getOperand(2).getReg();
9334
9335 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
9336
9337 LLT VecTy = MRI.getType(SrcVec);
9338 LLT EltTy = VecTy.getElementType();
9339 unsigned NumElts = VecTy.getNumElements();
9340
9341 int64_t IdxVal;
9342 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
9344 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
9345
9346 if (InsertVal) {
9347 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
9348 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
9349 } else {
9350 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
9351 }
9352
9353 MI.eraseFromParent();
9354 return Legalized;
9355 }
9356
9357 if (!EltTy.isByteSized()) { // Not implemented.
9358 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
9359 return UnableToLegalize;
9360 }
9361
9362 unsigned EltBytes = EltTy.getSizeInBytes();
9363 Align VecAlign = getStackTemporaryAlignment(VecTy);
9364 Align EltAlign;
9365
9366 MachinePointerInfo PtrInfo;
9367 auto StackTemp = createStackTemporary(
9368 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
9369 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
9370
9371 // Get the pointer to the element, and be sure not to hit undefined behavior
9372 // if the index is out of bounds.
9373 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
9374
9375 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
9376 int64_t Offset = IdxVal * EltBytes;
9377 PtrInfo = PtrInfo.getWithOffset(Offset);
9378 EltAlign = commonAlignment(VecAlign, Offset);
9379 } else {
9380 // We lose information with a variable offset.
9381 EltAlign = getStackTemporaryAlignment(EltTy);
9382 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
9383 }
9384
9385 if (InsertVal) {
9386 // Write the inserted element
9387 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
9388
9389 // Reload the whole vector.
9390 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
9391 } else {
9392 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
9393 }
9394
9395 MI.eraseFromParent();
9396 return Legalized;
9397}
9398
9401 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
9402 MI.getFirst3RegLLTs();
9403 LLT IdxTy = LLT::scalar(32);
9404
9405 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
9408 LLT EltTy = DstTy.getScalarType();
9409
9410 DenseMap<unsigned, Register> CachedExtract;
9411
9412 for (int Idx : Mask) {
9413 if (Idx < 0) {
9414 if (!Undef.isValid())
9415 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
9416 BuildVec.push_back(Undef);
9417 continue;
9418 }
9419
9420 assert(!Src0Ty.isScalar() && "Unexpected scalar G_SHUFFLE_VECTOR");
9421
9422 int NumElts = Src0Ty.getNumElements();
9423 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
9424 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
9425 auto [It, Inserted] = CachedExtract.try_emplace(Idx);
9426 if (Inserted) {
9427 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
9428 It->second =
9429 MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK).getReg(0);
9430 }
9431 BuildVec.push_back(It->second);
9432 }
9433
9434 assert(DstTy.isVector() && "Unexpected scalar G_SHUFFLE_VECTOR");
9435 MIRBuilder.buildBuildVector(DstReg, BuildVec);
9436 MI.eraseFromParent();
9437 return Legalized;
9438}
9439
9442 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
9443 MI.getFirst4RegLLTs();
9444
9445 if (VecTy.isScalableVector())
9446 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
9447
9448 Align VecAlign = getStackTemporaryAlignment(VecTy);
9449 MachinePointerInfo PtrInfo;
9450 Register StackPtr =
9451 createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
9452 PtrInfo)
9453 .getReg(0);
9454 MachinePointerInfo ValPtrInfo =
9456
9457 LLT IdxTy = LLT::scalar(32);
9458 LLT ValTy = VecTy.getElementType();
9459 Align ValAlign = getStackTemporaryAlignment(ValTy);
9460
9461 auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
9462
9463 bool HasPassthru =
9464 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
9465
9466 if (HasPassthru)
9467 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
9468
9469 Register LastWriteVal;
9470 std::optional<APInt> PassthruSplatVal =
9471 isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
9472
9473 if (PassthruSplatVal.has_value()) {
9474 LastWriteVal =
9475 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9476 } else if (HasPassthru) {
9477 auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9478 Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9479 {LLT::scalar(32)}, {Popcount});
9480
9481 Register LastElmtPtr =
9482 getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
9483 LastWriteVal =
9484 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9485 .getReg(0);
9486 }
9487
9488 unsigned NumElmts = VecTy.getNumElements();
9489 for (unsigned I = 0; I < NumElmts; ++I) {
9490 auto Idx = MIRBuilder.buildConstant(IdxTy, I);
9491 auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
9492 Register ElmtPtr =
9493 getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9494 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9495
9496 LLT MaskITy = MaskTy.getElementType();
9497 auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9498 if (MaskITy.getSizeInBits() > 1)
9499 MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
9500
9501 MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
9502 OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
9503
9504 if (HasPassthru && I == NumElmts - 1) {
9505 auto EndOfVector =
9506 MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
9507 auto AllLanesSelected = MIRBuilder.buildICmp(
9508 CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
9509 OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9510 {OutPos, EndOfVector});
9511 ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9512
9513 LastWriteVal =
9514 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9515 .getReg(0);
9516 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9517 }
9518 }
9519
9520 // TODO: Use StackPtr's FrameIndex alignment.
9521 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9522
9523 MI.eraseFromParent();
9524 return Legalized;
9525}
9526
9528 Register AllocSize,
9529 Align Alignment,
9530 LLT PtrTy) {
9531 LLT IntPtrTy = LLT::integer(PtrTy.getSizeInBits());
9532
9533 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
9534 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
9535
9536 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
9537 // have to generate an extra instruction to negate the alloc and then use
9538 // G_PTR_ADD to add the negative offset.
9539 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
9540 if (Alignment > Align(1)) {
9541 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
9542 AlignMask.negate();
9543 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
9544 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
9545 }
9546
9547 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
9548}
9549
9552 const auto &MF = *MI.getMF();
9553 const auto &TFI = *MF.getSubtarget().getFrameLowering();
9554 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
9555 return UnableToLegalize;
9556
9557 Register Dst = MI.getOperand(0).getReg();
9558 Register AllocSize = MI.getOperand(1).getReg();
9559 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
9560
9561 LLT PtrTy = MRI.getType(Dst);
9562 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9563 Register SPTmp =
9564 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
9565
9566 MIRBuilder.buildCopy(SPReg, SPTmp);
9567 MIRBuilder.buildCopy(Dst, SPTmp);
9568
9569 MI.eraseFromParent();
9570 return Legalized;
9571}
9572
9575 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9576 if (!StackPtr)
9577 return UnableToLegalize;
9578
9579 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
9580 MI.eraseFromParent();
9581 return Legalized;
9582}
9583
9586 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9587 if (!StackPtr)
9588 return UnableToLegalize;
9589
9590 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
9591 MI.eraseFromParent();
9592 return Legalized;
9593}
9594
9597 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9598 unsigned Offset = MI.getOperand(2).getImm();
9599
9600 // Extract sub-vector or one element
9601 if (SrcTy.isVector()) {
9602 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9603 unsigned DstSize = DstTy.getSizeInBits();
9604
9605 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9606 (Offset + DstSize <= SrcTy.getSizeInBits())) {
9607 // Unmerge and allow access to each Src element for the artifact combiner.
9608 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9609
9610 // Take element(s) we need to extract and copy it (merge them).
9611 SmallVector<Register, 8> SubVectorElts;
9612 for (unsigned Idx = Offset / SrcEltSize;
9613 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
9614 SubVectorElts.push_back(Unmerge.getReg(Idx));
9615 }
9616 if (SubVectorElts.size() == 1)
9617 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9618 else
9619 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9620
9621 MI.eraseFromParent();
9622 return Legalized;
9623 }
9624 }
9625
9626 const DataLayout &DL = MIRBuilder.getDataLayout();
9627 if ((SrcTy.isPointer() &&
9628 DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) ||
9629 (DstTy.isPointer() &&
9630 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace()))) {
9631 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9632 return UnableToLegalize;
9633 }
9634
9635 if ((DstTy.isScalar() || DstTy.isPointer()) &&
9636 (SrcTy.isScalar() || SrcTy.isPointer() ||
9637 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9638 LLT SrcIntTy = SrcTy;
9639 if (!SrcTy.isScalar()) {
9640 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
9641 SrcReg = MIRBuilder.buildCast(SrcIntTy, SrcReg).getReg(0);
9642 }
9643
9644 Register ResultReg = DstReg;
9645 if (DstTy.isPointer())
9646 ResultReg =
9647 MRI.createGenericVirtualRegister(LLT::scalar(DstTy.getSizeInBits()));
9648
9649 if (Offset == 0)
9650 MIRBuilder.buildTrunc(ResultReg, SrcReg);
9651 else {
9652 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
9653 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9654 MIRBuilder.buildTrunc(ResultReg, Shr);
9655 }
9656
9657 if (DstTy.isPointer())
9658 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
9659
9660 MI.eraseFromParent();
9661 return Legalized;
9662 }
9663
9664 return UnableToLegalize;
9665}
9666
9668 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
9669 uint64_t Offset = MI.getOperand(3).getImm();
9670
9671 LLT DstTy = MRI.getType(Src);
9672 LLT InsertTy = MRI.getType(InsertSrc);
9673
9674 const DataLayout &DL = MIRBuilder.getDataLayout();
9675 bool IsNonIntegralInsert =
9676 InsertTy.isPointerOrPointerVector() &&
9677 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace());
9678 bool IsNonIntegralDst = DstTy.isPointerOrPointerVector() &&
9679 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace());
9680
9681 // Insert sub-vector or one element
9682 if (DstTy.isVector()) {
9683 LLT EltTy = DstTy.getElementType();
9684
9685 if ((IsNonIntegralInsert || IsNonIntegralDst) && InsertTy != EltTy) {
9686 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9687 return UnableToLegalize;
9688 }
9689
9690 unsigned EltSize = EltTy.getSizeInBits();
9691 unsigned InsertSize = InsertTy.getSizeInBits();
9692
9693 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9694 (Offset + InsertSize <= DstTy.getSizeInBits())) {
9695 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
9697 unsigned Idx = 0;
9698 // Elements from Src before insert start Offset
9699 for (; Idx < Offset / EltSize; ++Idx) {
9700 DstElts.push_back(UnmergeSrc.getReg(Idx));
9701 }
9702
9703 // Replace elements in Src with elements from InsertSrc
9704 if (InsertTy.getSizeInBits() > EltSize) {
9705 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9706 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
9707 ++Idx, ++i) {
9708 DstElts.push_back(UnmergeInsertSrc.getReg(i));
9709 }
9710 } else {
9711 if (InsertTy.isPointer() && !EltTy.isPointer())
9712 InsertSrc = MIRBuilder.buildPtrToInt(EltTy, InsertSrc).getReg(0);
9713 else if (!InsertTy.isPointer() && EltTy.isPointer())
9714 InsertSrc = MIRBuilder.buildIntToPtr(EltTy, InsertSrc).getReg(0);
9715 DstElts.push_back(InsertSrc);
9716 ++Idx;
9717 }
9718
9719 // Remaining elements from Src after insert
9720 for (; Idx < DstTy.getNumElements(); ++Idx) {
9721 DstElts.push_back(UnmergeSrc.getReg(Idx));
9722 }
9723
9724 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9725 MI.eraseFromParent();
9726 return Legalized;
9727 }
9728 }
9729
9730 if (InsertTy.isVector() ||
9731 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
9732 return UnableToLegalize;
9733
9734 if (IsNonIntegralDst || IsNonIntegralInsert) {
9735 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9736 return UnableToLegalize;
9737 }
9738
9739 LLT IntDstTy = DstTy;
9740
9741 if (!DstTy.isScalar()) {
9742 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
9743 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9744 }
9745
9746 if (!InsertTy.isScalar()) {
9747 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
9748 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9749 }
9750
9751 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
9752 if (Offset != 0) {
9753 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
9754 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9755 }
9756
9758 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
9759
9760 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
9761 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9762 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9763
9764 MIRBuilder.buildCast(Dst, Or);
9765 MI.eraseFromParent();
9766 return Legalized;
9767}
9768
9771 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9772 MI.getFirst4RegLLTs();
9773 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
9774
9775 LLT Ty = Dst0Ty;
9776 LLT BoolTy = Dst1Ty;
9777
9778 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9779
9780 if (IsAdd)
9781 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
9782 else
9783 MIRBuilder.buildSub(NewDst0, LHS, RHS);
9784
9785 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
9786
9787 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9788
9789 if (IsAdd) {
9790 // For an addition, the result should be less than one of the operands (LHS)
9791 // if and only if the other operand (RHS) is negative, otherwise there will
9792 // be overflow.
9793 auto ResultLowerThanLHS =
9794 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
9795 auto RHSNegative =
9796 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, RHS, Zero);
9797 MIRBuilder.buildXor(Dst1, RHSNegative, ResultLowerThanLHS);
9798 } else {
9799 // For subtraction, overflow occurs when the signed comparison of operands
9800 // doesn't match the sign of the result.
9801 auto LHSLessThanRHS =
9802 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS, RHS);
9803 auto ResultNegative =
9804 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, Zero);
9805 MIRBuilder.buildXor(Dst1, LHSLessThanRHS, ResultNegative);
9806 }
9807
9808 MIRBuilder.buildCopy(Dst0, NewDst0);
9809 MI.eraseFromParent();
9810
9811 return Legalized;
9812}
9813
9815 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9816 const LLT Ty = MRI.getType(Res);
9817
9818 // sum = LHS + RHS + zext(CarryIn)
9819 auto Tmp = MIRBuilder.buildAdd(Ty, LHS, RHS);
9820 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9821 auto Sum = MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
9822 MIRBuilder.buildCopy(Res, Sum);
9823
9824 // OvOut = icmp slt ((sum ^ lhs) & (sum ^ rhs)), 0
9825 auto AX = MIRBuilder.buildXor(Ty, Sum, LHS);
9826 auto BX = MIRBuilder.buildXor(Ty, Sum, RHS);
9827 auto T = MIRBuilder.buildAnd(Ty, AX, BX);
9828
9829 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9830 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9831
9832 MI.eraseFromParent();
9833 return Legalized;
9834}
9835
9837 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9838 const LLT Ty = MRI.getType(Res);
9839
9840 // Diff = LHS - (RHS + zext(CarryIn))
9841 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9842 auto RHSPlusCI = MIRBuilder.buildAdd(Ty, RHS, CarryZ);
9843 auto Diff = MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
9844 MIRBuilder.buildCopy(Res, Diff);
9845
9846 // ov = msb((LHS ^ RHS) & (LHS ^ Diff))
9847 auto X1 = MIRBuilder.buildXor(Ty, LHS, RHS);
9848 auto X2 = MIRBuilder.buildXor(Ty, LHS, Diff);
9849 auto T = MIRBuilder.buildAnd(Ty, X1, X2);
9850 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9851 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9852
9853 MI.eraseFromParent();
9854 return Legalized;
9855}
9856
9859 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9860 LLT Ty = MRI.getType(Res);
9861 bool IsSigned;
9862 bool IsAdd;
9863 unsigned BaseOp;
9864 switch (MI.getOpcode()) {
9865 default:
9866 llvm_unreachable("unexpected addsat/subsat opcode");
9867 case TargetOpcode::G_UADDSAT:
9868 IsSigned = false;
9869 IsAdd = true;
9870 BaseOp = TargetOpcode::G_ADD;
9871 break;
9872 case TargetOpcode::G_SADDSAT:
9873 IsSigned = true;
9874 IsAdd = true;
9875 BaseOp = TargetOpcode::G_ADD;
9876 break;
9877 case TargetOpcode::G_USUBSAT:
9878 IsSigned = false;
9879 IsAdd = false;
9880 BaseOp = TargetOpcode::G_SUB;
9881 break;
9882 case TargetOpcode::G_SSUBSAT:
9883 IsSigned = true;
9884 IsAdd = false;
9885 BaseOp = TargetOpcode::G_SUB;
9886 break;
9887 }
9888
9889 if (IsSigned) {
9890 // sadd.sat(a, b) ->
9891 // hi = 0x7fffffff - smax(a, 0)
9892 // lo = 0x80000000 - smin(a, 0)
9893 // a + smin(smax(lo, b), hi)
9894 // ssub.sat(a, b) ->
9895 // lo = smax(a, -1) - 0x7fffffff
9896 // hi = smin(a, -1) - 0x80000000
9897 // a - smin(smax(lo, b), hi)
9898 // TODO: AMDGPU can use a "median of 3" instruction here:
9899 // a +/- med3(lo, b, hi)
9900 uint64_t NumBits = Ty.getScalarSizeInBits();
9901 auto MaxVal =
9902 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
9903 auto MinVal =
9904 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9906 if (IsAdd) {
9907 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9908 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
9909 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
9910 } else {
9911 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
9912 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
9913 MaxVal);
9914 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
9915 MinVal);
9916 }
9917 auto RHSClamped =
9918 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
9919 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
9920 } else {
9921 // uadd.sat(a, b) -> a + umin(~a, b)
9922 // usub.sat(a, b) -> a - umin(a, b)
9923 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
9924 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
9925 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9926 }
9927
9928 MI.eraseFromParent();
9929 return Legalized;
9930}
9931
9934 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9935 LLT Ty = MRI.getType(Res);
9936 LLT BoolTy = Ty.changeElementSize(1);
9937 bool IsSigned;
9938 bool IsAdd;
9939 unsigned OverflowOp;
9940 switch (MI.getOpcode()) {
9941 default:
9942 llvm_unreachable("unexpected addsat/subsat opcode");
9943 case TargetOpcode::G_UADDSAT:
9944 IsSigned = false;
9945 IsAdd = true;
9946 OverflowOp = TargetOpcode::G_UADDO;
9947 break;
9948 case TargetOpcode::G_SADDSAT:
9949 IsSigned = true;
9950 IsAdd = true;
9951 OverflowOp = TargetOpcode::G_SADDO;
9952 break;
9953 case TargetOpcode::G_USUBSAT:
9954 IsSigned = false;
9955 IsAdd = false;
9956 OverflowOp = TargetOpcode::G_USUBO;
9957 break;
9958 case TargetOpcode::G_SSUBSAT:
9959 IsSigned = true;
9960 IsAdd = false;
9961 OverflowOp = TargetOpcode::G_SSUBO;
9962 break;
9963 }
9964
9965 auto OverflowRes =
9966 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9967 Register Tmp = OverflowRes.getReg(0);
9968 Register Ov = OverflowRes.getReg(1);
9969 MachineInstrBuilder Clamp;
9970 if (IsSigned) {
9971 // sadd.sat(a, b) ->
9972 // {tmp, ov} = saddo(a, b)
9973 // ov ? (tmp >>s 31) + 0x80000000 : r
9974 // ssub.sat(a, b) ->
9975 // {tmp, ov} = ssubo(a, b)
9976 // ov ? (tmp >>s 31) + 0x80000000 : r
9977 uint64_t NumBits = Ty.getScalarSizeInBits();
9978 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
9979 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9980 auto MinVal =
9981 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9982 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
9983 } else {
9984 // uadd.sat(a, b) ->
9985 // {tmp, ov} = uaddo(a, b)
9986 // ov ? 0xffffffff : tmp
9987 // usub.sat(a, b) ->
9988 // {tmp, ov} = usubo(a, b)
9989 // ov ? 0 : tmp
9990 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9991 }
9992 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
9993
9994 MI.eraseFromParent();
9995 return Legalized;
9996}
9997
// Lowers G_SSHLSAT / G_USHLSAT into a plain shift, an overflow check and a
// select of the saturation value.
// NOTE(review): the signature line of this function is absent from this
// listing; only the body is visible.
10000 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
10001 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
10002 "Expected shlsat opcode!");
10003 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
10004 auto [Res, LHS, RHS] = MI.getFirst3Regs();
10005 LLT Ty = MRI.getType(Res);
 // Boolean type with the same shape as Ty (1-bit elements).
10006 LLT BoolTy = Ty.changeElementSize(1);
10007
10008 unsigned BW = Ty.getScalarSizeInBits();
 // Shift left, then shift back with the matching right shift; the round trip
 // is compared against LHS below to detect lost bits.
10009 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
10010 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
10011 : MIRBuilder.buildLShr(Ty, Result, RHS);
10012
 // Saturation value: signed picks min/max depending on the sign of LHS,
 // unsigned always uses the all-ones maximum.
10013 MachineInstrBuilder SatVal;
10014 if (IsSigned) {
10015 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
10016 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
10017 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
10018 MIRBuilder.buildConstant(Ty, 0));
10019 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
10020 } else {
10021 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
10022 }
 // Overflow happened iff shifting back does not reproduce the original LHS.
10023 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
10024 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
10025
10026 MI.eraseFromParent();
10027 return Legalized;
10028}
10029
// Lowers a byte swap of Src into shifts, masks and ORs, and rewrites the final
// OR so that it defines Dst.
// NOTE(review): the signature line of this function is absent from this
// listing; only the body is visible.
10031 auto [Dst, Src] = MI.getFirst2Regs();
10032 const LLT Ty = MRI.getType(Src);
10033 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
10034 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
10035
10036 // Swap most and least significant byte, set remaining bytes in Res to zero.
10037 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
10038 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
10039 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
10040 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
10041
10042 // Set i-th high/low byte in Res to i-th low/high byte from Src.
10043 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
10044 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
 // Build the mask directly as an APInt. The previous `0xFF << (i * 8)` was
 // evaluated in `int`: for i == 3 the value became negative and was
 // sign-extended into the upper bytes of the mask, and for i >= 4 the shift
 // amount exceeded the width of `int`.
10045 APInt APMask = APInt::getBitsSet(SizeInBytes * 8, i * 8, i * 8 + 8);
10046 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
10047 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
10048 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
10049 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
10050 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
10051 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
10052 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
10053 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
10054 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
10055 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
10056 }
 // Retarget the last OR so it defines Dst instead of a fresh register.
10057 Res.getInstr()->getOperand(0).setReg(Dst);
10058
10059 MI.eraseFromParent();
10060 return Legalized;
10061}
10062
10063//{ (Src & Mask) >> N } | { (Src << N) & Mask }
// Builds the expression above with builder B and writes the OR into Dst.
// N is the shift distance; Mask selects the bits that are moved down by N,
// and the same mask is applied after shifting Src up by N.
// NOTE(review): the first line of the signature is absent from this listing.
10065 MachineInstrBuilder Src, const APInt &Mask) {
10066 const LLT Ty = Dst.getLLTTy(*B.getMRI());
10067 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
10068 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
10069 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
10070 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
10071 return B.buildOr(Dst, LHS, RHS);
10072}
10073
// Lowers a bit reversal of Src into Dst. Elements of at least 8 bits are
// byte-swapped and then reversed within each byte (either via a legal i8-vector
// G_BITREVERSE or three mask-and-shift swap steps); narrower elements are
// reversed one bit at a time.
// NOTE(review): the signature line and the declarations of Tmp / Tmp2 are
// absent from this listing.
10076 auto [Dst, Src] = MI.getFirst2Regs();
10077 const LLT SrcTy = MRI.getType(Src);
10078 unsigned Size = SrcTy.getScalarSizeInBits();
10079 unsigned VSize = SrcTy.getSizeInBits();
10080
10081 if (Size >= 8) {
10082 if (SrcTy.isVector() && (VSize % 8 == 0) &&
10083 (LI.isLegal({TargetOpcode::G_BITREVERSE,
10084 {LLT::fixed_vector(VSize / 8, LLT::integer(8)),
10085 LLT::fixed_vector(VSize / 8, LLT::integer(8))}}))) {
10086 // If bitreverse is legal for i8 vector of the same size, then cast
10087 // to i8 vector type.
10088 // e.g. v4s32 -> v16s8
10089 LLT VTy = LLT::fixed_vector(VSize / 8, LLT::integer(8));
10090 auto BSWAP = MIRBuilder.buildBSwap(SrcTy, Src);
10091 auto Cast = MIRBuilder.buildBitcast(VTy, BSWAP);
10092 auto RBIT = MIRBuilder.buildBitReverse(VTy, Cast);
10093 MIRBuilder.buildBitcast(Dst, RBIT);
10094 } else {
10095 MachineInstrBuilder BSWAP =
10096 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
10097
10098 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
10099 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
10100 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
10101 MachineInstrBuilder Swap4 = SwapN(4, SrcTy, MIRBuilder, BSWAP,
10102 APInt::getSplat(Size, APInt(8, 0xF0)));
10103
10104 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
10105 // [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
10106 // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
10107 MachineInstrBuilder Swap2 = SwapN(2, SrcTy, MIRBuilder, Swap4,
10108 APInt::getSplat(Size, APInt(8, 0xCC)));
10109
10110 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
10111 // 6|7
10112 // [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
10113 // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
10114 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
10115 }
10116 } else {
10117 // Expand bitreverse for types smaller than 8 bits.
 // Bit I of Src is moved to position J = Size - 1 - I, isolated with a
 // single-bit mask, and ORed into the accumulated result.
10119 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
10121 if (I < J) {
10122 auto ShAmt = MIRBuilder.buildConstant(SrcTy, J - I);
10123 Tmp2 = MIRBuilder.buildShl(SrcTy, Src, ShAmt);
10124 } else {
10125 auto ShAmt = MIRBuilder.buildConstant(SrcTy, I - J);
10126 Tmp2 = MIRBuilder.buildLShr(SrcTy, Src, ShAmt);
10127 }
10128
10129 auto Mask = MIRBuilder.buildConstant(SrcTy, 1ULL << J);
10130 Tmp2 = MIRBuilder.buildAnd(SrcTy, Tmp2, Mask);
10131 if (I == 0)
10132 Tmp = Tmp2;
10133 else
10134 Tmp = MIRBuilder.buildOr(SrcTy, Tmp, Tmp2);
10135 }
10136 MIRBuilder.buildCopy(Dst, Tmp);
10137 }
10138
10139 MI.eraseFromParent();
10140 return Legalized;
10141}
10142
// Lowers G_READ_REGISTER (and its write counterpart) into a COPY from/to the
// physical register named by the metadata operand. If the target does not
// know the register name, a diagnostic is produced and a read is replaced by
// an undef value.
// NOTE(review): the signature line and the line that opens the diagnostic
// call are absent from this listing.
10145 MachineFunction &MF = MIRBuilder.getMF();
10146
 // Operand order differs: a read is (value, name), a write is (name, value).
10147 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
10148 int NameOpIdx = IsRead ? 1 : 0;
10149 int ValRegIndex = IsRead ? 0 : 1;
10150
10151 Register ValReg = MI.getOperand(ValRegIndex).getReg();
10152 const LLT Ty = MRI.getType(ValReg);
10153 const MDString *RegStr = cast<MDString>(
10154 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
10155
10156 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
10157 if (!PhysReg) {
10158 const Function &Fn = MF.getFunction();
10160 "invalid register \"" + Twine(RegStr->getString().data()) + "\" for " +
10161 (IsRead ? "llvm.read_register" : "llvm.write_register"),
10162 Fn, MI.getDebugLoc()));
 // Keep the value register defined so later users still have a definition.
10163 if (IsRead)
10164 MIRBuilder.buildUndef(ValReg);
10165
10166 MI.eraseFromParent();
10167 return Legalized;
10168 }
10169
10170 if (IsRead)
10171 MIRBuilder.buildCopy(ValReg, PhysReg);
10172 else
10173 MIRBuilder.buildCopy(PhysReg, ValReg);
10174
10175 MI.eraseFromParent();
10176 return Legalized;
10177}
10178
// Lowers a high-half multiply (G_SMULH, otherwise treated as unsigned) by
// extending both operands to twice the element width, multiplying, shifting
// the product right by the original width and truncating.
// NOTE(review): the signature line is absent from this listing.
10181 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
10182 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
10183 Register Result = MI.getOperand(0).getReg();
10184 LLT OrigTy = MRI.getType(Result);
10185 auto SizeInBits = OrigTy.getScalarSizeInBits();
10186 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
10187
10188 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
10189 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
10190 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
 // Signed uses an arithmetic shift, unsigned a logical shift.
10191 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
10192
10193 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
10194 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
10195 MIRBuilder.buildTrunc(Result, Shifted);
10196
10197 MI.eraseFromParent();
10198 return Legalized;
10199}
10200
// Lowers a floating-point class test into integer operations on the bit
// pattern of the source. Operand 2 is the FPClassTest mask. Each handled class
// (or group of classes) contributes one boolean term, and all terms are ORed
// together into the result.
// NOTE(review): the signature line is absent from this listing.
10203 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
10204 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
10205
 // Trivial masks: no class requested -> 0, every class requested -> 1.
10206 if (Mask == fcNone) {
10207 MIRBuilder.buildConstant(DstReg, 0);
10208 MI.eraseFromParent();
10209 return Legalized;
10210 }
10211 if (Mask == fcAllFlags) {
10212 MIRBuilder.buildConstant(DstReg, 1);
10213 MI.eraseFromParent();
10214 return Legalized;
10215 }
10216
10217 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
10218 // version
10219
10220 unsigned BitSize = SrcTy.getScalarSizeInBits();
10221 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
10222
 // View the source as an integer of the same width (copy if it already is).
10223 LLT IntTy = SrcTy.changeElementType(LLT::integer(BitSize));
10224 auto AsInt = SrcTy == IntTy ? MIRBuilder.buildCopy(IntTy, SrcReg)
10225 : MIRBuilder.buildBitcast(IntTy, SrcReg);
10226
10227 // Various masks.
10228 APInt SignBit = APInt::getSignMask(BitSize);
10229 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
10230 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
10231 APInt ExpMask = Inf;
10232 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
10233 APInt QNaNBitMask =
10234 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
10235 APInt InversionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
10236
10237 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
10238 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
10239 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
10240 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
10241 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
10242
 // Abs clears the sign bit; Sign is true when clearing it changed the value.
10243 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
10244 auto Sign =
10245 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
10246
 // Res accumulates the OR of all partial tests, starting from false.
10247 auto Res = MIRBuilder.buildConstant(DstTy, 0);
10248 // Clang doesn't support capture of structured bindings:
10249 LLT DstTyCopy = DstTy;
10250 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
10251 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
10252 };
10253
10254 // Tests that involve more than one class should be processed first.
10255 if ((Mask & fcFinite) == fcFinite) {
10256 // finite(V) ==> abs(V) u< exp_mask
10257 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
10258 ExpMaskC));
10259 Mask &= ~fcFinite;
10260 } else if ((Mask & fcFinite) == fcPosFinite) {
10261 // finite(V) && V > 0 ==> V u< exp_mask
10262 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
10263 ExpMaskC));
10264 Mask &= ~fcPosFinite;
10265 } else if ((Mask & fcFinite) == fcNegFinite) {
10266 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
10267 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
10268 ExpMaskC);
10269 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
10270 appendToRes(And);
10271 Mask &= ~fcNegFinite;
10272 }
10273
10274 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
10275 // fcZero | fcSubnormal => test all exponent bits are 0
10276 // TODO: Handle sign bit specific cases
10277 // TODO: Handle inverted case
10278 if (PartialCheck == (fcZero | fcSubnormal)) {
10279 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
10280 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10281 ExpBits, ZeroC));
10282 Mask &= ~PartialCheck;
10283 }
10284 }
10285
10286 // Check for individual classes.
10287 if (FPClassTest PartialCheck = Mask & fcZero) {
10288 if (PartialCheck == fcPosZero)
10289 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10290 AsInt, ZeroC));
10291 else if (PartialCheck == fcZero)
10292 appendToRes(
10293 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
10294 else // fcNegZero
10295 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10296 AsInt, SignBitC));
10297 }
10298
10299 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
10300 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
10301 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
10302 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
10303 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
10304 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
10305 auto SubnormalRes =
10306 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
10307 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
10308 if (PartialCheck == fcNegSubnormal)
10309 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
10310 appendToRes(SubnormalRes);
10311 }
10312
10313 if (FPClassTest PartialCheck = Mask & fcInf) {
10314 if (PartialCheck == fcPosInf)
10315 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10316 AsInt, InfC));
10317 else if (PartialCheck == fcInf)
10318 appendToRes(
10319 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
10320 else { // fcNegInf
10321 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
10322 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
10323 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10324 AsInt, NegInfC));
10325 }
10326 }
10327
10328 if (FPClassTest PartialCheck = Mask & fcNan) {
10329 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
10330 if (PartialCheck == fcNan) {
10331 // isnan(V) ==> abs(V) u> int(inf)
10332 appendToRes(
10333 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
10334 } else if (PartialCheck == fcQNan) {
10335 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
10336 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
10337 InfWithQnanBitC));
10338 } else { // fcSNan
10339 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
10340 // abs(V) u< (unsigned(Inf) | quiet_bit)
10341 auto IsNan =
10342 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
10343 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
10344 Abs, InfWithQnanBitC);
10345 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
10346 }
10347 }
10348
10349 if (FPClassTest PartialCheck = Mask & fcNormal) {
10350 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
10351 // (max_exp-1))
 // ExpLSB is the lowest set bit of the exponent mask.
10352 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
10353 auto ExpMinusOne = MIRBuilder.buildSub(
10354 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
10355 APInt MaxExpMinusOne = ExpMask - ExpLSB;
10356 auto NormalRes =
10357 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
10358 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
10359 if (PartialCheck == fcNegNormal)
10360 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
10361 else if (PartialCheck == fcPosNormal) {
 // Invert Sign by XORing with all-ones to get "sign bit is clear".
10362 auto PosSign = MIRBuilder.buildXor(
10363 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InversionMask));
10364 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
10365 }
10366 appendToRes(NormalRes);
10367 }
10368
10369 MIRBuilder.buildCopy(DstReg, Res);
10370 MI.eraseFromParent();
10371 return Legalized;
10372}
10373
// NOTE(review): the signature line is absent from this listing.
// Returns UnableToLegalize when the mask is a vector but the destination is
// not, or when the mask and destination sizes differ.
10375 // Implement G_SELECT in terms of XOR, AND, OR.
10376 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
10377 MI.getFirst4RegLLTs();
10378
10379 LLT Op1TyInt =
10380 Op1Ty.changeElementType(LLT::integer(Op1Ty.getScalarSizeInBits()));
10381
 // Pointer values are converted to integers first and converted back at the
 // end, since the bitwise operations below are built on integer types.
10382 bool IsEltPtr = DstTy.isPointerOrPointerVector();
10383 if (IsEltPtr) {
10384 LLT ScalarPtrTy = LLT::integer(DstTy.getScalarSizeInBits());
10385 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
10386 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
10387 Op1Ty = MRI.getType(Op1Reg);
10388 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
10389 Op2Ty = MRI.getType(Op2Reg);
10390 DstTy = NewTy;
10391 }
10392
10393 if (MaskTy.isScalar()) {
10394 // Turn the scalar condition into a vector condition mask if needed.
10395
10396 Register MaskElt = MaskReg;
10397
10398 // The condition was potentially zero extended before, but we want a sign
10399 // extended boolean.
10400 if (MaskTy != LLT::scalar(1))
10401 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
10402
10403 // Continue the sign extension (or truncate) to match the data type.
10404 MaskTy = DstTy.changeElementType(LLT::integer(DstTy.getScalarSizeInBits()));
10405 MaskElt =
10406 MIRBuilder.buildSExtOrTrunc(MaskTy.getScalarType(), MaskElt).getReg(0);
10407
10408 if (DstTy.isVector()) {
10409 // Generate a vector splat idiom.
10410 auto ShufSplat = MIRBuilder.buildShuffleSplat(MaskTy, MaskElt);
10411 MaskReg = ShufSplat.getReg(0);
10412 } else {
10413 MaskReg = MaskElt;
10414 }
10415 } else if (!DstTy.isVector()) {
10416 // Cannot handle the case that mask is a vector and dst is a scalar.
10417 return UnableToLegalize;
10418 }
10419
10420 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
10421 return UnableToLegalize;
10422 }
10423
 // Operands whose element type is neither any-scalar nor integer are bitcast
 // to same-width integers before the bitwise operations.
10424 if (!Op1Ty.getScalarType().isAnyScalar() &&
10425 !Op1Ty.getScalarType().isInteger())
10426 Op1Reg = MIRBuilder.buildBitcast(Op1TyInt, Op1Reg).getReg(0);
10427
10428 if (!Op2Ty.getScalarType().isAnyScalar() &&
10429 !Op2Ty.getScalarType().isInteger()) {
10430 auto Op2TyInt =
10431 Op2Ty.changeElementType(LLT::integer(Op2Ty.getScalarSizeInBits()));
10432 Op2Reg = MIRBuilder.buildBitcast(Op2TyInt, Op2Reg).getReg(0);
10433 }
10434
 // Result = (Op1 & Mask) | (Op2 & ~Mask).
10435 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
10436 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
10437 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
10438 if (IsEltPtr) {
10439 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
10440 MIRBuilder.buildIntToPtr(DstReg, Or);
10441 } else {
10442 if (DstTy == Op1TyInt)
10443 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
10444 else {
 // Destination is not the integer type: OR as integer, then bitcast.
10445 auto Or = MIRBuilder.buildOr(Op1TyInt, NewOp1, NewOp2);
10446 MIRBuilder.buildBitcast(DstReg, Or.getReg(0));
10447 }
10448 }
10449 MI.eraseFromParent();
10450 return Legalized;
10451}
10452
// NOTE(review): the signature line is absent from this listing.
// Operand 0 receives the quotient and operand 1 the remainder; operands 2 and
// 3 are the inputs shared by both new instructions. G_SDIVREM selects the
// signed opcodes, any other opcode the unsigned ones.
10454 // Split DIVREM into individual instructions.
10455 unsigned Opcode = MI.getOpcode();
10456
10457 MIRBuilder.buildInstr(
10458 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
10459 : TargetOpcode::G_UDIV,
10460 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10461 MIRBuilder.buildInstr(
10462 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
10463 : TargetOpcode::G_UREM,
10464 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10465 MI.eraseFromParent();
10466 return Legalized;
10467}
10468
// NOTE(review): the signature line is absent from this listing.
10471 // Expand %res = G_ABS %a into:
10472 // %v1 = G_ASHR %a, scalar_size-1
10473 // %v2 = G_ADD %a, %v1
10474 // %res = G_XOR %v2, %v1
10475 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
10476 Register OpReg = MI.getOperand(1).getReg();
10477 auto ShiftAmt =
10478 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
 // Shift is the sign bit replicated across the element (arithmetic shift by
 // width - 1).
10479 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
10480 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
10481 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
10482 MI.eraseFromParent();
10483 return Legalized;
10484}
10485
// NOTE(review): the signature line is absent from this listing.
10488 // Expand %res = G_ABS %a into:
10489 // %v1 = G_CONSTANT 0
10490 // %v2 = G_SUB %v1, %a
10491 // %res = G_SMAX %a, %v2
10492 Register SrcReg = MI.getOperand(1).getReg();
10493 LLT Ty = MRI.getType(SrcReg);
10494 auto Zero = MIRBuilder.buildConstant(Ty, 0);
 // Sub is the negated source (0 - a).
10495 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
10496 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
10497 MI.eraseFromParent();
10498 return Legalized;
10499}
10500
// Expands an integer absolute value as select(src > 0, src, 0 - src).
// NOTE(review): the signature line is absent from this listing.
10503 Register SrcReg = MI.getOperand(1).getReg();
10504 Register DestReg = MI.getOperand(0).getReg();
 // The compare result needs one boolean lane per data lane: a scalar s1
 // result for vector operands is not a well-formed G_ICMP. For scalar
 // sources this is still plain s1, so existing behavior is unchanged.
10505 LLT Ty = MRI.getType(SrcReg), IType = Ty.changeElementType(LLT::scalar(1));
10506 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
10507 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
10508 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
10509 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
10510 MI.eraseFromParent();
10511 return Legalized;
10512}
10513
// Lowers G_ABDS / G_ABDU into two subtractions, a compare and a select.
// NOTE(review): the signature line and the continuation of the predicate
// initializer are absent from this listing.
10516 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10517 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10518 "Expected G_ABDS or G_ABDU instruction");
10519
10520 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10521 LLT Ty = MRI.getType(LHS);
10522
10523 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10524 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10525 Register LHSSub = MIRBuilder.buildSub(Ty, LHS, RHS).getReg(0);
10526 Register RHSSub = MIRBuilder.buildSub(Ty, RHS, LHS).getReg(0);
10527 CmpInst::Predicate Pred = (MI.getOpcode() == TargetOpcode::G_ABDS)
 // The compare result needs one boolean lane per data lane: a scalar s1
 // result for vector operands is not a well-formed G_ICMP. For scalar
 // operands this is still plain s1, so existing behavior is unchanged.
10530 auto ICmp = MIRBuilder.buildICmp(Pred, Ty.changeElementType(LLT::scalar(1)), LHS, RHS);
10531 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
10532
10533 MI.eraseFromParent();
10534 return Legalized;
10535}
10536
// Lowers G_ABDS / G_ABDU as the difference between the max and the min of the
// two operands, using signed or unsigned min/max to match the opcode.
// NOTE(review): the signature line is absent from this listing.
10539 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10540 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10541 "Expected G_ABDS or G_ABDU instruction");
10542
10543 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10544 LLT Ty = MRI.getType(LHS);
10545
10546 // abds(lhs, rhs) -> sub(smax(lhs, rhs), smin(lhs, rhs))
10547 // abdu(lhs, rhs) -> sub(umax(lhs, rhs), umin(lhs, rhs))
10548 Register MaxReg, MinReg;
10549 if (MI.getOpcode() == TargetOpcode::G_ABDS) {
10550 MaxReg = MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10551 MinReg = MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10552 } else {
10553 MaxReg = MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10554 MinReg = MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10555 }
10556 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10557
10558 MI.eraseFromParent();
10559 return Legalized;
10560}
10561
// Lowers a floating-point absolute value by ANDing the integer view of the
// source with the signed-max constant (all bits set except the sign bit).
// NOTE(review): the signature line and a few continuation lines of the
// builder call chains are absent from this listing.
10563 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
10564 LLT TyInt =
10565 DstTy.changeElementType(LLT::integer(DstTy.getScalarSizeInBits()));
10566 Register CastedSrc = SrcReg;
10567
 // Bitcast the source to a same-width integer unless its element type is
 // already any-scalar or integer.
10568 if (!(SrcTy.getScalarType().isAnyScalar() ||
10569 SrcTy.getScalarType().isInteger())) {
10570 auto SrcTyInt =
10571 SrcTy.changeElementType(LLT::integer(SrcTy.getScalarSizeInBits()));
10572 CastedSrc = MIRBuilder.buildBitcast(SrcTyInt, SrcReg).getReg(0);
10573 }
10574
 // If the destination is not the integer type, AND into a temporary and
 // bitcast to the destination; otherwise AND straight into the destination.
10575 if (MRI.getType(DstReg) != TyInt) {
10576 // Reset sign bit
10577 Register NewDst =
10579 .buildAnd(TyInt, CastedSrc,
10580 MIRBuilder.buildConstant(
10582 DstTy.getScalarSizeInBits())))
10583 .getReg(0);
10584
10585 MIRBuilder.buildBitcast(DstReg, NewDst);
10586 } else
10588 .buildAnd(
10589 DstReg, CastedSrc,
10590 MIRBuilder.buildConstant(
10591 TyInt, APInt::getSignedMaxValue(DstTy.getScalarSizeInBits())))
10592 .getReg(0);
10593
10594 MI.eraseFromParent();
10595 return Legalized;
10596}
10597
// Handles the degenerate case where the source operand (operand 1) is a
// scalar: the instruction is rewritten in place into a COPY. Every other case
// returns UnableToLegalize.
// NOTE(review): the signature line is absent from this listing.
10600 Register SrcReg = MI.getOperand(1).getReg();
10601 LLT SrcTy = MRI.getType(SrcReg);
 // Read the destination type from operand 0. It was previously read from
 // SrcReg, which made DstTy identical to SrcTy and the size guard below
 // unreachable.
10602 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
10603
10604 // The source could be a scalar if the IR type was <1 x sN>.
10605 if (SrcTy.isScalar()) {
10606 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
10607 return UnableToLegalize; // FIXME: handle extension.
10608 // This can be just a plain copy.
10609 Observer.changingInstr(MI);
10610 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
10611 Observer.changedInstr(MI);
10612 return Legalized;
10613 }
10614 return UnableToLegalize;
10615}
10616
// Lowers a va_arg-style instruction: loads the current list pointer from
// operand 1, optionally realigns it to the alignment in operand 2, stores the
// pointer advanced by the allocation size of the result type back to the list,
// and loads the result (operand 0) from the (realigned) pointer.
// NOTE(review): the signature line and the line declaring StoreMMO are absent
// from this listing.
10618 MachineFunction &MF = *MI.getMF();
10619 const DataLayout &DL = MIRBuilder.getDataLayout();
10620 LLVMContext &Ctx = MF.getFunction().getContext();
10621 Register ListPtr = MI.getOperand(1).getReg();
10622 LLT PtrTy = MRI.getType(ListPtr);
10623
10624 // ListPtr is a pointer to the head of the list. Get the address
10625 // of the head of the list.
10626 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
10627 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
10628 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
10629 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10630
 // Round the pointer up to A when A exceeds the minimum stack argument
 // alignment: add A - 1, then clear the low Log2(A) bits.
10631 const Align A(MI.getOperand(2).getImm());
10632 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
10633 if (A > TLI.getMinStackArgumentAlignment()) {
10634 Register AlignAmt =
10635 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
10636 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10637 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
10638 VAList = AndDst.getReg(0);
10639 }
10640
10641 // Increment the pointer, VAList, to the next vaarg
10642 // The list should be bumped by the size of element in the current head of
10643 // list.
10644 Register Dst = MI.getOperand(0).getReg();
10645 LLT LLTTy = MRI.getType(Dst);
10646 Type *Ty = getTypeForLLT(LLTTy, Ctx);
10647 auto IncAmt =
10648 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
10649 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10650
10651 // Store the increment VAList to the legalized pointer
10653 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
10654 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10655 // Load the actual argument out of the pointer VAList
10656 Align EltAlignment = DL.getABITypeAlign(Ty);
10657 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
10658 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
10659 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10660
10661 MI.eraseFromParent();
10662 return Legalized;
10663}
10664
// Lowers G_SMULFIX / G_UMULFIX (fixed-point multiply with the scale in operand
// 3): a scale of zero becomes a plain multiply; otherwise the operands are
// extended to twice the element width, multiplied, shifted right by the scale
// and truncated into the destination.
// NOTE(review): the signature line is absent from this listing.
10666 [[maybe_unused]] unsigned OpCode = MI.getOpcode();
10667 assert((OpCode == TargetOpcode::G_SMULFIX ||
10668 OpCode == TargetOpcode::G_UMULFIX) &&
10669 "Operator must be either G_SMULFIX or G_UMULFIX!");
10670 auto [Dst, LHS, RHS] = MI.getFirst3Regs();
10671 LLT Ty = MRI.getType(Dst);
10672 unsigned Scale = MI.getOperand(3).getImm();
10673
10674 if (Scale == 0) {
10675 MIRBuilder.buildMul(Dst, LHS, RHS);
10676 MI.eraseFromParent();
10677 return Legalized;
10678 }
10679
10680 // TODO: Port other lowering paths from SelectionDAG.
10681 LLT WideTy = Ty.changeElementSize(Ty.getScalarSizeInBits() * 2);
10682 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Scale);
10683 MachineInstrBuilder ExtLHS{}, ExtRHS{}, Shift{};
 // Signed: sign-extend and arithmetic shift; unsigned: zero-extend and
 // logical shift.
10684 if (MI.getOpcode() == TargetOpcode::G_SMULFIX) {
10685 ExtLHS = MIRBuilder.buildSExt(WideTy, LHS);
10686 ExtRHS = MIRBuilder.buildSExt(WideTy, RHS);
10687 } else {
10688 ExtLHS = MIRBuilder.buildZExt(WideTy, LHS);
10689 ExtRHS = MIRBuilder.buildZExt(WideTy, RHS);
10690 }
10691
10692 auto Mul = MIRBuilder.buildMul(WideTy, ExtLHS, ExtRHS);
10693 if (MI.getOpcode() == TargetOpcode::G_SMULFIX)
10694 Shift = MIRBuilder.buildAShr(WideTy, Mul, ShiftAmt);
10695 else
10696 Shift = MIRBuilder.buildLShr(WideTy, Mul, ShiftAmt);
10697
10698 MIRBuilder.buildTrunc(Dst, Shift);
10699
10700 MI.eraseFromParent();
10701 return Legalized;
10702}
10703
// NOTE(review): the signature line and the condition guarding the first
// return are absent from this listing; presumably the first return applies
// only on Darwin, as the comment below describes - confirm against the
// original file.
10705 // On Darwin, -Os means optimize for size without hurting performance, so
10706 // only really optimize for size when -Oz (MinSize) is used.
10708 return MF.getFunction().hasMinSize();
10709 return MF.getFunction().hasOptSize();
10710}
10711
10712// Returns a list of types to use for memory op lowering in MemOps. A partial
10713// port of findOptimalMemOpLowering in TargetLowering.
// Returns false when a memcpy with a fixed destination alignment has a smaller
// source alignment, or when more than Limit operations would be needed;
// otherwise appends one type per operation to MemOps and returns true.
// NOTE(review): two lines of the misaligned-access condition inside the inner
// loop are absent from this listing.
10714static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
10715 unsigned Limit, const MemOp &Op,
10716 unsigned DstAS, unsigned SrcAS,
10717 const AttributeList &FuncAttributes,
10718 const TargetLowering &TLI) {
10719 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
10720 return false;
10721
 // Ask the target for its preferred type first; an invalid LLT means it has
 // no preference.
10722 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
10723
10724 if (Ty == LLT()) {
10725 // Use the largest scalar type whose alignment constraints are satisfied.
10726 // We only need to check DstAlign here as SrcAlign is always greater or
10727 // equal to DstAlign (or zero).
10728 Ty = LLT::integer(64);
10729 if (Op.isFixedDstAlign())
10730 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
10731 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
 // NOTE(review): this passes a byte count where a bit width is used
 // elsewhere, so a 64-bit type drops straight to 8 bits rather than
 // stepping through 32 and 16 - confirm this is intended.
10732 Ty = LLT::integer(Ty.getSizeInBytes());
10733 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
10734 // FIXME: check for the largest legal type we can load/store to.
10735 }
10736
 // Greedily cover the remaining Size bytes, shrinking the type whenever it is
 // larger than what is left.
10737 unsigned NumMemOps = 0;
10738 uint64_t Size = Op.size();
10739 while (Size) {
10740 unsigned TySize = Ty.getSizeInBytes();
10741 while (TySize > Size) {
10742 // For now, only use non-vector load / store's for the left-over pieces.
10743 LLT NewTy = Ty;
10744 // FIXME: check for mem op safety and legality of the types. Not all of
10745 // SDAGisms map cleanly to GISel concepts.
10746 if (NewTy.isVector())
10747 NewTy =
10748 NewTy.getSizeInBits() > 64 ? LLT::integer(64) : LLT::integer(32);
 // Next smaller power-of-two width strictly below the current one.
10749 NewTy = LLT::integer(llvm::bit_floor(NewTy.getSizeInBits() - 1));
10750 unsigned NewTySize = NewTy.getSizeInBytes();
10751 assert(NewTySize > 0 && "Could not find appropriate type");
10752
10753 // If the new LLT cannot cover all of the remaining bits, then consider
10754 // issuing a (or a pair of) unaligned and overlapping load / store.
10755 unsigned Fast;
10756 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
10757 MVT VT = getMVTForLLT(Ty);
10758 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
10760 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
10762 Fast)
 // Keep the current type and let this access overlap the previous one.
10763 TySize = Size;
10764 else {
10765 Ty = NewTy;
10766 TySize = NewTySize;
10767 }
10768 }
10769
10770 if (++NumMemOps > Limit)
10771 return false;
10772
10773 MemOps.push_back(Ty);
10774 Size -= TySize;
10775 }
10776
10777 return true;
10778}
10779
10780// Get a vectorized representation of the memset value operand, GISel edition.
// Returns a register of type Ty holding the byte value Val replicated across
// every byte, building any needed instructions with MIB.
// NOTE(review): the signature line is absent from this listing.
10782 MachineRegisterInfo &MRI = *MIB.getMRI();
10783 unsigned NumBits = Ty.getScalarSizeInBits();
10784 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
 // Constant byte into a scalar type: fold the splat into a single constant.
10785 if (!Ty.isVector() && ValVRegAndVal) {
10786 APInt Scalar = ValVRegAndVal->Value.trunc(8);
10787 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
10788 return MIB.buildConstant(Ty, SplatVal).getReg(0);
10789 }
10790
10791 // Extend the byte value to the larger type, and then multiply by a magic
10792 // value 0x010101... in order to replicate it across every byte.
10793 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
10794 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10795 return MIB.buildConstant(Ty, 0).getReg(0);
10796 }
10797
10798 LLT ExtType = Ty.getScalarType();
10799 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
10800 if (NumBits > 8) {
10801 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
10802 auto MagicMI = MIB.buildConstant(ExtType, Magic);
10803 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
10804 }
10805
10806 // For vector types create a G_BUILD_VECTOR.
10807 if (Ty.isVector())
10808 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
10809
10810 return Val;
10811}
10812
10814LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
10815 uint64_t KnownLen, Align Alignment,
10816 bool IsVolatile) {
10817 auto &MF = *MI.getParent()->getParent();
10818 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10819 auto &DL = MF.getDataLayout();
10820 LLVMContext &C = MF.getFunction().getContext();
10821
10822 assert(KnownLen != 0 && "Have a zero length memset length!");
10823
10824 bool DstAlignCanChange = false;
10825 MachineFrameInfo &MFI = MF.getFrameInfo();
10826 bool OptSize = shouldLowerMemFuncForSize(MF);
10827
10828 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10829 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10830 DstAlignCanChange = true;
10831
10832 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
10833 std::vector<LLT> MemOps;
10834
10835 const auto &DstMMO = **MI.memoperands_begin();
10836 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10837
10838 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10839 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
10840
10841 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
10842 MemOp::Set(KnownLen, DstAlignCanChange,
10843 Alignment,
10844 /*IsZeroMemset=*/IsZeroVal,
10845 /*IsVolatile=*/IsVolatile),
10846 DstPtrInfo.getAddrSpace(), ~0u,
10847 MF.getFunction().getAttributes(), TLI))
10848 return UnableToLegalize;
10849
10850 if (DstAlignCanChange) {
10851 // Get an estimate of the type from the LLT.
10852 Type *IRTy = getTypeForLLT(MemOps[0], C);
10853 Align NewAlign = DL.getABITypeAlign(IRTy);
10854 if (NewAlign > Alignment) {
10855 Alignment = NewAlign;
10856 unsigned FI = FIDef->getOperand(1).getIndex();
10857 // Give the stack frame object a larger alignment if needed.
10858 if (MFI.getObjectAlign(FI) < Alignment)
10859 MFI.setObjectAlignment(FI, Alignment);
10860 }
10861 }
10862
10863 MachineIRBuilder MIB(MI);
10864 // Find the largest store and generate the bit pattern for it.
10865 LLT LargestTy = MemOps[0];
10866 for (unsigned i = 1; i < MemOps.size(); i++)
10867 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
10868 LargestTy = MemOps[i];
10869
10870 // The memset stored value is always defined as an s8, so in order to make it
10871 // work with larger store types we need to repeat the bit pattern across the
10872 // wider type.
10873 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
10874
10875 if (!MemSetValue)
10876 return UnableToLegalize;
10877
10878 // Generate the stores. For each store type in the list, we generate the
10879 // matching store of that type to the destination address.
10880 LLT PtrTy = MRI.getType(Dst);
10881 unsigned DstOff = 0;
10882 unsigned Size = KnownLen;
10883 for (unsigned I = 0; I < MemOps.size(); I++) {
10884 LLT Ty = MemOps[I];
10885 unsigned TySize = Ty.getSizeInBytes();
10886 if (TySize > Size) {
10887 // Issuing an unaligned load / store pair that overlaps with the previous
10888 // pair. Adjust the offset accordingly.
10889 assert(I == MemOps.size() - 1 && I != 0);
10890 DstOff -= TySize - Size;
10891 }
10892
10893 // If this store is smaller than the largest store see whether we can get
10894 // the smaller value for free with a truncate.
10895 Register Value = MemSetValue;
10896 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
10897 MVT VT = getMVTForLLT(Ty);
10898 MVT LargestVT = getMVTForLLT(LargestTy);
10899 if (!LargestTy.isVector() && !Ty.isVector() &&
10900 TLI.isTruncateFree(LargestVT, VT))
10901 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10902 else
10903 Value = getMemsetValue(Val, Ty, MIB);
10904 if (!Value)
10905 return UnableToLegalize;
10906 }
10907
10908 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
10909
10910 Register Ptr = Dst;
10911 if (DstOff != 0) {
10912 auto Offset =
10913 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
10914 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst, Offset).getReg(0);
10915 }
10916
10917 MIB.buildStore(Value, Ptr, *StoreMMO);
10918 DstOff += Ty.getSizeInBytes();
10919 Size -= TySize;
10920 }
10921
10922 MI.eraseFromParent();
10923 return Legalized;
10924}
10925
10927LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
10928 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10929
10930 auto [Dst, Src, Len] = MI.getFirst3Regs();
10931
10932 const auto *MMOIt = MI.memoperands_begin();
10933 const MachineMemOperand *MemOp = *MMOIt;
10934 bool IsVolatile = MemOp->isVolatile();
10935
10936 // See if this is a constant length copy
10937 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10938 // FIXME: support dynamically sized G_MEMCPY_INLINE
10939 assert(LenVRegAndVal &&
10940 "inline memcpy with dynamic size is not yet supported");
10941 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10942 if (KnownLen == 0) {
10943 MI.eraseFromParent();
10944 return Legalized;
10945 }
10946
10947 const auto &DstMMO = **MI.memoperands_begin();
10948 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10949 Align DstAlign = DstMMO.getBaseAlign();
10950 Align SrcAlign = SrcMMO.getBaseAlign();
10951
10952 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
10953 IsVolatile);
10954}
10955
10957LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
10958 uint64_t KnownLen, Align DstAlign,
10959 Align SrcAlign, bool IsVolatile) {
10960 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10961 return lowerMemcpy(MI, Dst, Src, KnownLen,
10962 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
10963 IsVolatile);
10964}
10965
10967LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
10968 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
10969 Align SrcAlign, bool IsVolatile) {
10970 auto &MF = *MI.getParent()->getParent();
10971 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10972 auto &DL = MF.getDataLayout();
10974
10975 assert(KnownLen != 0 && "Have a zero length memcpy length!");
10976
10977 bool DstAlignCanChange = false;
10978 MachineFrameInfo &MFI = MF.getFrameInfo();
10979 Align Alignment = std::min(DstAlign, SrcAlign);
10980
10981 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10982 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10983 DstAlignCanChange = true;
10984
10985 // FIXME: infer better src pointer alignment like SelectionDAG does here.
10986 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
10987 // if the memcpy is in a tail call position.
10988
10989 std::vector<LLT> MemOps;
10990
10991 const auto &DstMMO = **MI.memoperands_begin();
10992 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10993 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10994 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10995
10997 MemOps, Limit,
10998 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10999 IsVolatile),
11000 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
11001 MF.getFunction().getAttributes(), TLI))
11002 return UnableToLegalize;
11003
11004 if (DstAlignCanChange) {
11005 // Get an estimate of the type from the LLT.
11006 Type *IRTy = getTypeForLLT(MemOps[0], C);
11007 Align NewAlign = DL.getABITypeAlign(IRTy);
11008
11009 // Don't promote to an alignment that would require dynamic stack
11010 // realignment.
11012 if (!TRI->hasStackRealignment(MF))
11013 if (MaybeAlign StackAlign = DL.getStackAlignment())
11014 NewAlign = std::min(NewAlign, *StackAlign);
11015
11016 if (NewAlign > Alignment) {
11017 Alignment = NewAlign;
11018 unsigned FI = FIDef->getOperand(1).getIndex();
11019 // Give the stack frame object a larger alignment if needed.
11020 if (MFI.getObjectAlign(FI) < Alignment)
11021 MFI.setObjectAlignment(FI, Alignment);
11022 }
11023 }
11024
11025 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
11026
11027 MachineIRBuilder MIB(MI);
11028 // Now we need to emit a pair of load and stores for each of the types we've
11029 // collected. I.e. for each type, generate a load from the source pointer of
11030 // that type width, and then generate a corresponding store to the dest buffer
11031 // of that value loaded. This can result in a sequence of loads and stores
11032 // mixed types, depending on what the target specifies as good types to use.
11033 unsigned CurrOffset = 0;
11034 unsigned Size = KnownLen;
11035 for (auto CopyTy : MemOps) {
11036 // Issuing an unaligned load / store pair that overlaps with the previous
11037 // pair. Adjust the offset accordingly.
11038 if (CopyTy.getSizeInBytes() > Size)
11039 CurrOffset -= CopyTy.getSizeInBytes() - Size;
11040
11041 // Construct MMOs for the accesses.
11042 auto *LoadMMO =
11043 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
11044 auto *StoreMMO =
11045 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
11046
11047 // Create the load.
11048 Register LoadPtr = Src;
11050 if (CurrOffset != 0) {
11051 LLT SrcTy = MRI.getType(Src);
11052 Offset =
11053 MIB.buildConstant(LLT::integer(SrcTy.getSizeInBits()), CurrOffset)
11054 .getReg(0);
11055 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
11056 }
11057 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
11058
11059 // Create the store.
11060 Register StorePtr = Dst;
11061 if (CurrOffset != 0) {
11062 LLT DstTy = MRI.getType(Dst);
11063 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
11064 }
11065 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
11066 CurrOffset += CopyTy.getSizeInBytes();
11067 Size -= CopyTy.getSizeInBytes();
11068 }
11069
11070 MI.eraseFromParent();
11071 return Legalized;
11072}
11073
11075LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
11076 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
11077 bool IsVolatile) {
11078 auto &MF = *MI.getParent()->getParent();
11079 const auto &TLI = *MF.getSubtarget().getTargetLowering();
11080 auto &DL = MF.getDataLayout();
11081 LLVMContext &C = MF.getFunction().getContext();
11082
11083 assert(KnownLen != 0 && "Have a zero length memmove length!");
11084
11085 bool DstAlignCanChange = false;
11086 MachineFrameInfo &MFI = MF.getFrameInfo();
11087 bool OptSize = shouldLowerMemFuncForSize(MF);
11088 Align Alignment = std::min(DstAlign, SrcAlign);
11089
11090 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
11091 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
11092 DstAlignCanChange = true;
11093
11094 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
11095 std::vector<LLT> MemOps;
11096
11097 const auto &DstMMO = **MI.memoperands_begin();
11098 const auto &SrcMMO = **std::next(MI.memoperands_begin());
11099 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
11100 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
11101
11102 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
11103 // to a bug in it's findOptimalMemOpLowering implementation. For now do the
11104 // same thing here.
11106 MemOps, Limit,
11107 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
11108 /*IsVolatile*/ true),
11109 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
11110 MF.getFunction().getAttributes(), TLI))
11111 return UnableToLegalize;
11112
11113 if (DstAlignCanChange) {
11114 // Get an estimate of the type from the LLT.
11115 Type *IRTy = getTypeForLLT(MemOps[0], C);
11116 Align NewAlign = DL.getABITypeAlign(IRTy);
11117
11118 // Don't promote to an alignment that would require dynamic stack
11119 // realignment.
11120 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
11121 if (!TRI->hasStackRealignment(MF))
11122 if (MaybeAlign StackAlign = DL.getStackAlignment())
11123 NewAlign = std::min(NewAlign, *StackAlign);
11124
11125 if (NewAlign > Alignment) {
11126 Alignment = NewAlign;
11127 unsigned FI = FIDef->getOperand(1).getIndex();
11128 // Give the stack frame object a larger alignment if needed.
11129 if (MFI.getObjectAlign(FI) < Alignment)
11130 MFI.setObjectAlignment(FI, Alignment);
11131 }
11132 }
11133
11134 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
11135
11136 MachineIRBuilder MIB(MI);
11137 // Memmove requires that we perform the loads first before issuing the stores.
11138 // Apart from that, this loop is pretty much doing the same thing as the
11139 // memcpy codegen function.
11140 unsigned CurrOffset = 0;
11141 SmallVector<Register, 16> LoadVals;
11142 for (auto CopyTy : MemOps) {
11143 // Construct MMO for the load.
11144 auto *LoadMMO =
11145 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
11146
11147 // Create the load.
11148 Register LoadPtr = Src;
11149 if (CurrOffset != 0) {
11150 LLT SrcTy = MRI.getType(Src);
11151 auto Offset =
11152 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
11153 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
11154 }
11155 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
11156 CurrOffset += CopyTy.getSizeInBytes();
11157 }
11158
11159 CurrOffset = 0;
11160 for (unsigned I = 0; I < MemOps.size(); ++I) {
11161 LLT CopyTy = MemOps[I];
11162 // Now store the values loaded.
11163 auto *StoreMMO =
11164 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
11165
11166 Register StorePtr = Dst;
11167 if (CurrOffset != 0) {
11168 LLT DstTy = MRI.getType(Dst);
11169 auto Offset =
11170 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
11171 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
11172 }
11173 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
11174 CurrOffset += CopyTy.getSizeInBytes();
11175 }
11176 MI.eraseFromParent();
11177 return Legalized;
11178}
11179
// NOTE(review): the two header lines of this function (return type and
// signature) are absent from this listing; only the body is visible. The body
// reads `MI` and `MaxLen`, which are presumably its parameters.
//
// Dispatches a G_MEMCPY, G_MEMMOVE or G_MEMSET with a constant length to the
// matching lowering routine. Returns UnableToLegalize when the length is not a
// known constant or exceeds a non-zero MaxLen; a zero-length operation is
// erased and reported as Legalized.
11182 const unsigned Opc = MI.getOpcode();
11183 // This combine is fairly complex so it's not written with a separate
11184 // matcher function.
11185 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
11186 Opc == TargetOpcode::G_MEMSET) &&
11187 "Expected memcpy like instruction");
11188
// The first memory operand supplies the destination alignment.
11189 auto MMOIt = MI.memoperands_begin();
11190 const MachineMemOperand *MemOp = *MMOIt;
11191
11192 Align DstAlign = MemOp->getBaseAlign();
11193 Align SrcAlign;
11194 auto [Dst, Src, Len] = MI.getFirst3Regs();
11195
// For everything but G_MEMSET, advance to the second memory operand and take
// the source alignment from it. From here on MemOp refers to that second
// operand, so the volatile flag read further down comes from it as well.
// NOTE(review): the assert runs before the increment, so it checks that the
// first operand exists rather than the second one its message talks about.
11196 if (Opc != TargetOpcode::G_MEMSET) {
11197 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
11198 MemOp = *(++MMOIt);
11199 SrcAlign = MemOp->getBaseAlign();
11200 }
11201
11202 // See if this is a constant length copy
11203 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
11204 if (!LenVRegAndVal)
11205 return UnableToLegalize;
11206 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
11207
// A zero-length operation has no effect; erasing it is the whole lowering.
11208 if (KnownLen == 0) {
11209 MI.eraseFromParent();
11210 return Legalized;
11211 }
11212
// MaxLen == 0 disables the length cap.
11213 if (MaxLen && KnownLen > MaxLen)
11214 return UnableToLegalize;
11215
11216 bool IsVolatile = MemOp->isVolatile();
// G_MEMCPY is limited by the target's getMaxStoresPerMemcpy value, using the
// size-optimising setting when shouldLowerMemFuncForSize says so.
11217 if (Opc == TargetOpcode::G_MEMCPY) {
11218 auto &MF = *MI.getParent()->getParent();
11219 const auto &TLI = *MF.getSubtarget().getTargetLowering();
11220 bool OptSize = shouldLowerMemFuncForSize(MF);
11221 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
11222 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
11223 IsVolatile);
11224 }
11225 if (Opc == TargetOpcode::G_MEMMOVE)
11226 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
// For G_MEMSET the second register (named Src here) is passed in the third
// argument position of lowerMemset -- presumably the value to store; confirm
// against the lowerMemset signature.
11227 if (Opc == TargetOpcode::G_MEMSET)
11228 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
11229 return UnableToLegalize;
11230}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
constexpr LLT S32
constexpr LLT S64
AMDGPU Register Bank Select
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition Utils.h:75
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t High
R600 Clause Merge
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
#define LLVM_DEBUG(...)
Definition Debug.h:119
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
BinaryOperator * Mul
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1406
APInt bitcastToAPInt() const
Definition APFloat.h:1430
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1197
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1157
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1168
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1535
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
void negate()
Negate this APInt in place.
Definition APInt.h:1491
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:652
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1028
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:880
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition APInt.h:271
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:130
size_t size() const
Get the array size.
Definition ArrayRef.h:141
iterator begin() const
Definition ArrayRef.h:129
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:743
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:746
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:755
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:744
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:745
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:754
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:748
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:751
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:752
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:747
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:749
@ ICMP_NE
not equal
Definition InstrTypes.h:762
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:768
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:756
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:753
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:750
bool isSigned() const
Definition InstrTypes.h:993
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:852
const APFloat & getValueAPF() const
Definition Constants.h:463
This is the shared class of boolean and integer constants.
Definition Constants.h:87
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
bool isBigEndian() const
Definition DataLayout.h:218
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:254
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:714
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getValueReg() const
Get the stored value register.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents an insert subvector.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a threeway compare.
Represents a G_STORE.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:354
static constexpr LLT float64()
Get a 64-bit IEEE double value.
LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
LLT getScalarType() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr ElementCount getElementCount() const
static constexpr LLT float16()
Get a 16-bit IEEE half value.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isPointerOrPointerVector() const
static LLT integer(unsigned SizeInBits)
static constexpr LLT bfloat16()
constexpr LLT changeVectorElementType(LLT NewEltTy) const
Returns a vector with the same number of elements but the new element type.
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LLT changeVectorElementCount(ElementCount EC) const
Return a vector with the same element type and the new element count.
static constexpr LLT float32()
Get a 32-bit IEEE float value.
static LLT floatIEEE(unsigned SizeInBits)
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSSUBE(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizerHelper::LegalizeResult createAtomicLibcall(MachineInstr &MI) const
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTLS(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B, const LibcallLoweringInfo *Libcalls=nullptr)
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPExtAndTruncMem(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMODF(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F32_TO_BF16(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI LegalizeResult conversionLibcall(MachineInstr &MI, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, bool IsSigned=false) const
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult createMemLibcall(MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Create a libcall to memcpy et al.
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMulfix(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult createLibcall(const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr) const
Helper function that creates a libcall to the given Name using the given calling convention CC.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LLVM_ABI LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver) const
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI)
Tracks which library functions to use for a particular subtarget.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
A single uniqued string.
Definition Metadata.h:722
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:632
Machine Value Type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation: Zero = G_CONSTANT 0; Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:138
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:483
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Definition Triple.h:645
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Definition Type.cpp:295
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:286
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:317
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:291
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
Definition Type.cpp:294
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:290
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:288
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:558
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:830
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:1984
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:653
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:294
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition MathExtras.h:223
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1527
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1584
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1151
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition Utils.cpp:1151
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
@ Success
The lock was released successfully.
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which implies the number of blocks needed).
Definition Utils.cpp:508
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ FSub
Subtraction of floats.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1884
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition MathExtras.h:232
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT, returns its APInt value and def register.
Definition Utils.cpp:433
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition Utils.cpp:1239
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:347
@ Custom
The result value requires a custom uniformity check.
Definition Uniformity.h:31
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition Utils.cpp:611
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
bool isKnownNeverZero() const
Return true if it's known this can never be a zero.
The LegalityQuery object bundles together all the information that's needed to decide whether a given operation is legal or not.
Matching combinators.
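This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.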
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.