1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
36#include "llvm/Support/Debug.h"
40#include <numeric>
41#include <optional>
42
43#define DEBUG_TYPE "legalizer"
44
45using namespace llvm;
46using namespace LegalizeActions;
47using namespace MIPatternMatch;
48
49/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
50///
51/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
52/// with any leftover piece as type \p LeftoverTy
53///
54/// Returns -1 in the first element of the pair if the breakdown is not
55/// satisfiable.
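///
/// For example, breaking an s100 \p OrigTy into s32 \p NarrowTy pieces gives
/// {3, 1} with \p LeftoverTy set to s4, while breaking <4 x s16> into
/// <2 x s16> gives {2, 0} with no leftover piece.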
56static std::pair<int, int>
57getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
58 assert(!LeftoverTy.isValid() && "this is an out argument");
59
60 unsigned Size = OrigTy.getSizeInBits();
61 unsigned NarrowSize = NarrowTy.getSizeInBits();
62 unsigned NumParts = Size / NarrowSize;
63 unsigned LeftoverSize = Size - NumParts * NarrowSize;
64 assert(Size > NarrowSize);
65
66 if (LeftoverSize == 0)
67 return {NumParts, 0};
68
69 if (NarrowTy.isVector()) {
70 unsigned EltSize = OrigTy.getScalarSizeInBits();
71 if (LeftoverSize % EltSize != 0)
72 return {-1, -1};
73 LeftoverTy =
74 LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
75 OrigTy.getElementType());
76 } else {
77 LeftoverTy = LLT::scalar(LeftoverSize);
78 }
79
80 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
81 return std::make_pair(NumParts, NumLeftover);
82}
83
84Type *llvm::getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
85
86 if (!Ty.isScalar())
87 return nullptr;
88
89 switch (Ty.getSizeInBits()) {
90 case 16:
91 return Type::getHalfTy(Ctx);
92 case 32:
93 return Type::getFloatTy(Ctx);
94 case 64:
95 return Type::getDoubleTy(Ctx);
96 case 80:
97 return Type::getX86_FP80Ty(Ctx);
98 case 128:
99 return Type::getFP128Ty(Ctx);
100 default:
101 return nullptr;
102 }
103}
104
105LegalizerHelper::LegalizerHelper(MachineFunction &MF,
106 GISelChangeObserver &Observer,
107 MachineIRBuilder &Builder)
108 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
109 LI(*MF.getSubtarget().getLegalizerInfo()),
110 TLI(*MF.getSubtarget().getTargetLowering()), VT(nullptr) {}
111
112LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
113 GISelChangeObserver &Observer,
114 MachineIRBuilder &B, GISelValueTracking *VT)
115 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
116 TLI(*MF.getSubtarget().getTargetLowering()), VT(VT) {}
117
118LegalizerHelper::LegalizeResult
119LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
120 LostDebugLocObserver &LocObserver) {
121 LLVM_DEBUG(dbgs() << "\nLegalizing: " << MI);
122
123 MIRBuilder.setInstrAndDebugLoc(MI);
124
125 if (isa<GIntrinsic>(MI))
126 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
127 auto Step = LI.getAction(MI, MRI);
128 switch (Step.Action) {
129 case Legal:
130 LLVM_DEBUG(dbgs() << ".. Already legal\n");
131 return AlreadyLegal;
132 case Libcall:
133 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
134 return libcall(MI, LocObserver);
135 case NarrowScalar:
136 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
137 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
138 case WidenScalar:
139 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
140 return widenScalar(MI, Step.TypeIdx, Step.NewType);
141 case Bitcast:
142 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
143 return bitcast(MI, Step.TypeIdx, Step.NewType);
144 case Lower:
145 LLVM_DEBUG(dbgs() << ".. Lower\n");
146 return lower(MI, Step.TypeIdx, Step.NewType);
147 case FewerElements:
148 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
149 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
150 case MoreElements:
151 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
152 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
153 case Custom:
154 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
155 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
156 : UnableToLegalize;
157 default:
158 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
159 return UnableToLegalize;
160 }
161}
162
163void LegalizerHelper::insertParts(Register DstReg,
164 LLT ResultTy, LLT PartTy,
165 ArrayRef<Register> PartRegs,
166 LLT LeftoverTy,
167 ArrayRef<Register> LeftoverRegs) {
168 if (!LeftoverTy.isValid()) {
169 assert(LeftoverRegs.empty());
170
171 if (!ResultTy.isVector()) {
172 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
173 return;
174 }
175
176 if (PartTy.isVector())
177 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
178 else
179 MIRBuilder.buildBuildVector(DstReg, PartRegs);
180 return;
181 }
182
183 // Merge sub-vectors with different numbers of elements and insert into DstReg.
184 if (ResultTy.isVector()) {
185 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
186 SmallVector<Register, 8> AllRegs(PartRegs);
187 AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
188 return mergeMixedSubvectors(DstReg, AllRegs);
189 }
190
191 SmallVector<Register> GCDRegs;
192 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
193 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
194 extractGCDType(GCDRegs, GCDTy, PartReg);
195 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
196 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
197}
198
199void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
200 Register Reg) {
201 LLT Ty = MRI.getType(Reg);
202 SmallVector<Register> RegElts;
203 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
204 MIRBuilder, MRI);
205 Elts.append(RegElts);
206}
207
208/// Merge \p PartRegs with different types into \p DstReg.
209void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
210 ArrayRef<Register> PartRegs) {
211 SmallVector<Register> AllElts;
212 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
213 appendVectorElts(AllElts, PartRegs[i]);
214
215 Register Leftover = PartRegs[PartRegs.size() - 1];
216 if (!MRI.getType(Leftover).isVector())
217 AllElts.push_back(Leftover);
218 else
219 appendVectorElts(AllElts, Leftover);
220
221 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
222}
223
224/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
225static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
226 const MachineInstr &MI) {
227 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
228
229 const int StartIdx = Regs.size();
230 const int NumResults = MI.getNumOperands() - 1;
231 Regs.resize(Regs.size() + NumResults);
232 for (int I = 0; I != NumResults; ++I)
233 Regs[StartIdx + I] = MI.getOperand(I).getReg();
234}
235
236void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
237 LLT GCDTy, Register SrcReg) {
238 LLT SrcTy = MRI.getType(SrcReg);
239 if (SrcTy == GCDTy) {
240 // If the source already evenly divides the result type, we don't need to do
241 // anything.
242 Parts.push_back(SrcReg);
243 } else {
244 // Need to split into common type sized pieces.
245 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
246 getUnmergeResults(Parts, *Unmerge);
247 }
248}
249
250LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
251 LLT NarrowTy, Register SrcReg) {
252 LLT SrcTy = MRI.getType(SrcReg);
253 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
254 extractGCDType(Parts, GCDTy, SrcReg);
255 return GCDTy;
256}
257
258LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
259 SmallVectorImpl<Register> &VRegs,
260 unsigned PadStrategy) {
261 LLT LCMTy = getLCMType(DstTy, NarrowTy);
262
263 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
264 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
265 int NumOrigSrc = VRegs.size();
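  // For example, with DstTy = s96, NarrowTy = s64 and GCDTy = s32, LCMTy is
  // s192, giving NumParts = 3 and NumSubParts = 2; if only three s32 sources
  // were provided, the remaining three GCD-sized slots are filled with PadReg.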
266
267 Register PadReg;
268
269 // Get a value we can use to pad the source value if the sources won't evenly
270 // cover the result type.
271 if (NumOrigSrc < NumParts * NumSubParts) {
272 if (PadStrategy == TargetOpcode::G_ZEXT)
273 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
274 else if (PadStrategy == TargetOpcode::G_ANYEXT)
275 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
276 else {
277 assert(PadStrategy == TargetOpcode::G_SEXT);
278
279 // Shift the sign bit of the low register through the high register.
280 auto ShiftAmt =
281 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
282 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
283 }
284 }
285
286 // Registers for the final merge to be produced.
287 SmallVector<Register, 4> Remerge(NumParts);
288
289 // Registers needed for intermediate merges, which will be merged into a
290 // source for Remerge.
291 SmallVector<Register, 4> SubMerge(NumSubParts);
292
293 // Once we've fully read off the end of the original source bits, we can reuse
294 // the same high bits for remaining padding elements.
295 Register AllPadReg;
296
297 // Build merges to the LCM type to cover the original result type.
298 for (int I = 0; I != NumParts; ++I) {
299 bool AllMergePartsArePadding = true;
300
301 // Build the requested merges to the requested type.
302 for (int J = 0; J != NumSubParts; ++J) {
303 int Idx = I * NumSubParts + J;
304 if (Idx >= NumOrigSrc) {
305 SubMerge[J] = PadReg;
306 continue;
307 }
308
309 SubMerge[J] = VRegs[Idx];
310
311 // There are meaningful bits here we can't reuse later.
312 AllMergePartsArePadding = false;
313 }
314
315 // If we've filled up a complete piece with padding bits, we can directly
316 // emit the natural sized constant if applicable, rather than a merge of
317 // smaller constants.
318 if (AllMergePartsArePadding && !AllPadReg) {
319 if (PadStrategy == TargetOpcode::G_ANYEXT)
320 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
321 else if (PadStrategy == TargetOpcode::G_ZEXT)
322 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
323
324 // If this is a sign extension, we can't materialize a trivial constant
325 // with the right type and have to produce a merge.
326 }
327
328 if (AllPadReg) {
329 // Avoid creating additional instructions if we're just adding additional
330 // copies of padding bits.
331 Remerge[I] = AllPadReg;
332 continue;
333 }
334
335 if (NumSubParts == 1)
336 Remerge[I] = SubMerge[0];
337 else
338 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
339
340 // In the sign extend padding case, re-use the first all-signbit merge.
341 if (AllMergePartsArePadding && !AllPadReg)
342 AllPadReg = Remerge[I];
343 }
344
345 VRegs = std::move(Remerge);
346 return LCMTy;
347}
348
349void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
350 ArrayRef<Register> RemergeRegs) {
351 LLT DstTy = MRI.getType(DstReg);
352
353 // Create the merge to the widened source, and extract the relevant bits into
354 // the result.
355
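  // For example, an s96 destination remerged from s64 pieces through an s192
  // LCM type is built as one s192 merge and then truncated back to s96.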
356 if (DstTy == LCMTy) {
357 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
358 return;
359 }
360
361 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
362 if (DstTy.isScalar() && LCMTy.isScalar()) {
363 MIRBuilder.buildTrunc(DstReg, Remerge);
364 return;
365 }
366
367 if (LCMTy.isVector()) {
368 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
369 SmallVector<Register, 8> UnmergeDefs(NumDefs);
370 UnmergeDefs[0] = DstReg;
371 for (unsigned I = 1; I != NumDefs; ++I)
372 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
373
374 MIRBuilder.buildUnmerge(UnmergeDefs,
375 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
376 return;
377 }
378
379 llvm_unreachable("unhandled case");
380}
381
382static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
383#define RTLIBCASE_INT(LibcallPrefix) \
384 do { \
385 switch (Size) { \
386 case 32: \
387 return RTLIB::LibcallPrefix##32; \
388 case 64: \
389 return RTLIB::LibcallPrefix##64; \
390 case 128: \
391 return RTLIB::LibcallPrefix##128; \
392 default: \
393 llvm_unreachable("unexpected size"); \
394 } \
395 } while (0)
396
397#define RTLIBCASE(LibcallPrefix) \
398 do { \
399 switch (Size) { \
400 case 32: \
401 return RTLIB::LibcallPrefix##32; \
402 case 64: \
403 return RTLIB::LibcallPrefix##64; \
404 case 80: \
405 return RTLIB::LibcallPrefix##80; \
406 case 128: \
407 return RTLIB::LibcallPrefix##128; \
408 default: \
409 llvm_unreachable("unexpected size"); \
410 } \
411 } while (0)
412
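  // For example, for a 64-bit G_FADD below, RTLIBCASE(ADD_F) expands to
  // `return RTLIB::ADD_F64;`.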
413 switch (Opcode) {
414 case TargetOpcode::G_LROUND:
415 RTLIBCASE(LROUND_F);
416 case TargetOpcode::G_LLROUND:
417 RTLIBCASE(LLROUND_F);
418 case TargetOpcode::G_MUL:
419 RTLIBCASE_INT(MUL_I);
420 case TargetOpcode::G_SDIV:
421 RTLIBCASE_INT(SDIV_I);
422 case TargetOpcode::G_UDIV:
423 RTLIBCASE_INT(UDIV_I);
424 case TargetOpcode::G_SREM:
425 RTLIBCASE_INT(SREM_I);
426 case TargetOpcode::G_UREM:
427 RTLIBCASE_INT(UREM_I);
428 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
429 RTLIBCASE_INT(CTLZ_I);
430 case TargetOpcode::G_FADD:
431 RTLIBCASE(ADD_F);
432 case TargetOpcode::G_FSUB:
433 RTLIBCASE(SUB_F);
434 case TargetOpcode::G_FMUL:
435 RTLIBCASE(MUL_F);
436 case TargetOpcode::G_FDIV:
437 RTLIBCASE(DIV_F);
438 case TargetOpcode::G_FEXP:
439 RTLIBCASE(EXP_F);
440 case TargetOpcode::G_FEXP2:
441 RTLIBCASE(EXP2_F);
442 case TargetOpcode::G_FEXP10:
443 RTLIBCASE(EXP10_F);
444 case TargetOpcode::G_FREM:
445 RTLIBCASE(REM_F);
446 case TargetOpcode::G_FPOW:
447 RTLIBCASE(POW_F);
448 case TargetOpcode::G_FPOWI:
449 RTLIBCASE(POWI_F);
450 case TargetOpcode::G_FMA:
451 RTLIBCASE(FMA_F);
452 case TargetOpcode::G_FSIN:
453 RTLIBCASE(SIN_F);
454 case TargetOpcode::G_FCOS:
455 RTLIBCASE(COS_F);
456 case TargetOpcode::G_FTAN:
457 RTLIBCASE(TAN_F);
458 case TargetOpcode::G_FASIN:
459 RTLIBCASE(ASIN_F);
460 case TargetOpcode::G_FACOS:
461 RTLIBCASE(ACOS_F);
462 case TargetOpcode::G_FATAN:
463 RTLIBCASE(ATAN_F);
464 case TargetOpcode::G_FATAN2:
465 RTLIBCASE(ATAN2_F);
466 case TargetOpcode::G_FSINH:
467 RTLIBCASE(SINH_F);
468 case TargetOpcode::G_FCOSH:
469 RTLIBCASE(COSH_F);
470 case TargetOpcode::G_FTANH:
471 RTLIBCASE(TANH_F);
472 case TargetOpcode::G_FSINCOS:
473 RTLIBCASE(SINCOS_F);
474 case TargetOpcode::G_FMODF:
475 RTLIBCASE(MODF_F);
476 case TargetOpcode::G_FLOG10:
477 RTLIBCASE(LOG10_F);
478 case TargetOpcode::G_FLOG:
479 RTLIBCASE(LOG_F);
480 case TargetOpcode::G_FLOG2:
481 RTLIBCASE(LOG2_F);
482 case TargetOpcode::G_FLDEXP:
483 RTLIBCASE(LDEXP_F);
484 case TargetOpcode::G_FCEIL:
485 RTLIBCASE(CEIL_F);
486 case TargetOpcode::G_FFLOOR:
487 RTLIBCASE(FLOOR_F);
488 case TargetOpcode::G_FMINNUM:
489 RTLIBCASE(FMIN_F);
490 case TargetOpcode::G_FMAXNUM:
491 RTLIBCASE(FMAX_F);
492 case TargetOpcode::G_FMINIMUMNUM:
493 RTLIBCASE(FMINIMUM_NUM_F);
494 case TargetOpcode::G_FMAXIMUMNUM:
495 RTLIBCASE(FMAXIMUM_NUM_F);
496 case TargetOpcode::G_FSQRT:
497 RTLIBCASE(SQRT_F);
498 case TargetOpcode::G_FRINT:
499 RTLIBCASE(RINT_F);
500 case TargetOpcode::G_FNEARBYINT:
501 RTLIBCASE(NEARBYINT_F);
502 case TargetOpcode::G_INTRINSIC_TRUNC:
503 RTLIBCASE(TRUNC_F);
504 case TargetOpcode::G_INTRINSIC_ROUND:
505 RTLIBCASE(ROUND_F);
506 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
507 RTLIBCASE(ROUNDEVEN_F);
508 case TargetOpcode::G_INTRINSIC_LRINT:
509 RTLIBCASE(LRINT_F);
510 case TargetOpcode::G_INTRINSIC_LLRINT:
511 RTLIBCASE(LLRINT_F);
512 }
513 llvm_unreachable("Unknown libcall function");
514#undef RTLIBCASE_INT
515#undef RTLIBCASE
516}
517
518/// True if an instruction is in tail position in its caller. Intended for
519/// legalizing libcalls as tail calls when possible.
520static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
521 MachineInstr &MI,
522 const TargetInstrInfo &TII,
523 MachineRegisterInfo &MRI) {
524 MachineBasicBlock &MBB = *MI.getParent();
525 const Function &F = MBB.getParent()->getFunction();
526
527 // Conservatively require the attributes of the call to match those of
528 // the return. Ignore NoAlias and NonNull because they don't affect the
529 // call sequence.
530 AttributeList CallerAttrs = F.getAttributes();
531 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
532 .removeAttribute(Attribute::NoAlias)
533 .removeAttribute(Attribute::NonNull)
534 .hasAttributes())
535 return false;
536
537 // It's not safe to eliminate the sign / zero extension of the return value.
538 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
539 CallerAttrs.hasRetAttr(Attribute::SExt))
540 return false;
541
542 // Only tail call if the following instruction is a standard return or if we
543 // have a `thisreturn` callee, and a sequence like:
544 //
545 // G_MEMCPY %0, %1, %2
546 // $x0 = COPY %0
547 // RET_ReallyLR implicit $x0
548 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
549 if (Next != MBB.instr_end() && Next->isCopy()) {
550 if (MI.getOpcode() == TargetOpcode::G_BZERO)
551 return false;
552
553 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
554 // memcpy/etc routines return the same parameter. For others it will be the
555 // returned value.
556 Register VReg = MI.getOperand(0).getReg();
557 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
558 return false;
559
560 Register PReg = Next->getOperand(0).getReg();
561 if (!PReg.isPhysical())
562 return false;
563
564 auto Ret = next_nodbg(Next, MBB.instr_end());
565 if (Ret == MBB.instr_end() || !Ret->isReturn())
566 return false;
567
568 if (Ret->getNumImplicitOperands() != 1)
569 return false;
570
571 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
572 return false;
573
574 // Skip over the COPY that we just validated.
575 Next = Ret;
576 }
577
578 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
579 return false;
580
581 return true;
582}
583
584LegalizerHelper::LegalizeResult
585llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
586 const CallLowering::ArgInfo &Result,
587 ArrayRef<CallLowering::ArgInfo> Args,
588 const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
589 MachineInstr *MI) {
590 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
591
592 CallLowering::CallLoweringInfo Info;
593 Info.CallConv = CC;
594 Info.Callee = MachineOperand::CreateES(Name);
595 Info.OrigRet = Result;
596 if (MI)
597 Info.IsTailCall =
598 (Result.Ty->isVoidTy() ||
599 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
600 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
601 *MIRBuilder.getMRI());
602
603 llvm::append_range(Info.OrigArgs, Args);
604 if (!CLI.lowerCall(MIRBuilder, Info))
605 return LegalizerHelper::UnableToLegalize;
606
607 if (MI && Info.LoweredTailCall) {
608 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
609
610 // Check debug locations before removing the return.
611 LocObserver.checkpoint(true);
612
613 // We must have a return following the call (or debug insts) to get past
614 // isLibCallInTailPosition.
615 do {
616 MachineInstr *Next = MI->getNextNode();
617 assert(Next &&
618 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
619 "Expected instr following MI to be return or debug inst?");
620 // We lowered a tail call, so the call is now the return from the block.
621 // Delete the old return.
622 Next->eraseFromParent();
623 } while (MI->getNextNode());
624
625 // We expect to lose the debug location from the return.
626 LocObserver.checkpoint(false);
627 }
628 return LegalizerHelper::Legalized;
629}
630
631LegalizerHelper::LegalizeResult
632llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
633 const CallLowering::ArgInfo &Result,
634 ArrayRef<CallLowering::ArgInfo> Args,
635 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
636 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
637 const char *Name = TLI.getLibcallName(Libcall);
638 if (!Name)
639 return LegalizerHelper::UnableToLegalize;
640 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
641 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
642}
643
644// Useful for libcalls where all operands have the same type.
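// For example, an s64 G_FREM becomes a call to the REM_F64 libcall (fmod),
// with the result and both operands given the IR type double.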
645static LegalizerHelper::LegalizeResult
646simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
647 Type *OpType, LostDebugLocObserver &LocObserver) {
648 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
649
650 // FIXME: What does the original arg index mean here?
651 SmallVector<CallLowering::ArgInfo> Args;
652 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
653 Args.push_back({MO.getReg(), OpType, 0});
654 return createLibcall(MIRBuilder, Libcall,
655 {MI.getOperand(0).getReg(), OpType, 0}, Args,
656 LocObserver, &MI);
657}
658
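// Lower G_FSINCOS to a sincos-family libcall: the call writes its two results
// through pointers to stack temporaries, which are then loaded back into the
// original destination registers.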
659LegalizerHelper::LegalizeResult LegalizerHelper::emitSincosLibcall(
660 MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType,
661 LostDebugLocObserver &LocObserver) {
662 MachineFunction &MF = *MI.getMF();
663 MachineRegisterInfo &MRI = MF.getRegInfo();
664
665 Register DstSin = MI.getOperand(0).getReg();
666 Register DstCos = MI.getOperand(1).getReg();
667 Register Src = MI.getOperand(2).getReg();
668 LLT DstTy = MRI.getType(DstSin);
669
670 int MemSize = DstTy.getSizeInBytes();
671 Align Alignment = getStackTemporaryAlignment(DstTy);
672 const DataLayout &DL = MIRBuilder.getDataLayout();
673 unsigned AddrSpace = DL.getAllocaAddrSpace();
674 MachinePointerInfo PtrInfo;
675
676 Register StackPtrSin =
677 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
678 .getReg(0);
679 Register StackPtrCos =
680 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
681 .getReg(0);
682
683 auto &Ctx = MF.getFunction().getContext();
684 auto LibcallResult =
685 createLibcall(MIRBuilder, getRTLibDesc(MI.getOpcode(), Size),
686 {{0}, Type::getVoidTy(Ctx), 0},
687 {{Src, OpType, 0},
688 {StackPtrSin, PointerType::get(Ctx, AddrSpace), 1},
689 {StackPtrCos, PointerType::get(Ctx, AddrSpace), 2}},
690 LocObserver, &MI);
691
692 if (LibcallResult != LegalizeResult::Legalized)
693 return UnableToLegalize;
694
695 MachineMemOperand *LoadMMOSin = MF.getMachineMemOperand(
696 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
697 MachineMemOperand *LoadMMOCos = MF.getMachineMemOperand(
698 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
699
700 MIRBuilder.buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
701 MIRBuilder.buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
702 MI.eraseFromParent();
703
704 return Legalized;
705}
706
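// Lower G_FMODF to a modf-family libcall: the fractional part is returned
// directly and the integral part is written through a pointer to a stack
// temporary, which is loaded back afterwards.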
707LegalizerHelper::LegalizeResult
708LegalizerHelper::emitModfLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
709 unsigned Size, Type *OpType,
710 LostDebugLocObserver &LocObserver) {
711 MachineFunction &MF = MIRBuilder.getMF();
712 MachineRegisterInfo &MRI = MF.getRegInfo();
713
714 Register DstFrac = MI.getOperand(0).getReg();
715 Register DstInt = MI.getOperand(1).getReg();
716 Register Src = MI.getOperand(2).getReg();
717 LLT DstTy = MRI.getType(DstFrac);
718
719 int MemSize = DstTy.getSizeInBytes();
720 Align Alignment = getStackTemporaryAlignment(DstTy);
721 const DataLayout &DL = MIRBuilder.getDataLayout();
722 unsigned AddrSpace = DL.getAllocaAddrSpace();
723 MachinePointerInfo PtrInfo;
724
725 Register StackPtrInt =
726 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
727 .getReg(0);
728
729 auto &Ctx = MF.getFunction().getContext();
730 auto LibcallResult = createLibcall(
731 MIRBuilder, getRTLibDesc(MI.getOpcode(), Size), {DstFrac, OpType, 0},
732 {{Src, OpType, 0}, {StackPtrInt, PointerType::get(Ctx, AddrSpace), 1}},
733 LocObserver, &MI);
734
735 if (LibcallResult != LegalizeResult::Legalized)
736 return UnableToLegalize;
737
738 MachineMemOperand *LoadMMOInt = MF.getMachineMemOperand(
739 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
740
741 MIRBuilder.buildLoad(DstInt, StackPtrInt, *LoadMMOInt);
742 MI.eraseFromParent();
743
744 return Legalized;
745}
746
747LegalizerHelper::LegalizeResult
748llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
749 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
750 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
751
752 SmallVector<CallLowering::ArgInfo> Args;
753 // Add all the args, except for the last, which is an imm denoting 'tail'.
754 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
755 Register Reg = MI.getOperand(i).getReg();
756
757 // Need to derive an IR type for call lowering.
758 LLT OpLLT = MRI.getType(Reg);
759 Type *OpTy = nullptr;
760 if (OpLLT.isPointer())
761 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
762 else
763 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
764 Args.push_back({Reg, OpTy, 0});
765 }
766
767 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
768 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
769 RTLIB::Libcall RTLibcall;
770 unsigned Opc = MI.getOpcode();
771 const char *Name;
772 switch (Opc) {
773 case TargetOpcode::G_BZERO:
774 RTLibcall = RTLIB::BZERO;
775 Name = TLI.getLibcallName(RTLibcall);
776 break;
777 case TargetOpcode::G_MEMCPY:
778 RTLibcall = RTLIB::MEMCPY;
779 Name = TLI.getLibcallImplName(TLI.getMemcpyImpl()).data();
780 Args[0].Flags[0].setReturned();
781 break;
782 case TargetOpcode::G_MEMMOVE:
783 RTLibcall = RTLIB::MEMMOVE;
784 Name = TLI.getLibcallName(RTLibcall);
785 Args[0].Flags[0].setReturned();
786 break;
787 case TargetOpcode::G_MEMSET:
788 RTLibcall = RTLIB::MEMSET;
789 Name = TLI.getLibcallName(RTLibcall);
790 Args[0].Flags[0].setReturned();
791 break;
792 default:
793 llvm_unreachable("unsupported opcode");
794 }
795
796 // Unsupported libcall on the target.
797 if (!Name) {
798 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
799 << MIRBuilder.getTII().getName(Opc) << "\n");
800 return LegalizerHelper::UnableToLegalize;
801 }
802
803 CallLowering::CallLoweringInfo Info;
804 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
805 Info.Callee = MachineOperand::CreateES(Name);
806 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
807 Info.IsTailCall =
808 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
809 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
810
811 llvm::append_range(Info.OrigArgs, Args);
812 if (!CLI.lowerCall(MIRBuilder, Info))
813 return LegalizerHelper::UnableToLegalize;
814
815 if (Info.LoweredTailCall) {
816 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
817
818 // Check debug locations before removing the return.
819 LocObserver.checkpoint(true);
820
821 // We must have a return following the call (or debug insts) to get past
822 // isLibCallInTailPosition.
823 do {
824 MachineInstr *Next = MI.getNextNode();
825 assert(Next &&
826 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
827 "Expected instr following MI to be return or debug inst?");
828 // We lowered a tail call, so the call is now the return from the block.
829 // Delete the old return.
830 Next->eraseFromParent();
831 } while (MI.getNextNode());
832
833 // We expect to lose the debug location from the return.
834 LocObserver.checkpoint(false);
835 }
836
837 return LegalizerHelper::Legalized;
838}
839
840static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
841 unsigned Opc = MI.getOpcode();
842 auto &AtomicMI = cast<GMemOperation>(MI);
843 auto &MMO = AtomicMI.getMMO();
844 auto Ordering = MMO.getMergedOrdering();
845 LLT MemType = MMO.getMemoryType();
846 uint64_t MemSize = MemType.getSizeInBytes();
847 if (MemType.isVector())
848 return RTLIB::UNKNOWN_LIBCALL;
849
850#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
851#define LCALL5(A) \
852 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
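  // For example, a 4-byte acquire G_ATOMIC_CMPXCHG selects
  // RTLIB::OUTLINE_ATOMIC_CAS4_ACQ from the table below.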
853 switch (Opc) {
854 case TargetOpcode::G_ATOMIC_CMPXCHG:
855 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
856 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
857 return getOutlineAtomicHelper(LC, Ordering, MemSize);
858 }
859 case TargetOpcode::G_ATOMICRMW_XCHG: {
860 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
861 return getOutlineAtomicHelper(LC, Ordering, MemSize);
862 }
863 case TargetOpcode::G_ATOMICRMW_ADD:
864 case TargetOpcode::G_ATOMICRMW_SUB: {
865 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
866 return getOutlineAtomicHelper(LC, Ordering, MemSize);
867 }
868 case TargetOpcode::G_ATOMICRMW_AND: {
869 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
870 return getOutlineAtomicHelper(LC, Ordering, MemSize);
871 }
872 case TargetOpcode::G_ATOMICRMW_OR: {
873 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
874 return getOutlineAtomicHelper(LC, Ordering, MemSize);
875 }
876 case TargetOpcode::G_ATOMICRMW_XOR: {
877 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
878 return getOutlineAtomicHelper(LC, Ordering, MemSize);
879 }
880 default:
881 return RTLIB::UNKNOWN_LIBCALL;
882 }
883#undef LCALLS
884#undef LCALL5
885}
886
887static LegalizerHelper::LegalizeResult
888createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
889 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
890
891 Type *RetTy;
892 SmallVector<Register> RetRegs;
893 SmallVector<CallLowering::ArgInfo> Args;
894 unsigned Opc = MI.getOpcode();
895 switch (Opc) {
896 case TargetOpcode::G_ATOMIC_CMPXCHG:
897 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
898 Register Success;
899 LLT SuccessLLT;
900 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
901 MI.getFirst4RegLLTs();
902 RetRegs.push_back(Ret);
903 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
904 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
905 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
906 NewLLT) = MI.getFirst5RegLLTs();
907 RetRegs.push_back(Success);
908 RetTy = StructType::get(
909 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
910 }
911 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
912 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
913 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
914 break;
915 }
916 case TargetOpcode::G_ATOMICRMW_XCHG:
917 case TargetOpcode::G_ATOMICRMW_ADD:
918 case TargetOpcode::G_ATOMICRMW_SUB:
919 case TargetOpcode::G_ATOMICRMW_AND:
920 case TargetOpcode::G_ATOMICRMW_OR:
921 case TargetOpcode::G_ATOMICRMW_XOR: {
922 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
923 RetRegs.push_back(Ret);
924 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
925 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
926 Val =
927 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
928 .getReg(0);
929 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
930 Val =
931 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
932 .getReg(0);
933 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
934 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
935 break;
936 }
937 default:
938 llvm_unreachable("unsupported opcode");
939 }
940
941 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
942 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
943 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
944 const char *Name = TLI.getLibcallName(RTLibcall);
945
946 // Unsupported libcall on the target.
947 if (!Name) {
948 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
949 << MIRBuilder.getTII().getName(Opc) << "\n");
950 return LegalizerHelper::UnableToLegalize;
951 }
952
953 CallLowering::CallLoweringInfo Info;
954 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
955 Info.Callee = MachineOperand::CreateES(Name);
956 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
957
958 llvm::append_range(Info.OrigArgs, Args);
959 if (!CLI.lowerCall(MIRBuilder, Info))
960 return LegalizerHelper::UnableToLegalize;
961
962 return LegalizerHelper::Legalized;
963}
964
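// Map a conversion opcode plus source/destination IR types to the
// corresponding soft-float libcall; for example, G_FPEXT from float to fp128
// selects RTLIB::FPEXT_F32_F128 (typically __extendsftf2).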
965static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
966 Type *FromType) {
967 auto ToMVT = MVT::getVT(ToType);
968 auto FromMVT = MVT::getVT(FromType);
969
970 switch (Opcode) {
971 case TargetOpcode::G_FPEXT:
972 return RTLIB::getFPEXT(FromMVT, ToMVT);
973 case TargetOpcode::G_FPTRUNC:
974 return RTLIB::getFPROUND(FromMVT, ToMVT);
975 case TargetOpcode::G_FPTOSI:
976 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
977 case TargetOpcode::G_FPTOUI:
978 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
979 case TargetOpcode::G_SITOFP:
980 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
981 case TargetOpcode::G_UITOFP:
982 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
983 }
984 llvm_unreachable("Unsupported libcall function");
985}
986
987static LegalizerHelper::LegalizeResult
988conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
989 Type *FromType, LostDebugLocObserver &LocObserver,
990 const TargetLowering &TLI, bool IsSigned = false) {
991 CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
992 if (FromType->isIntegerTy()) {
993 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
994 Arg.Flags[0].setSExt();
995 else
996 Arg.Flags[0].setZExt();
997 }
998
999 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
1000 return createLibcall(MIRBuilder, Libcall,
1001 {MI.getOperand(0).getReg(), ToType, 0}, Arg, LocObserver,
1002 &MI);
1003}
1004
1005static RTLIB::Libcall
1006getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
1007 RTLIB::Libcall RTLibcall;
1008 switch (MI.getOpcode()) {
1009 case TargetOpcode::G_GET_FPENV:
1010 RTLibcall = RTLIB::FEGETENV;
1011 break;
1012 case TargetOpcode::G_SET_FPENV:
1013 case TargetOpcode::G_RESET_FPENV:
1014 RTLibcall = RTLIB::FESETENV;
1015 break;
1016 case TargetOpcode::G_GET_FPMODE:
1017 RTLibcall = RTLIB::FEGETMODE;
1018 break;
1019 case TargetOpcode::G_SET_FPMODE:
1020 case TargetOpcode::G_RESET_FPMODE:
1021 RTLibcall = RTLIB::FESETMODE;
1022 break;
1023 default:
1024 llvm_unreachable("Unexpected opcode");
1025 }
1026 return RTLibcall;
1027}
1028
1029// Some library functions that read FP state (fegetmode, fegetenv) write the
1030// state into a region in memory. IR intrinsics that do the same operations
1031// (get_fpmode, get_fpenv) return the state as an integer value. To implement
1032// these intrinsics via the library functions, we need to use a temporary
1033// variable, for example:
1034//
1035// %0:_(s32) = G_GET_FPMODE
1036//
1037// is transformed to:
1038//
1039// %1:_(p0) = G_FRAME_INDEX %stack.0
1040// BL &fegetmode
1041// %0:_(s32) = G_LOAD %1
1042//
1043LegalizerHelper::LegalizeResult
1044LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
1045 MachineInstr &MI,
1046 LostDebugLocObserver &LocObserver) {
1047 const DataLayout &DL = MIRBuilder.getDataLayout();
1048 auto &MF = MIRBuilder.getMF();
1049 auto &MRI = *MIRBuilder.getMRI();
1050 auto &Ctx = MF.getFunction().getContext();
1051
1052 // Create a temporary where the library function will put the read state.
1053 Register Dst = MI.getOperand(0).getReg();
1054 LLT StateTy = MRI.getType(Dst);
1055 TypeSize StateSize = StateTy.getSizeInBytes();
1056 Align TempAlign = getStackTemporaryAlignment(StateTy);
1057 MachinePointerInfo TempPtrInfo;
1058 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1059
1060 // Create a call to library function, with the temporary as an argument.
1061 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1062 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1063 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1064 auto Res =
1065 createLibcall(MIRBuilder, RTLibcall,
1066 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1067 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1068 LocObserver, nullptr);
1069 if (Res != LegalizerHelper::Legalized)
1070 return Res;
1071
1072 // Create a load from the temporary.
1073 MachineMemOperand *MMO = MF.getMachineMemOperand(
1074 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
1075 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1076
1077 return Legalized;
1078}
1079
1080// Similar to `createGetStateLibcall`, this function calls a library function
1081// using transient space on the stack. In this case the library function reads
1082// the content of the memory region.
1083LegalizerHelper::LegalizeResult
1084LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
1085 MachineInstr &MI,
1086 LostDebugLocObserver &LocObserver) {
1087 const DataLayout &DL = MIRBuilder.getDataLayout();
1088 auto &MF = MIRBuilder.getMF();
1089 auto &MRI = *MIRBuilder.getMRI();
1090 auto &Ctx = MF.getFunction().getContext();
1091
1092 // Create a temporary from which the library function will read the new state.
1093 Register Src = MI.getOperand(0).getReg();
1094 LLT StateTy = MRI.getType(Src);
1095 TypeSize StateSize = StateTy.getSizeInBytes();
1096 Align TempAlign = getStackTemporaryAlignment(StateTy);
1097 MachinePointerInfo TempPtrInfo;
1098 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1099
1100 // Put the new state into the temporary.
1101 MachineMemOperand *MMO = MF.getMachineMemOperand(
1102 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
1103 MIRBuilder.buildStore(Src, Temp, *MMO);
1104
1105 // Create a call to library function, with the temporary as an argument.
1106 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1107 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1108 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1109 return createLibcall(MIRBuilder, RTLibcall,
1110 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1111 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1112 LocObserver, nullptr);
1113}
1114
1115/// Returns the corresponding libcall for the given Pred and
1116/// the ICMP predicate that should be generated to compare with #0
1117/// after the libcall.
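///
/// For example, an s64 FCMP_OEQ maps to the OEQ_F64 libcall (__eqdf2), whose
/// integer result is then compared for equality against zero.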
1118static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1119getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {
1120#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1121 do { \
1122 switch (Size) { \
1123 case 32: \
1124 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1125 case 64: \
1126 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1127 case 128: \
1128 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1129 default: \
1130 llvm_unreachable("unexpected size"); \
1131 } \
1132 } while (0)
1133
1134 switch (Pred) {
1135 case CmpInst::FCMP_OEQ:
1136 RTLIBCASE_CMP(OEQ_F, CmpInst::ICMP_EQ);
1137 case CmpInst::FCMP_UNE:
1138 RTLIBCASE_CMP(UNE_F, CmpInst::ICMP_NE);
1139 case CmpInst::FCMP_OGE:
1140 RTLIBCASE_CMP(OGE_F, CmpInst::ICMP_SGE);
1141 case CmpInst::FCMP_OLT:
1142 RTLIBCASE_CMP(OLT_F, CmpInst::ICMP_SLT);
1143 case CmpInst::FCMP_OLE:
1144 RTLIBCASE_CMP(OLE_F, CmpInst::ICMP_SLE);
1145 case CmpInst::FCMP_OGT:
1146 RTLIBCASE_CMP(OGT_F, CmpInst::ICMP_SGT);
1147 case CmpInst::FCMP_UNO:
1148 RTLIBCASE_CMP(UO_F, CmpInst::ICMP_NE);
1149 default:
1150 return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
1151 }
1152}
1153
1154LegalizerHelper::LegalizeResult
1155LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
1156 MachineInstr &MI,
1157 LostDebugLocObserver &LocObserver) {
1158 auto &MF = MIRBuilder.getMF();
1159 auto &Ctx = MF.getFunction().getContext();
1160 const GFCmp *Cmp = cast<GFCmp>(&MI);
1161
1162 LLT OpLLT = MRI.getType(Cmp->getLHSReg());
1163 unsigned Size = OpLLT.getSizeInBits();
1164 if ((Size != 32 && Size != 64 && Size != 128) ||
1165 OpLLT != MRI.getType(Cmp->getRHSReg()))
1166 return UnableToLegalize;
1167
1168 Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
1169
1170 // DstReg type is s32
1171 const Register DstReg = Cmp->getReg(0);
1172 LLT DstTy = MRI.getType(DstReg);
1173 const auto Cond = Cmp->getCond();
1174
1175 // Reference:
1176 // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
1177 // Generates a libcall followed by ICMP.
1178 const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
1179 const CmpInst::Predicate ICmpPred,
1180 const DstOp &Res) -> Register {
1181 // FCMP libcall always returns an i32, and needs an ICMP with #0.
1182 constexpr LLT TempLLT = LLT::scalar(32);
1183 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1184 // Generate libcall, holding result in Temp
1185 const auto Status = createLibcall(
1186 MIRBuilder, Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
1187 {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
1188 LocObserver, &MI);
1189 if (!Status)
1190 return {};
1191
1192 // Compare temp with #0 to get the final result.
1193 return MIRBuilder
1194 .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
1195 .getReg(0);
1196 };
1197
1198 // Simple case if we have a direct mapping from predicate to libcall
1199 if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
1200 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1201 ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
1202 if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
1203 return Legalized;
1204 }
1205 return UnableToLegalize;
1206 }
1207
1208 // No direct mapping found, should be generated as combination of libcalls.
1209
1210 switch (Cond) {
1211 case CmpInst::FCMP_UEQ: {
1212 // FCMP_UEQ: unordered or equal
1213 // Convert into (FCMP_OEQ || FCMP_UNO).
1214
1215 const auto [OeqLibcall, OeqPred] =
1216 getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1217 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1218
1219 const auto [UnoLibcall, UnoPred] =
1220 getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1221 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1222 if (Oeq && Uno)
1223 MIRBuilder.buildOr(DstReg, Oeq, Uno);
1224 else
1225 return UnableToLegalize;
1226
1227 break;
1228 }
1229 case CmpInst::FCMP_ONE: {
1230 // FCMP_ONE: ordered and operands are unequal
1231 // Convert into (!FCMP_OEQ && !FCMP_UNO).
1232
1233 // We invert the predicate instead of generating a NOT
1234 // to save one instruction.
1235 // On AArch64, isel can even select the two compares into a single ccmp.
1236 const auto [OeqLibcall, OeqPred] =
1237 getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1238 const auto NotOeq =
1239 BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
1240
1241 const auto [UnoLibcall, UnoPred] =
1242 getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1243 const auto NotUno =
1244 BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
1245
1246 if (NotOeq && NotUno)
1247 MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
1248 else
1249 return UnableToLegalize;
1250
1251 break;
1252 }
1253 case CmpInst::FCMP_ULT:
1254 case CmpInst::FCMP_UGE:
1255 case CmpInst::FCMP_UGT:
1256 case CmpInst::FCMP_ULE:
1257 case CmpInst::FCMP_ORD: {
1258 // Convert into: !(inverse(Pred))
1259 // E.g. FCMP_ULT becomes !FCMP_OGE
1260 // This is equivalent to the following, but saves some instructions.
1261 // MIRBuilder.buildNot(
1262 // PredTy,
1263 // MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
1264 // Op1, Op2));
1265 const auto [InversedLibcall, InversedPred] =
1266 getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond), Size);
1267 if (!BuildLibcall(InversedLibcall,
1268 CmpInst::getInversePredicate(InversedPred), DstReg))
1269 return UnableToLegalize;
1270 break;
1271 }
1272 default:
1273 return UnableToLegalize;
1274 }
1275
1276 return Legalized;
1277}
1278
1279// This function is used to legalize operations that set the default
1280// environment state. In the C library a call like `fesetmode(FE_DFL_MODE)`
1281// is used for that. On most targets supported by glibc, FE_DFL_MODE is
1282// defined as `((const femode_t *) -1)`, and that assumption is used here. If
1283// it does not hold for some target, the target must provide custom lowering.
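//
// For example, on a target with 64-bit pointers, G_RESET_FPMODE roughly
// becomes:
//
//   %0:_(s64) = G_CONSTANT i64 -1
//   %1:_(p0) = G_INTTOPTR %0
//   BL &fesetmode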
1284LegalizerHelper::LegalizeResult
1285LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
1286 MachineInstr &MI,
1287 LostDebugLocObserver &LocObserver) {
1288 const DataLayout &DL = MIRBuilder.getDataLayout();
1289 auto &MF = MIRBuilder.getMF();
1290 auto &Ctx = MF.getFunction().getContext();
1291
1292 // Create an argument for the library function.
1293 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1294 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1295 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1296 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1297 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1298 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1299 MIRBuilder.buildIntToPtr(Dest, DefValue);
1300
1301 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1302 return createLibcall(MIRBuilder, RTLibcall,
1303 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1304 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1305 LocObserver, &MI);
1306}
1307
1308LegalizerHelper::LegalizeResult
1309LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1310 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1311
1312 switch (MI.getOpcode()) {
1313 default:
1314 return UnableToLegalize;
1315 case TargetOpcode::G_MUL:
1316 case TargetOpcode::G_SDIV:
1317 case TargetOpcode::G_UDIV:
1318 case TargetOpcode::G_SREM:
1319 case TargetOpcode::G_UREM:
1320 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1321 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1322 unsigned Size = LLTy.getSizeInBits();
1323 Type *HLTy = IntegerType::get(Ctx, Size);
1324 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1325 if (Status != Legalized)
1326 return Status;
1327 break;
1328 }
1329 case TargetOpcode::G_FADD:
1330 case TargetOpcode::G_FSUB:
1331 case TargetOpcode::G_FMUL:
1332 case TargetOpcode::G_FDIV:
1333 case TargetOpcode::G_FMA:
1334 case TargetOpcode::G_FPOW:
1335 case TargetOpcode::G_FREM:
1336 case TargetOpcode::G_FCOS:
1337 case TargetOpcode::G_FSIN:
1338 case TargetOpcode::G_FTAN:
1339 case TargetOpcode::G_FACOS:
1340 case TargetOpcode::G_FASIN:
1341 case TargetOpcode::G_FATAN:
1342 case TargetOpcode::G_FATAN2:
1343 case TargetOpcode::G_FCOSH:
1344 case TargetOpcode::G_FSINH:
1345 case TargetOpcode::G_FTANH:
1346 case TargetOpcode::G_FLOG10:
1347 case TargetOpcode::G_FLOG:
1348 case TargetOpcode::G_FLOG2:
1349 case TargetOpcode::G_FEXP:
1350 case TargetOpcode::G_FEXP2:
1351 case TargetOpcode::G_FEXP10:
1352 case TargetOpcode::G_FCEIL:
1353 case TargetOpcode::G_FFLOOR:
1354 case TargetOpcode::G_FMINNUM:
1355 case TargetOpcode::G_FMAXNUM:
1356 case TargetOpcode::G_FMINIMUMNUM:
1357 case TargetOpcode::G_FMAXIMUMNUM:
1358 case TargetOpcode::G_FSQRT:
1359 case TargetOpcode::G_FRINT:
1360 case TargetOpcode::G_FNEARBYINT:
1361 case TargetOpcode::G_INTRINSIC_TRUNC:
1362 case TargetOpcode::G_INTRINSIC_ROUND:
1363 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1364 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1365 unsigned Size = LLTy.getSizeInBits();
1366 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1367 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1368 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1369 return UnableToLegalize;
1370 }
1371 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1372 if (Status != Legalized)
1373 return Status;
1374 break;
1375 }
1376 case TargetOpcode::G_FSINCOS: {
1377 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1378 unsigned Size = LLTy.getSizeInBits();
1379 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1380 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1381 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1382 return UnableToLegalize;
1383 }
1384 return emitSincosLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1385 }
1386 case TargetOpcode::G_FMODF: {
1387 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1388 unsigned Size = LLTy.getSizeInBits();
1389 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1390 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1391 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1392 return UnableToLegalize;
1393 }
1394 return emitModfLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1395 }
1396 case TargetOpcode::G_LROUND:
1397 case TargetOpcode::G_LLROUND:
1398 case TargetOpcode::G_INTRINSIC_LRINT:
1399 case TargetOpcode::G_INTRINSIC_LLRINT: {
1400 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1401 unsigned Size = LLTy.getSizeInBits();
1402 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1403 Type *ITy = IntegerType::get(
1404 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1405 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1406 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1407 return UnableToLegalize;
1408 }
1409 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1410 LegalizeResult Status =
1411 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1412 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1413 if (Status != Legalized)
1414 return Status;
1415 MI.eraseFromParent();
1416 return Legalized;
1417 }
1418 case TargetOpcode::G_FPOWI:
1419 case TargetOpcode::G_FLDEXP: {
1420 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1421 unsigned Size = LLTy.getSizeInBits();
1422 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1423 Type *ITy = IntegerType::get(
1424 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1425 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1426 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1427 return UnableToLegalize;
1428 }
1429 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1430 SmallVector<CallLowering::ArgInfo, 2> Args = {
1431 {MI.getOperand(1).getReg(), HLTy, 0},
1432 {MI.getOperand(2).getReg(), ITy, 1}};
1433 Args[1].Flags[0].setSExt();
1434 LegalizeResult Status =
1435 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1436 Args, LocObserver, &MI);
1437 if (Status != Legalized)
1438 return Status;
1439 break;
1440 }
1441 case TargetOpcode::G_FPEXT:
1442 case TargetOpcode::G_FPTRUNC: {
1443 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1444 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1445 if (!FromTy || !ToTy)
1446 return UnableToLegalize;
1447 LegalizeResult Status =
1448 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver, TLI);
1449 if (Status != Legalized)
1450 return Status;
1451 break;
1452 }
1453 case TargetOpcode::G_FCMP: {
1454 LegalizeResult Status = createFCMPLibcall(MIRBuilder, MI, LocObserver);
1455 if (Status != Legalized)
1456 return Status;
1457 MI.eraseFromParent();
1458 return Status;
1459 }
1460 case TargetOpcode::G_FPTOSI:
1461 case TargetOpcode::G_FPTOUI: {
1462 // FIXME: Support other types
1463 Type *FromTy =
1464 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1465 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1466 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1467 return UnableToLegalize;
1468 LegalizeResult Status = conversionLibcall(
1469 MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver, TLI);
1470 if (Status != Legalized)
1471 return Status;
1472 break;
1473 }
1474 case TargetOpcode::G_SITOFP:
1475 case TargetOpcode::G_UITOFP: {
1476 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1477 Type *ToTy =
1478 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1479 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1480 return UnableToLegalize;
1481 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
1482 LegalizeResult Status =
1483 conversionLibcall(MI, MIRBuilder, ToTy, Type::getIntNTy(Ctx, FromSize),
1484 LocObserver, TLI, IsSigned);
1485 if (Status != Legalized)
1486 return Status;
1487 break;
1488 }
1489 case TargetOpcode::G_ATOMICRMW_XCHG:
1490 case TargetOpcode::G_ATOMICRMW_ADD:
1491 case TargetOpcode::G_ATOMICRMW_SUB:
1492 case TargetOpcode::G_ATOMICRMW_AND:
1493 case TargetOpcode::G_ATOMICRMW_OR:
1494 case TargetOpcode::G_ATOMICRMW_XOR:
1495 case TargetOpcode::G_ATOMIC_CMPXCHG:
1496 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1497 auto Status = createAtomicLibcall(MIRBuilder, MI);
1498 if (Status != Legalized)
1499 return Status;
1500 break;
1501 }
1502 case TargetOpcode::G_BZERO:
1503 case TargetOpcode::G_MEMCPY:
1504 case TargetOpcode::G_MEMMOVE:
1505 case TargetOpcode::G_MEMSET: {
1506 LegalizeResult Result =
1507 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1508 if (Result != Legalized)
1509 return Result;
1510 MI.eraseFromParent();
1511 return Result;
1512 }
1513 case TargetOpcode::G_GET_FPENV:
1514 case TargetOpcode::G_GET_FPMODE: {
1515 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1516 if (Result != Legalized)
1517 return Result;
1518 break;
1519 }
1520 case TargetOpcode::G_SET_FPENV:
1521 case TargetOpcode::G_SET_FPMODE: {
1522 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1523 if (Result != Legalized)
1524 return Result;
1525 break;
1526 }
1527 case TargetOpcode::G_RESET_FPENV:
1528 case TargetOpcode::G_RESET_FPMODE: {
1529 LegalizeResult Result =
1530 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1531 if (Result != Legalized)
1532 return Result;
1533 break;
1534 }
1535 }
1536
1537 MI.eraseFromParent();
1538 return Legalized;
1539}
1540
1541LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1542 unsigned TypeIdx,
1543 LLT NarrowTy) {
1544 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1545 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1546
1547 switch (MI.getOpcode()) {
1548 default:
1549 return UnableToLegalize;
1550 case TargetOpcode::G_IMPLICIT_DEF: {
1551 Register DstReg = MI.getOperand(0).getReg();
1552 LLT DstTy = MRI.getType(DstReg);
1553
1554 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1555 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1556 // FIXME: Although this would also be legal for the general case, it causes
1557 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1558 // combines not being hit). This seems to be a problem related to the
1559 // artifact combiner.
1560 if (SizeOp0 % NarrowSize != 0) {
1561 LLT ImplicitTy = NarrowTy;
1562 if (DstTy.isVector())
1563 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
1564
1565 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1566 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1567
1568 MI.eraseFromParent();
1569 return Legalized;
1570 }
1571
1572 int NumParts = SizeOp0 / NarrowSize;
1573
1574 SmallVector<Register> DstRegs;
1575 for (int i = 0; i < NumParts; ++i)
1576 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1577
1578 if (DstTy.isVector())
1579 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1580 else
1581 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1582 MI.eraseFromParent();
1583 return Legalized;
1584 }
1585 case TargetOpcode::G_CONSTANT: {
1586 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1587 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1588 unsigned TotalSize = Ty.getSizeInBits();
1589 unsigned NarrowSize = NarrowTy.getSizeInBits();
1590 int NumParts = TotalSize / NarrowSize;
1591
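    // For example, an s65 constant narrowed with s32 is emitted as two s32
    // pieces plus an s1 leftover piece.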
1592 SmallVector<Register, 4> PartRegs;
1593 for (int I = 0; I != NumParts; ++I) {
1594 unsigned Offset = I * NarrowSize;
1595 auto K = MIRBuilder.buildConstant(NarrowTy,
1596 Val.lshr(Offset).trunc(NarrowSize));
1597 PartRegs.push_back(K.getReg(0));
1598 }
1599
1600 LLT LeftoverTy;
1601 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1602 SmallVector<Register, 1> LeftoverRegs;
1603 if (LeftoverBits != 0) {
1604 LeftoverTy = LLT::scalar(LeftoverBits);
1605 auto K = MIRBuilder.buildConstant(
1606 LeftoverTy,
1607 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1608 LeftoverRegs.push_back(K.getReg(0));
1609 }
1610
1611 insertParts(MI.getOperand(0).getReg(),
1612 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1613
1614 MI.eraseFromParent();
1615 return Legalized;
1616 }
1617 case TargetOpcode::G_SEXT:
1618 case TargetOpcode::G_ZEXT:
1619 case TargetOpcode::G_ANYEXT:
1620 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1621 case TargetOpcode::G_TRUNC: {
1622 if (TypeIdx != 1)
1623 return UnableToLegalize;
1624
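    // For example, narrowing %dst:_(s32) = G_TRUNC %src:_(s64) with an s32
    // NarrowTy unmerges the source into two s32 halves and copies the low half
    // into %dst.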
1625 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1626 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1627 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1628 return UnableToLegalize;
1629 }
1630
1631 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1632 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1633 MI.eraseFromParent();
1634 return Legalized;
1635 }
1636 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1637 case TargetOpcode::G_FREEZE: {
1638 if (TypeIdx != 0)
1639 return UnableToLegalize;
1640
1641 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1642 // Should widen scalar first
1643 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1644 return UnableToLegalize;
1645
1646 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1647 SmallVector<Register> Parts;
1648 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1649 Parts.push_back(
1650 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1651 .getReg(0));
1652 }
1653
1654 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1655 MI.eraseFromParent();
1656 return Legalized;
1657 }
1658 case TargetOpcode::G_ADD:
1659 case TargetOpcode::G_SUB:
1660 case TargetOpcode::G_SADDO:
1661 case TargetOpcode::G_SSUBO:
1662 case TargetOpcode::G_SADDE:
1663 case TargetOpcode::G_SSUBE:
1664 case TargetOpcode::G_UADDO:
1665 case TargetOpcode::G_USUBO:
1666 case TargetOpcode::G_UADDE:
1667 case TargetOpcode::G_USUBE:
1668 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1669 case TargetOpcode::G_MUL:
1670 case TargetOpcode::G_UMULH:
1671 return narrowScalarMul(MI, NarrowTy);
1672 case TargetOpcode::G_EXTRACT:
1673 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1674 case TargetOpcode::G_INSERT:
1675 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1676 case TargetOpcode::G_LOAD: {
1677 auto &LoadMI = cast<GLoad>(MI);
1678 Register DstReg = LoadMI.getDstReg();
1679 LLT DstTy = MRI.getType(DstReg);
1680 if (DstTy.isVector())
1681 return UnableToLegalize;
1682
1683 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1684 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1685 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1686 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1687 LoadMI.eraseFromParent();
1688 return Legalized;
1689 }
1690
1691 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1692 }
1693 case TargetOpcode::G_ZEXTLOAD:
1694 case TargetOpcode::G_SEXTLOAD: {
1695 auto &LoadMI = cast<GExtLoad>(MI);
1696 Register DstReg = LoadMI.getDstReg();
1697 Register PtrReg = LoadMI.getPointerReg();
1698
1699 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1700 auto &MMO = LoadMI.getMMO();
1701 unsigned MemSize = MMO.getSizeInBits().getValue();
1702
1703 if (MemSize == NarrowSize) {
1704 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1705 } else if (MemSize < NarrowSize) {
1706 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1707 } else if (MemSize > NarrowSize) {
1708 // FIXME: Need to split the load.
1709 return UnableToLegalize;
1710 }
1711
1712 if (isa<GZExtLoad>(LoadMI))
1713 MIRBuilder.buildZExt(DstReg, TmpReg);
1714 else
1715 MIRBuilder.buildSExt(DstReg, TmpReg);
1716
1717 LoadMI.eraseFromParent();
1718 return Legalized;
1719 }
1720 case TargetOpcode::G_STORE: {
1721 auto &StoreMI = cast<GStore>(MI);
1722
1723 Register SrcReg = StoreMI.getValueReg();
1724 LLT SrcTy = MRI.getType(SrcReg);
1725 if (SrcTy.isVector())
1726 return UnableToLegalize;
1727
1728 int NumParts = SizeOp0 / NarrowSize;
1729 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1730 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1731 if (SrcTy.isVector() && LeftoverBits != 0)
1732 return UnableToLegalize;
1733
1734 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1735 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1736 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1737 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1738 StoreMI.eraseFromParent();
1739 return Legalized;
1740 }
1741
1742 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1743 }
1744 case TargetOpcode::G_SELECT:
1745 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1746 case TargetOpcode::G_AND:
1747 case TargetOpcode::G_OR:
1748 case TargetOpcode::G_XOR: {
1749 // Legalize bitwise operation:
1750 // A = BinOp<Ty> B, C
1751 // into:
1752 // B1, ..., BN = G_UNMERGE_VALUES B
1753 // C1, ..., CN = G_UNMERGE_VALUES C
1754 // A1 = BinOp<Ty/N> B1, C1
1755 // ...
1756 // AN = BinOp<Ty/N> BN, CN
1757 // A = G_MERGE_VALUES A1, ..., AN
1758 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1759 }
1760 case TargetOpcode::G_SHL:
1761 case TargetOpcode::G_LSHR:
1762 case TargetOpcode::G_ASHR:
1763 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1764 case TargetOpcode::G_CTLZ:
1765 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1766 case TargetOpcode::G_CTTZ:
1767 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1768 case TargetOpcode::G_CTPOP:
1769 if (TypeIdx == 1)
1770 switch (MI.getOpcode()) {
1771 case TargetOpcode::G_CTLZ:
1772 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1773 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1774 case TargetOpcode::G_CTTZ:
1775 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1776 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1777 case TargetOpcode::G_CTPOP:
1778 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1779 default:
1780 return UnableToLegalize;
1781 }
1782
1783 Observer.changingInstr(MI);
1784 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1785 Observer.changedInstr(MI);
1786 return Legalized;
1787 case TargetOpcode::G_INTTOPTR:
1788 if (TypeIdx != 1)
1789 return UnableToLegalize;
1790
1791 Observer.changingInstr(MI);
1792 narrowScalarSrc(MI, NarrowTy, 1);
1793 Observer.changedInstr(MI);
1794 return Legalized;
1795 case TargetOpcode::G_PTRTOINT:
1796 if (TypeIdx != 0)
1797 return UnableToLegalize;
1798
1799 Observer.changingInstr(MI);
1800 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1801 Observer.changedInstr(MI);
1802 return Legalized;
1803 case TargetOpcode::G_PHI: {
1804 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1805 // NarrowSize.
1806 if (SizeOp0 % NarrowSize != 0)
1807 return UnableToLegalize;
1808
1809 unsigned NumParts = SizeOp0 / NarrowSize;
1810 SmallVector<Register, 2> DstRegs(NumParts);
1811 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1812 Observer.changingInstr(MI);
1813 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1814 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1815 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1816 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1817 SrcRegs[i / 2], MIRBuilder, MRI);
1818 }
1819 MachineBasicBlock &MBB = *MI.getParent();
1820 MIRBuilder.setInsertPt(MBB, MI);
1821 for (unsigned i = 0; i < NumParts; ++i) {
1822 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1823 MachineInstrBuilder MIB =
1824 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1825 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1826 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1827 }
1828 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1829 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1830 Observer.changedInstr(MI);
1831 MI.eraseFromParent();
1832 return Legalized;
1833 }
1834 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1835 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1836 if (TypeIdx != 2)
1837 return UnableToLegalize;
1838
1839 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1840 Observer.changingInstr(MI);
1841 narrowScalarSrc(MI, NarrowTy, OpIdx);
1842 Observer.changedInstr(MI);
1843 return Legalized;
1844 }
1845 case TargetOpcode::G_ICMP: {
1846 Register LHS = MI.getOperand(2).getReg();
1847 LLT SrcTy = MRI.getType(LHS);
1848 CmpInst::Predicate Pred =
1849 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1850
1851 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1852 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1853 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1854 LHSLeftoverRegs, MIRBuilder, MRI))
1855 return UnableToLegalize;
1856
1857 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1858 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1859 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1860 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1861 return UnableToLegalize;
1862
1863 // We now have the LHS and RHS of the compare split into narrow-type
1864 // registers, plus potentially some leftover type.
1865 Register Dst = MI.getOperand(0).getReg();
1866 LLT ResTy = MRI.getType(Dst);
1867 if (ICmpInst::isEquality(Pred)) {
1868 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1869 // them together. For each equal part, the result should be all 0s. For
1870 // each non-equal part, we'll get at least one 1.
1871 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1872 SmallVector<Register, 4> Xors;
1873 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1874 auto LHS = std::get<0>(LHSAndRHS);
1875 auto RHS = std::get<1>(LHSAndRHS);
1876 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1877 Xors.push_back(Xor);
1878 }
1879
1880 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1881 // to the desired narrow type so that we can OR them together later.
1882 SmallVector<Register, 4> WidenedXors;
1883 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1884 auto LHS = std::get<0>(LHSAndRHS);
1885 auto RHS = std::get<1>(LHSAndRHS);
1886 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1887 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1888 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1889 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1890 llvm::append_range(Xors, WidenedXors);
1891 }
1892
1893 // Now, for each part we broke up, we know if they are equal/not equal
1894 // based off the G_XOR. We can OR these all together and compare against
1895 // 0 to get the result.
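// For example (an illustrative sketch with made-up register names, assuming
// an s64 equality compare split into two s32 parts and no leftover piece):
//   %x0:_(s32) = G_XOR %lhs0, %rhs0
//   %x1:_(s32) = G_XOR %lhs1, %rhs1
//   %or:_(s32) = G_OR %x0, %x1
//   %dst:_(s1) = G_ICMP intpred(eq), %or(s32), %zero(s32)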
1896 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1897 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1898 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1899 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1900 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1901 } else {
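// For non-equality predicates the pieces are compared from least to most
// significant: each piece uses the unsigned form of the predicate (the
// original predicate only on the final piece), and a G_SELECT keeps the
// lower pieces' result when the current pieces compare equal, so the most
// significant differing piece decides the overall result.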
1902 Register CmpIn;
1903 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1904 Register CmpOut;
1905 CmpInst::Predicate PartPred;
1906
1907 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1908 PartPred = Pred;
1909 CmpOut = Dst;
1910 } else {
1911 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1912 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1913 }
1914
1915 if (!CmpIn) {
1916 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
1917 RHSPartRegs[I]);
1918 } else {
1919 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
1920 RHSPartRegs[I]);
1921 auto CmpEq = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1922 LHSPartRegs[I], RHSPartRegs[I]);
1923 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1924 }
1925
1926 CmpIn = CmpOut;
1927 }
1928
1929 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1930 Register CmpOut;
1931 CmpInst::Predicate PartPred;
1932
1933 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1934 PartPred = Pred;
1935 CmpOut = Dst;
1936 } else {
1937 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1938 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1939 }
1940
1941 if (!CmpIn) {
1942 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
1943 RHSLeftoverRegs[I]);
1944 } else {
1945 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
1946 RHSLeftoverRegs[I]);
1947 auto CmpEq =
1948 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1949 LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
1950 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1951 }
1952
1953 CmpIn = CmpOut;
1954 }
1955 }
1956 MI.eraseFromParent();
1957 return Legalized;
1958 }
1959 case TargetOpcode::G_FCMP:
1960 if (TypeIdx != 0)
1961 return UnableToLegalize;
1962
1963 Observer.changingInstr(MI);
1964 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1965 Observer.changedInstr(MI);
1966 return Legalized;
1967
1968 case TargetOpcode::G_SEXT_INREG: {
1969 if (TypeIdx != 0)
1970 return UnableToLegalize;
1971
1972 int64_t SizeInBits = MI.getOperand(2).getImm();
1973
1974 // So long as the new type has more bits than the bits we're extending, we
1975 // don't need to break it apart.
1976 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1977 Observer.changingInstr(MI);
1978 // We don't lose any non-extension bits by truncating the src and
1979 // sign-extending the dst.
1980 MachineOperand &MO1 = MI.getOperand(1);
1981 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1982 MO1.setReg(TruncMIB.getReg(0));
1983
1984 MachineOperand &MO2 = MI.getOperand(0);
1985 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1986 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1987 MIRBuilder.buildSExt(MO2, DstExt);
1988 MO2.setReg(DstExt);
1989 Observer.changedInstr(MI);
1990 return Legalized;
1991 }
1992
1993 // Break it apart. Components below the extension point are unmodified. The
1994 // component containing the extension point becomes a narrower SEXT_INREG.
1995 // Components above it are ashr'd from the component containing the
1996 // extension point.
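// For example (an illustrative sketch), narrowing
//   %dst:_(s128) = G_SEXT_INREG %src:_(s128), 40
// with NarrowTy = s32 produces four s32 pieces: piece 0 is passed through
// unchanged, piece 1 becomes G_SEXT_INREG of the second source piece with
// 8 bits (40 % 32), pieces 2 and 3 are both the G_ASHR of piece 1 by 31,
// and the pieces are recombined with G_MERGE_VALUES.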
1997 if (SizeOp0 % NarrowSize != 0)
1998 return UnableToLegalize;
1999 int NumParts = SizeOp0 / NarrowSize;
2000
2001 // List the registers where the destination will be scattered.
2002 SmallVector<Register, 2> DstRegs;
2003 // List the registers where the source will be split.
2004 SmallVector<Register, 2> SrcRegs;
2005
2006 // Create all the temporary registers.
2007 for (int i = 0; i < NumParts; ++i) {
2008 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
2009
2010 SrcRegs.push_back(SrcReg);
2011 }
2012
2013 // Explode the big arguments into smaller chunks.
2014 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
2015
2016 Register AshrCstReg =
2017 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
2018 .getReg(0);
2019 Register FullExtensionReg;
2020 Register PartialExtensionReg;
2021
2022 // Do the operation on each small part.
2023 for (int i = 0; i < NumParts; ++i) {
2024 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
2025 DstRegs.push_back(SrcRegs[i]);
2026 PartialExtensionReg = DstRegs.back();
2027 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
2028 assert(PartialExtensionReg &&
2029 "Expected to visit partial extension before full");
2030 if (FullExtensionReg) {
2031 DstRegs.push_back(FullExtensionReg);
2032 continue;
2033 }
2034 DstRegs.push_back(
2035 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
2036 .getReg(0));
2037 FullExtensionReg = DstRegs.back();
2038 } else {
2039 DstRegs.push_back(
2040 MIRBuilder
2041 .buildInstr(
2042 TargetOpcode::G_SEXT_INREG, {NarrowTy},
2043 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
2044 .getReg(0));
2045 PartialExtensionReg = DstRegs.back();
2046 }
2047 }
2048
2049 // Gather the destination registers into the final destination.
2050 Register DstReg = MI.getOperand(0).getReg();
2051 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2052 MI.eraseFromParent();
2053 return Legalized;
2054 }
2055 case TargetOpcode::G_BSWAP:
2056 case TargetOpcode::G_BITREVERSE: {
2057 if (SizeOp0 % NarrowSize != 0)
2058 return UnableToLegalize;
2059
2060 Observer.changingInstr(MI);
2061 SmallVector<Register, 2> SrcRegs, DstRegs;
2062 unsigned NumParts = SizeOp0 / NarrowSize;
2063 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2064 MIRBuilder, MRI);
2065
2066 for (unsigned i = 0; i < NumParts; ++i) {
2067 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
2068 {SrcRegs[NumParts - 1 - i]});
2069 DstRegs.push_back(DstPart.getReg(0));
2070 }
2071
2072 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
2073
2074 Observer.changedInstr(MI);
2075 MI.eraseFromParent();
2076 return Legalized;
2077 }
2078 case TargetOpcode::G_PTR_ADD:
2079 case TargetOpcode::G_PTRMASK: {
2080 if (TypeIdx != 1)
2081 return UnableToLegalize;
2082 Observer.changingInstr(MI);
2083 narrowScalarSrc(MI, NarrowTy, 2);
2084 Observer.changedInstr(MI);
2085 return Legalized;
2086 }
2087 case TargetOpcode::G_FPTOUI:
2088 case TargetOpcode::G_FPTOSI:
2089 case TargetOpcode::G_FPTOUI_SAT:
2090 case TargetOpcode::G_FPTOSI_SAT:
2091 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
2092 case TargetOpcode::G_FPEXT:
2093 if (TypeIdx != 0)
2094 return UnableToLegalize;
2095 Observer.changingInstr(MI);
2096 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
2097 Observer.changedInstr(MI);
2098 return Legalized;
2099 case TargetOpcode::G_FLDEXP:
2100 case TargetOpcode::G_STRICT_FLDEXP:
2101 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
2102 case TargetOpcode::G_VSCALE: {
2103 Register Dst = MI.getOperand(0).getReg();
2104 LLT Ty = MRI.getType(Dst);
2105
2106 // Assume VSCALE(1) fits into a legal integer
2107 const APInt One(NarrowTy.getSizeInBits(), 1);
2108 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
2109 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
2110 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
2111 MIRBuilder.buildMul(Dst, ZExt, C);
2112
2113 MI.eraseFromParent();
2114 return Legalized;
2115 }
2116 }
2117}
2118
2119 Register LegalizerHelper::coerceToScalar(Register Val) {
2120 LLT Ty = MRI.getType(Val);
2121 if (Ty.isScalar())
2122 return Val;
2123
2124 const DataLayout &DL = MIRBuilder.getDataLayout();
2125 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
2126 if (Ty.isPointer()) {
2127 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2128 return Register();
2129 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2130 }
2131
2132 Register NewVal = Val;
2133
2134 assert(Ty.isVector());
2135 if (Ty.isPointerVector())
2136 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2137 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2138}
2139
2140 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
2141 unsigned OpIdx, unsigned ExtOpcode) {
2142 MachineOperand &MO = MI.getOperand(OpIdx);
2143 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2144 MO.setReg(ExtB.getReg(0));
2145}
2146
2147 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
2148 unsigned OpIdx) {
2149 MachineOperand &MO = MI.getOperand(OpIdx);
2150 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
2151 MO.setReg(ExtB.getReg(0));
2152}
2153
2154 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
2155 unsigned OpIdx, unsigned TruncOpcode) {
2156 MachineOperand &MO = MI.getOperand(OpIdx);
2157 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2158 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2159 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2160 MO.setReg(DstExt);
2161}
2162
2163 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
2164 unsigned OpIdx, unsigned ExtOpcode) {
2165 MachineOperand &MO = MI.getOperand(OpIdx);
2166 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2167 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2168 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2169 MO.setReg(DstTrunc);
2170}
2171
2172 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
2173 unsigned OpIdx) {
2174 MachineOperand &MO = MI.getOperand(OpIdx);
2175 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2176 Register Dst = MO.getReg();
2177 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2178 MO.setReg(DstExt);
2179 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2180}
2181
2182 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
2183 unsigned OpIdx) {
2184 MachineOperand &MO = MI.getOperand(OpIdx);
2185 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2186}
2187
2188 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2189 MachineOperand &Op = MI.getOperand(OpIdx);
2190 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
2191}
2192
2193 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2194 MachineOperand &MO = MI.getOperand(OpIdx);
2195 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2196 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2197 MIRBuilder.buildBitcast(MO, CastDst);
2198 MO.setReg(CastDst);
2199}
2200
2201 LegalizerHelper::LegalizeResult
2202 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
2203 LLT WideTy) {
2204 if (TypeIdx != 1)
2205 return UnableToLegalize;
2206
2207 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
2208 if (DstTy.isVector())
2209 return UnableToLegalize;
2210
2211 LLT SrcTy = MRI.getType(Src1Reg);
2212 const int DstSize = DstTy.getSizeInBits();
2213 const int SrcSize = SrcTy.getSizeInBits();
2214 const int WideSize = WideTy.getSizeInBits();
2215 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2216
2217 unsigned NumOps = MI.getNumOperands();
2218 unsigned NumSrc = MI.getNumOperands() - 1;
2219 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2220
2221 if (WideSize >= DstSize) {
2222 // Directly pack the bits in the target type.
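// For example (illustrative), widening
//   %d:_(s16) = G_MERGE_VALUES %a:_(s8), %b:_(s8)
// to s32: zero-extend both sources to s32, shift the second left by 8,
// OR the pieces together, and truncate the s32 result back to s16.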
2223 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
2224
2225 for (unsigned I = 2; I != NumOps; ++I) {
2226 const unsigned Offset = (I - 1) * PartSize;
2227
2228 Register SrcReg = MI.getOperand(I).getReg();
2229 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
2230
2231 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
2232
2233 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
2234 MRI.createGenericVirtualRegister(WideTy);
2235
2236 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
2237 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
2238 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
2239 ResultReg = NextResult;
2240 }
2241
2242 if (WideSize > DstSize)
2243 MIRBuilder.buildTrunc(DstReg, ResultReg);
2244 else if (DstTy.isPointer())
2245 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
2246
2247 MI.eraseFromParent();
2248 return Legalized;
2249 }
2250
2251 // Unmerge the original values to the GCD type, and recombine to the next
2252 // multiple greater than the original type.
2253 //
2254 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2255 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2256 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2257 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2258 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2259 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2260 // %12:_(s12) = G_MERGE_VALUES %10, %11
2261 //
2262 // Padding with undef if necessary:
2263 //
2264 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2265 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2266 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2267 // %7:_(s2) = G_IMPLICIT_DEF
2268 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2269 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2270 // %10:_(s12) = G_MERGE_VALUES %8, %9
2271
2272 const int GCD = std::gcd(SrcSize, WideSize);
2273 LLT GCDTy = LLT::scalar(GCD);
2274
2275 SmallVector<Register, 8> NewMergeRegs;
2276 SmallVector<Register, 8> Unmerges;
2277 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
2278
2279 // Decompose the original operands if they don't evenly divide.
2280 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
2281 Register SrcReg = MO.getReg();
2282 if (GCD == SrcSize) {
2283 Unmerges.push_back(SrcReg);
2284 } else {
2285 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2286 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2287 Unmerges.push_back(Unmerge.getReg(J));
2288 }
2289 }
2290
2291 // Pad with undef to the next size that is a multiple of the requested size.
2292 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
2293 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
2294 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
2295 Unmerges.push_back(UndefReg);
2296 }
2297
2298 const int PartsPerGCD = WideSize / GCD;
2299
2300 // Build merges of each piece.
2301 ArrayRef<Register> Slicer(Unmerges);
2302 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2303 auto Merge =
2304 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2305 NewMergeRegs.push_back(Merge.getReg(0));
2306 }
2307
2308 // A truncate may be necessary if the requested type doesn't evenly divide the
2309 // original result type.
2310 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
2311 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2312 } else {
2313 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2314 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2315 }
2316
2317 MI.eraseFromParent();
2318 return Legalized;
2319}
2320
2321 LegalizerHelper::LegalizeResult
2322 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
2323 LLT WideTy) {
2324 if (TypeIdx != 0)
2325 return UnableToLegalize;
2326
2327 int NumDst = MI.getNumOperands() - 1;
2328 Register SrcReg = MI.getOperand(NumDst).getReg();
2329 LLT SrcTy = MRI.getType(SrcReg);
2330 if (SrcTy.isVector())
2331 return UnableToLegalize;
2332
2333 Register Dst0Reg = MI.getOperand(0).getReg();
2334 LLT DstTy = MRI.getType(Dst0Reg);
2335 if (!DstTy.isScalar())
2336 return UnableToLegalize;
2337
2338 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
2339 if (SrcTy.isPointer()) {
2340 const DataLayout &DL = MIRBuilder.getDataLayout();
2341 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
2342 LLVM_DEBUG(
2343 dbgs() << "Not casting non-integral address space integer\n");
2344 return UnableToLegalize;
2345 }
2346
2347 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
2348 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2349 }
2350
2351 // Widen SrcTy to WideTy. This does not affect the result, but since the
2352 // user requested this size, it is probably better handled than SrcTy and
2353 // should reduce the total number of legalization artifacts.
2354 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2355 SrcTy = WideTy;
2356 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2357 }
2358
2359 // There's no unmerge type to target. Directly extract the bits from the
2360 // source type.
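// For example (illustrative), widening
//   %a:_(s8), %b:_(s8) = G_UNMERGE_VALUES %x:_(s16)
// to s32: any-extend %x to s32, truncate it to produce %a, and truncate the
// value logically shifted right by 8 to produce %b.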
2361 unsigned DstSize = DstTy.getSizeInBits();
2362
2363 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2364 for (int I = 1; I != NumDst; ++I) {
2365 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2366 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2367 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2368 }
2369
2370 MI.eraseFromParent();
2371 return Legalized;
2372 }
2373
2374 // Extend the source to a wider type.
2375 LLT LCMTy = getLCMType(SrcTy, WideTy);
2376
2377 Register WideSrc = SrcReg;
2378 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2379 // TODO: If this is an integral address space, cast to integer and anyext.
2380 if (SrcTy.isPointer()) {
2381 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2382 return UnableToLegalize;
2383 }
2384
2385 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2386 }
2387
2388 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2389
2390 // Create a sequence of unmerges and merges to the original results. Since we
2391 // may have widened the source, we will need to pad the results with dead defs
2392 // to cover the source register.
2393 // e.g. widen s48 to s64:
2394 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2395 //
2396 // =>
2397 // %4:_(s192) = G_ANYEXT %0:_(s96)
2398 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2399 // ; unpack to GCD type, with extra dead defs
2400 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2401 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2402 // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
2403 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2404 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2405 const LLT GCDTy = getGCDType(WideTy, DstTy);
2406 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2407 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2408
2409 // Directly unmerge to the destination without going through a GCD type
2410 // if possible
2411 if (PartsPerRemerge == 1) {
2412 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2413
2414 for (int I = 0; I != NumUnmerge; ++I) {
2415 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2416
2417 for (int J = 0; J != PartsPerUnmerge; ++J) {
2418 int Idx = I * PartsPerUnmerge + J;
2419 if (Idx < NumDst)
2420 MIB.addDef(MI.getOperand(Idx).getReg());
2421 else {
2422 // Create dead def for excess components.
2423 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2424 }
2425 }
2426
2427 MIB.addUse(Unmerge.getReg(I));
2428 }
2429 } else {
2430 SmallVector<Register, 16> Parts;
2431 for (int J = 0; J != NumUnmerge; ++J)
2432 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2433
2434 SmallVector<Register, 8> RemergeParts;
2435 for (int I = 0; I != NumDst; ++I) {
2436 for (int J = 0; J < PartsPerRemerge; ++J) {
2437 const int Idx = I * PartsPerRemerge + J;
2438 RemergeParts.emplace_back(Parts[Idx]);
2439 }
2440
2441 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2442 RemergeParts.clear();
2443 }
2444 }
2445
2446 MI.eraseFromParent();
2447 return Legalized;
2448}
2449
2450 LegalizerHelper::LegalizeResult
2451 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2452 LLT WideTy) {
2453 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2454 unsigned Offset = MI.getOperand(2).getImm();
2455
2456 if (TypeIdx == 0) {
2457 if (SrcTy.isVector() || DstTy.isVector())
2458 return UnableToLegalize;
2459
2460 SrcOp Src(SrcReg);
2461 if (SrcTy.isPointer()) {
2462 // Extracts from pointers can be handled only if they are really just
2463 // simple integers.
2464 const DataLayout &DL = MIRBuilder.getDataLayout();
2465 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2466 return UnableToLegalize;
2467
2468 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2469 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2470 SrcTy = SrcAsIntTy;
2471 }
2472
2473 if (DstTy.isPointer())
2474 return UnableToLegalize;
2475
2476 if (Offset == 0) {
2477 // Avoid a shift in the degenerate case.
2478 MIRBuilder.buildTrunc(DstReg,
2479 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2480 MI.eraseFromParent();
2481 return Legalized;
2482 }
2483
2484 // Do a shift in the source type.
2485 LLT ShiftTy = SrcTy;
2486 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2487 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2488 ShiftTy = WideTy;
2489 }
2490
2491 auto LShr = MIRBuilder.buildLShr(
2492 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2493 MIRBuilder.buildTrunc(DstReg, LShr);
2494 MI.eraseFromParent();
2495 return Legalized;
2496 }
2497
2498 if (SrcTy.isScalar()) {
2499 Observer.changingInstr(MI);
2500 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2501 Observer.changedInstr(MI);
2502 return Legalized;
2503 }
2504
2505 if (!SrcTy.isVector())
2506 return UnableToLegalize;
2507
2508 if (DstTy != SrcTy.getElementType())
2509 return UnableToLegalize;
2510
2511 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2512 return UnableToLegalize;
2513
2514 Observer.changingInstr(MI);
2515 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2516
2517 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2518 Offset);
2519 widenScalarDst(MI, WideTy.getScalarType(), 0);
2520 Observer.changedInstr(MI);
2521 return Legalized;
2522}
2523
2524 LegalizerHelper::LegalizeResult
2525 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2526 LLT WideTy) {
2527 if (TypeIdx != 0 || WideTy.isVector())
2528 return UnableToLegalize;
2529 Observer.changingInstr(MI);
2530 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2531 widenScalarDst(MI, WideTy);
2532 Observer.changedInstr(MI);
2533 return Legalized;
2534}
2535
2536 LegalizerHelper::LegalizeResult
2537 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2538 LLT WideTy) {
2539 unsigned Opcode;
2540 unsigned ExtOpcode;
2541 std::optional<Register> CarryIn;
2542 switch (MI.getOpcode()) {
2543 default:
2544 llvm_unreachable("Unexpected opcode!");
2545 case TargetOpcode::G_SADDO:
2546 Opcode = TargetOpcode::G_ADD;
2547 ExtOpcode = TargetOpcode::G_SEXT;
2548 break;
2549 case TargetOpcode::G_SSUBO:
2550 Opcode = TargetOpcode::G_SUB;
2551 ExtOpcode = TargetOpcode::G_SEXT;
2552 break;
2553 case TargetOpcode::G_UADDO:
2554 Opcode = TargetOpcode::G_ADD;
2555 ExtOpcode = TargetOpcode::G_ZEXT;
2556 break;
2557 case TargetOpcode::G_USUBO:
2558 Opcode = TargetOpcode::G_SUB;
2559 ExtOpcode = TargetOpcode::G_ZEXT;
2560 break;
2561 case TargetOpcode::G_SADDE:
2562 Opcode = TargetOpcode::G_UADDE;
2563 ExtOpcode = TargetOpcode::G_SEXT;
2564 CarryIn = MI.getOperand(4).getReg();
2565 break;
2566 case TargetOpcode::G_SSUBE:
2567 Opcode = TargetOpcode::G_USUBE;
2568 ExtOpcode = TargetOpcode::G_SEXT;
2569 CarryIn = MI.getOperand(4).getReg();
2570 break;
2571 case TargetOpcode::G_UADDE:
2572 Opcode = TargetOpcode::G_UADDE;
2573 ExtOpcode = TargetOpcode::G_ZEXT;
2574 CarryIn = MI.getOperand(4).getReg();
2575 break;
2576 case TargetOpcode::G_USUBE:
2577 Opcode = TargetOpcode::G_USUBE;
2578 ExtOpcode = TargetOpcode::G_ZEXT;
2579 CarryIn = MI.getOperand(4).getReg();
2580 break;
2581 }
2582
2583 if (TypeIdx == 1) {
2584 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2585
2586 Observer.changingInstr(MI);
2587 if (CarryIn)
2588 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2589 widenScalarDst(MI, WideTy, 1);
2590
2591 Observer.changedInstr(MI);
2592 return Legalized;
2593 }
2594
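// A sketch of the widening below (illustrative types and register names):
// widening %r:_(s8), %o:_(s1) = G_UADDO %a, %b to s32 zero-extends the
// operands, adds them in s32, and reports overflow when zero-extending the
// truncated result no longer reproduces the wide sum:
//   %r32:_(s32) = G_ADD %a32, %b32
//   %r:_(s8) = G_TRUNC %r32
//   %o:_(s1) = G_ICMP intpred(ne), %r32(s32), %rext(s32) ; %rext = G_ZEXT %r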
2595 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2596 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2597 // Do the arithmetic in the larger type.
2598 Register NewOp;
2599 if (CarryIn) {
2600 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2601 NewOp = MIRBuilder
2602 .buildInstr(Opcode, {WideTy, CarryOutTy},
2603 {LHSExt, RHSExt, *CarryIn})
2604 .getReg(0);
2605 } else {
2606 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2607 }
2608 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2609 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2610 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2611 // There is no overflow if the ExtOp is the same as NewOp.
2612 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2613 // Now trunc the NewOp to the original result.
2614 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2615 MI.eraseFromParent();
2616 return Legalized;
2617}
2618
2619 LegalizerHelper::LegalizeResult
2620 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2621 LLT WideTy) {
2622 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2623 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2624 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2625 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2626 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2627 // We can convert this to:
2628 // 1. Any extend iN to iM
2629 // 2. SHL by M-N
2630 // 3. [US][ADD|SUB|SHL]SAT
2631 // 4. L/ASHR by M-N
2632 //
2633 // It may be more efficient to lower this to a min and a max operation in
2634 // the higher precision arithmetic if the promoted operation isn't legal,
2635 // but this decision is up to the target's lowering request.
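// For example (illustrative), widening %d:_(s8) = G_UADDSAT %a, %b to s32:
//   %a32:_(s32) = G_ANYEXT %a
//   %b32:_(s32) = G_ANYEXT %b
//   %as:_(s32) = G_SHL %a32, 24
//   %bs:_(s32) = G_SHL %b32, 24
//   %s:_(s32) = G_UADDSAT %as, %bs
//   %d:_(s8) = G_TRUNC (G_LSHR %s, 24)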
2636 Register DstReg = MI.getOperand(0).getReg();
2637
2638 unsigned NewBits = WideTy.getScalarSizeInBits();
2639 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2640
2641 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2642 // must not left shift the RHS to preserve the shift amount.
2643 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2644 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2645 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2646 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2647 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2648 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2649
2650 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2651 {ShiftL, ShiftR}, MI.getFlags());
2652
2653 // Use a shift that will preserve the number of sign bits when the trunc is
2654 // folded away.
2655 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2656 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2657
2658 MIRBuilder.buildTrunc(DstReg, Result);
2659 MI.eraseFromParent();
2660 return Legalized;
2661}
2662
2663 LegalizerHelper::LegalizeResult
2664 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2665 LLT WideTy) {
2666 if (TypeIdx == 1) {
2667 Observer.changingInstr(MI);
2668 widenScalarDst(MI, WideTy, 1);
2669 Observer.changedInstr(MI);
2670 return Legalized;
2671 }
2672
2673 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2674 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2675 LLT SrcTy = MRI.getType(LHS);
2676 LLT OverflowTy = MRI.getType(OriginalOverflow);
2677 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2678
2679 // To determine if the result overflowed in the larger type, we extend the
2680 // input to the larger type, do the multiply (checking if it overflows),
2681 // then also check the high bits of the result to see if overflow happened
2682 // there.
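// For example (illustrative), widening %r:_(s16), %o:_(s1) = G_UMULO %a, %b
// to s24 performs a G_UMULO on the zero-extended s24 values and ORs its
// overflow flag with a compare that reports overflow when the high 8 bits
// of the s24 product are non-zero (i.e. the product no longer zero-extends
// its low s16 part). If WideTy were at least 32 bits (2 * 16), the wide
// multiply could not overflow and only the high-bit check would be emitted.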
2683 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2684 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2685 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2686
2687 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2688 // so we don't need to check the overflow result of larger type Mulo.
2689 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2690
2691 unsigned MulOpc =
2692 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2693
2694 MachineInstrBuilder Mulo;
2695 if (WideMulCanOverflow)
2696 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2697 {LeftOperand, RightOperand});
2698 else
2699 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2700
2701 auto Mul = Mulo->getOperand(0);
2702 MIRBuilder.buildTrunc(Result, Mul);
2703
2704 MachineInstrBuilder ExtResult;
2705 // Overflow occurred if it occurred in the larger type, or if the high part
2706 // of the result does not zero/sign-extend the low part. Check this second
2707 // possibility first.
2708 if (IsSigned) {
2709 // For signed, overflow occurred when the high part does not sign-extend
2710 // the low part.
2711 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2712 } else {
2713 // Unsigned overflow occurred when the high part does not zero-extend the
2714 // low part.
2715 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2716 }
2717
2718 if (WideMulCanOverflow) {
2719 auto Overflow =
2720 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2721 // Finally check if the multiplication in the larger type itself overflowed.
2722 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2723 } else {
2724 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2725 }
2726 MI.eraseFromParent();
2727 return Legalized;
2728}
2729
2730 LegalizerHelper::LegalizeResult
2731 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2732 unsigned Opcode = MI.getOpcode();
2733 switch (Opcode) {
2734 default:
2735 return UnableToLegalize;
2736 case TargetOpcode::G_ATOMICRMW_XCHG:
2737 case TargetOpcode::G_ATOMICRMW_ADD:
2738 case TargetOpcode::G_ATOMICRMW_SUB:
2739 case TargetOpcode::G_ATOMICRMW_AND:
2740 case TargetOpcode::G_ATOMICRMW_OR:
2741 case TargetOpcode::G_ATOMICRMW_XOR:
2742 case TargetOpcode::G_ATOMICRMW_MIN:
2743 case TargetOpcode::G_ATOMICRMW_MAX:
2744 case TargetOpcode::G_ATOMICRMW_UMIN:
2745 case TargetOpcode::G_ATOMICRMW_UMAX:
2746 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2747 Observer.changingInstr(MI);
2748 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2749 widenScalarDst(MI, WideTy, 0);
2750 Observer.changedInstr(MI);
2751 return Legalized;
2752 case TargetOpcode::G_ATOMIC_CMPXCHG:
2753 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2754 Observer.changingInstr(MI);
2755 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2756 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2757 widenScalarDst(MI, WideTy, 0);
2758 Observer.changedInstr(MI);
2759 return Legalized;
2760 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2761 if (TypeIdx == 0) {
2762 Observer.changingInstr(MI);
2763 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2764 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2765 widenScalarDst(MI, WideTy, 0);
2766 Observer.changedInstr(MI);
2767 return Legalized;
2768 }
2769 assert(TypeIdx == 1 &&
2770 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2771 Observer.changingInstr(MI);
2772 widenScalarDst(MI, WideTy, 1);
2773 Observer.changedInstr(MI);
2774 return Legalized;
2775 case TargetOpcode::G_EXTRACT:
2776 return widenScalarExtract(MI, TypeIdx, WideTy);
2777 case TargetOpcode::G_INSERT:
2778 return widenScalarInsert(MI, TypeIdx, WideTy);
2779 case TargetOpcode::G_MERGE_VALUES:
2780 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2781 case TargetOpcode::G_UNMERGE_VALUES:
2782 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2783 case TargetOpcode::G_SADDO:
2784 case TargetOpcode::G_SSUBO:
2785 case TargetOpcode::G_UADDO:
2786 case TargetOpcode::G_USUBO:
2787 case TargetOpcode::G_SADDE:
2788 case TargetOpcode::G_SSUBE:
2789 case TargetOpcode::G_UADDE:
2790 case TargetOpcode::G_USUBE:
2791 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2792 case TargetOpcode::G_UMULO:
2793 case TargetOpcode::G_SMULO:
2794 return widenScalarMulo(MI, TypeIdx, WideTy);
2795 case TargetOpcode::G_SADDSAT:
2796 case TargetOpcode::G_SSUBSAT:
2797 case TargetOpcode::G_SSHLSAT:
2798 case TargetOpcode::G_UADDSAT:
2799 case TargetOpcode::G_USUBSAT:
2800 case TargetOpcode::G_USHLSAT:
2801 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2802 case TargetOpcode::G_CTTZ:
2803 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2804 case TargetOpcode::G_CTLZ:
2805 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2806 case TargetOpcode::G_CTPOP: {
2807 if (TypeIdx == 0) {
2808 Observer.changingInstr(MI);
2809 widenScalarDst(MI, WideTy, 0);
2810 Observer.changedInstr(MI);
2811 return Legalized;
2812 }
2813
2814 Register SrcReg = MI.getOperand(1).getReg();
2815
2816 // First extend the input.
2817 unsigned ExtOpc = Opcode == TargetOpcode::G_CTTZ ||
2818 Opcode == TargetOpcode::G_CTTZ_ZERO_UNDEF
2819 ? TargetOpcode::G_ANYEXT
2820 : TargetOpcode::G_ZEXT;
2821 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2822 LLT CurTy = MRI.getType(SrcReg);
2823 unsigned NewOpc = Opcode;
2824 if (NewOpc == TargetOpcode::G_CTTZ) {
2825 // The count is the same in the larger type except if the original
2826 // value was zero. This can be handled by setting the bit just off
2827 // the top of the original type.
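// For example (illustrative), widening %c = G_CTTZ %x:_(s8) to s32 ORs the
// extended value with 0x100, so a zero input still yields a count of 8, and
// then uses G_CTTZ_ZERO_UNDEF on the s32 value.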
2828 auto TopBit =
2829 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2830 MIBSrc = MIRBuilder.buildOr(
2831 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2832 // Now we know the operand is non-zero, use the more relaxed opcode.
2833 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2834 }
2835
2836 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2837
2838 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2839 // An optimization where the result is the CTLZ after the left shift by
2840 // (Difference in widety and current ty), that is,
2841 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2842 // Result = ctlz MIBSrc
2843 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2844 MIRBuilder.buildConstant(WideTy, SizeDiff));
2845 }
2846
2847 // Perform the operation at the larger size.
2848 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2849 // This is already the correct result for CTPOP and CTTZs
2850 if (Opcode == TargetOpcode::G_CTLZ) {
2851 // The correct result is NewOp - (Difference in widety and current ty).
2852 MIBNewOp = MIRBuilder.buildSub(
2853 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2854 }
2855
2856 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2857 MI.eraseFromParent();
2858 return Legalized;
2859 }
2860 case TargetOpcode::G_BSWAP: {
2861 Observer.changingInstr(MI);
2862 Register DstReg = MI.getOperand(0).getReg();
2863
2864 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2865 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2866 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2867 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2868
2869 MI.getOperand(0).setReg(DstExt);
2870
2871 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2872
2873 LLT Ty = MRI.getType(DstReg);
2874 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2875 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2876 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2877
2878 MIRBuilder.buildTrunc(DstReg, ShrReg);
2879 Observer.changedInstr(MI);
2880 return Legalized;
2881 }
2882 case TargetOpcode::G_BITREVERSE: {
2883 Observer.changingInstr(MI);
2884
2885 Register DstReg = MI.getOperand(0).getReg();
2886 LLT Ty = MRI.getType(DstReg);
2887 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2888
2889 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2890 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2891 MI.getOperand(0).setReg(DstExt);
2892 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2893
2894 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2895 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2896 MIRBuilder.buildTrunc(DstReg, Shift);
2897 Observer.changedInstr(MI);
2898 return Legalized;
2899 }
2900 case TargetOpcode::G_FREEZE:
2901 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2902 Observer.changingInstr(MI);
2903 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2904 widenScalarDst(MI, WideTy);
2905 Observer.changedInstr(MI);
2906 return Legalized;
2907
2908 case TargetOpcode::G_ABS:
2909 Observer.changingInstr(MI);
2910 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2911 widenScalarDst(MI, WideTy);
2912 Observer.changedInstr(MI);
2913 return Legalized;
2914
2915 case TargetOpcode::G_ADD:
2916 case TargetOpcode::G_AND:
2917 case TargetOpcode::G_MUL:
2918 case TargetOpcode::G_OR:
2919 case TargetOpcode::G_XOR:
2920 case TargetOpcode::G_SUB:
2921 case TargetOpcode::G_SHUFFLE_VECTOR:
2922 // Perform operation at larger width (any extension is fine here, high bits
2923 // don't affect the result) and then truncate the result back to the
2924 // original type.
2925 Observer.changingInstr(MI);
2926 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2927 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2928 widenScalarDst(MI, WideTy);
2929 Observer.changedInstr(MI);
2930 return Legalized;
2931
2932 case TargetOpcode::G_SBFX:
2933 case TargetOpcode::G_UBFX:
2934 Observer.changingInstr(MI);
2935
2936 if (TypeIdx == 0) {
2937 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2938 widenScalarDst(MI, WideTy);
2939 } else {
2940 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2941 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2942 }
2943
2944 Observer.changedInstr(MI);
2945 return Legalized;
2946
2947 case TargetOpcode::G_SHL:
2948 Observer.changingInstr(MI);
2949
2950 if (TypeIdx == 0) {
2951 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2952 widenScalarDst(MI, WideTy);
2953 } else {
2954 assert(TypeIdx == 1);
2955 // The "number of bits to shift" operand must preserve its value as an
2956 // unsigned integer:
2957 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2958 }
2959
2960 Observer.changedInstr(MI);
2961 return Legalized;
2962
2963 case TargetOpcode::G_ROTR:
2964 case TargetOpcode::G_ROTL:
2965 if (TypeIdx != 1)
2966 return UnableToLegalize;
2967
2968 Observer.changingInstr(MI);
2969 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2970 Observer.changedInstr(MI);
2971 return Legalized;
2972
2973 case TargetOpcode::G_SDIV:
2974 case TargetOpcode::G_SREM:
2975 case TargetOpcode::G_SMIN:
2976 case TargetOpcode::G_SMAX:
2977 case TargetOpcode::G_ABDS:
2978 Observer.changingInstr(MI);
2979 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2980 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2981 widenScalarDst(MI, WideTy);
2982 Observer.changedInstr(MI);
2983 return Legalized;
2984
2985 case TargetOpcode::G_SDIVREM:
2986 Observer.changingInstr(MI);
2987 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2988 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2989 widenScalarDst(MI, WideTy);
2990 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
2991 widenScalarDst(MI, WideTy, 1);
2992 Observer.changedInstr(MI);
2993 return Legalized;
2994
2995 case TargetOpcode::G_ASHR:
2996 case TargetOpcode::G_LSHR:
2997 Observer.changingInstr(MI);
2998
2999 if (TypeIdx == 0) {
3000 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
3001 : TargetOpcode::G_ZEXT;
3002
3003 widenScalarSrc(MI, WideTy, 1, CvtOp);
3004 widenScalarDst(MI, WideTy);
3005 } else {
3006 assert(TypeIdx == 1);
3007 // The "number of bits to shift" operand must preserve its value as an
3008 // unsigned integer:
3009 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3010 }
3011
3012 Observer.changedInstr(MI);
3013 return Legalized;
3014 case TargetOpcode::G_UDIV:
3015 case TargetOpcode::G_UREM:
3016 case TargetOpcode::G_ABDU:
3017 Observer.changingInstr(MI);
3018 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3019 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3020 widenScalarDst(MI, WideTy);
3021 Observer.changedInstr(MI);
3022 return Legalized;
3023 case TargetOpcode::G_UDIVREM:
3024 Observer.changingInstr(MI);
3025 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3026 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3027 widenScalarDst(MI, WideTy);
3028 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3029 widenScalarDst(MI, WideTy, 1);
3030 Observer.changedInstr(MI);
3031 return Legalized;
3032 case TargetOpcode::G_UMIN:
3033 case TargetOpcode::G_UMAX: {
3034 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3035
3036 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3037 unsigned ExtOpc =
3038 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty, Ctx),
3039 getApproximateEVTForLLT(WideTy, Ctx))
3040 ? TargetOpcode::G_SEXT
3041 : TargetOpcode::G_ZEXT;
3042
3043 Observer.changingInstr(MI);
3044 widenScalarSrc(MI, WideTy, 1, ExtOpc);
3045 widenScalarSrc(MI, WideTy, 2, ExtOpc);
3046 widenScalarDst(MI, WideTy);
3047 Observer.changedInstr(MI);
3048 return Legalized;
3049 }
3050
3051 case TargetOpcode::G_SELECT:
3052 Observer.changingInstr(MI);
3053 if (TypeIdx == 0) {
3054 // Perform operation at larger width (any extension is fine here, high
3055 // bits don't affect the result) and then truncate the result back to the
3056 // original type.
3057 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3058 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
3059 widenScalarDst(MI, WideTy);
3060 } else {
3061 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
3062 // Explicit extension is required here since high bits affect the result.
3063 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
3064 }
3065 Observer.changedInstr(MI);
3066 return Legalized;
3067
3068 case TargetOpcode::G_FPEXT:
3069 if (TypeIdx != 1)
3070 return UnableToLegalize;
3071
3072 Observer.changingInstr(MI);
3073 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3074 Observer.changedInstr(MI);
3075 return Legalized;
3076 case TargetOpcode::G_FPTOSI:
3077 case TargetOpcode::G_FPTOUI:
3078 case TargetOpcode::G_INTRINSIC_LRINT:
3079 case TargetOpcode::G_INTRINSIC_LLRINT:
3080 case TargetOpcode::G_IS_FPCLASS:
3081 Observer.changingInstr(MI);
3082
3083 if (TypeIdx == 0)
3084 widenScalarDst(MI, WideTy);
3085 else
3086 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3087
3088 Observer.changedInstr(MI);
3089 return Legalized;
3090 case TargetOpcode::G_SITOFP:
3091 Observer.changingInstr(MI);
3092
3093 if (TypeIdx == 0)
3094 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3095 else
3096 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
3097
3098 Observer.changedInstr(MI);
3099 return Legalized;
3100 case TargetOpcode::G_UITOFP:
3101 Observer.changingInstr(MI);
3102
3103 if (TypeIdx == 0)
3104 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3105 else
3106 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3107
3108 Observer.changedInstr(MI);
3109 return Legalized;
3110 case TargetOpcode::G_FPTOSI_SAT:
3111 case TargetOpcode::G_FPTOUI_SAT:
3112 Observer.changingInstr(MI);
3113
3114 if (TypeIdx == 0) {
3115 Register OldDst = MI.getOperand(0).getReg();
3116 LLT Ty = MRI.getType(OldDst);
3117 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3118 Register NewDst;
3119 MI.getOperand(0).setReg(ExtReg);
3120 uint64_t ShortBits = Ty.getScalarSizeInBits();
3121 uint64_t WideBits = WideTy.getScalarSizeInBits();
3122 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3123 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3124 // z = i16 fptosi_sat(a)
3125 // ->
3126 // x = i32 fptosi_sat(a)
3127 // y = smin(x, 32767)
3128 // z = smax(y, -32768)
3129 auto MaxVal = MIRBuilder.buildConstant(
3130 WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
3131 auto MinVal = MIRBuilder.buildConstant(
3132 WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
3133 Register MidReg =
3134 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3135 NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3136 } else {
3137 // z = i16 fptoui_sat(a)
3138 // ->
3139 // x = i32 fptoui_sat(a)
3140 // y = smin(x, 65535)
3141 auto MaxVal = MIRBuilder.buildConstant(
3142 WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
3143 NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3144 }
3145 MIRBuilder.buildTrunc(OldDst, NewDst);
3146 } else
3147 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3148
3149 Observer.changedInstr(MI);
3150 return Legalized;
3151 case TargetOpcode::G_LOAD:
3152 case TargetOpcode::G_SEXTLOAD:
3153 case TargetOpcode::G_ZEXTLOAD:
3154 Observer.changingInstr(MI);
3155 widenScalarDst(MI, WideTy);
3156 Observer.changedInstr(MI);
3157 return Legalized;
3158
3159 case TargetOpcode::G_STORE: {
3160 if (TypeIdx != 0)
3161 return UnableToLegalize;
3162
3163 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3164 assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
3165 if (!Ty.isScalar()) {
3166 // We need to widen the vector element type.
3167 Observer.changingInstr(MI);
3168 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
3169 // We also need to adjust the MMO to turn this into a truncating store.
3170 MachineMemOperand &MMO = **MI.memoperands_begin();
3171 MachineFunction &MF = MIRBuilder.getMF();
3172 auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
3173 MI.setMemRefs(MF, {NewMMO});
3174 Observer.changedInstr(MI);
3175 return Legalized;
3176 }
3177
3178 Observer.changingInstr(MI);
3179
3180 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3181 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3182 widenScalarSrc(MI, WideTy, 0, ExtType);
3183
3184 Observer.changedInstr(MI);
3185 return Legalized;
3186 }
3187 case TargetOpcode::G_CONSTANT: {
3188 MachineOperand &SrcMO = MI.getOperand(1);
3189 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3190 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3191 MRI.getType(MI.getOperand(0).getReg()));
3192 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3193 ExtOpc == TargetOpcode::G_ANYEXT) &&
3194 "Illegal Extend");
3195 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3196 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3197 ? SrcVal.sext(WideTy.getSizeInBits())
3198 : SrcVal.zext(WideTy.getSizeInBits());
3199 Observer.changingInstr(MI);
3200 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3201
3202 widenScalarDst(MI, WideTy);
3203 Observer.changedInstr(MI);
3204 return Legalized;
3205 }
3206 case TargetOpcode::G_FCONSTANT: {
3207 // To avoid changing the bits of the constant due to extension to a larger
3208 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
3209 MachineOperand &SrcMO = MI.getOperand(1);
3210 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3211 MIRBuilder.setInstrAndDebugLoc(MI);
3212 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
3213 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
3214 MI.eraseFromParent();
3215 return Legalized;
3216 }
3217 case TargetOpcode::G_IMPLICIT_DEF: {
3218 Observer.changingInstr(MI);
3219 widenScalarDst(MI, WideTy);
3220 Observer.changedInstr(MI);
3221 return Legalized;
3222 }
3223 case TargetOpcode::G_BRCOND:
3224 Observer.changingInstr(MI);
3225 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
3226 Observer.changedInstr(MI);
3227 return Legalized;
3228
3229 case TargetOpcode::G_FCMP:
3230 Observer.changingInstr(MI);
3231 if (TypeIdx == 0)
3232 widenScalarDst(MI, WideTy);
3233 else {
3234 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3235 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
3236 }
3237 Observer.changedInstr(MI);
3238 return Legalized;
3239
3240 case TargetOpcode::G_ICMP:
3241 Observer.changingInstr(MI);
3242 if (TypeIdx == 0)
3243 widenScalarDst(MI, WideTy);
3244 else {
3245 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
3246 CmpInst::Predicate Pred =
3247 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3248
3249 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3250 unsigned ExtOpcode =
3251 (CmpInst::isSigned(Pred) ||
3252 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx),
3253 getApproximateEVTForLLT(WideTy, Ctx)))
3254 ? TargetOpcode::G_SEXT
3255 : TargetOpcode::G_ZEXT;
3256 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
3257 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
3258 }
3259 Observer.changedInstr(MI);
3260 return Legalized;
3261
3262 case TargetOpcode::G_PTR_ADD:
3263 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
3264 Observer.changingInstr(MI);
3265 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3266 Observer.changedInstr(MI);
3267 return Legalized;
3268
3269 case TargetOpcode::G_PHI: {
3270 assert(TypeIdx == 0 && "Expecting only Idx 0");
3271
3272 Observer.changingInstr(MI);
3273 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3274 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3275 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
3276 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
3277 }
3278
3279 MachineBasicBlock &MBB = *MI.getParent();
3280 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3281 widenScalarDst(MI, WideTy);
3282 Observer.changedInstr(MI);
3283 return Legalized;
3284 }
3285 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3286 if (TypeIdx == 0) {
3287 Register VecReg = MI.getOperand(1).getReg();
3288 LLT VecTy = MRI.getType(VecReg);
3289 Observer.changingInstr(MI);
3290
3291 widenScalarSrc(
3292 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
3293 TargetOpcode::G_ANYEXT);
3294
3295 widenScalarDst(MI, WideTy, 0);
3296 Observer.changedInstr(MI);
3297 return Legalized;
3298 }
3299
3300 if (TypeIdx != 2)
3301 return UnableToLegalize;
3302 Observer.changingInstr(MI);
3303 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3304 Observer.changedInstr(MI);
3305 return Legalized;
3306 }
3307 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3308 if (TypeIdx == 0) {
3309 Observer.changingInstr(MI);
3310 const LLT WideEltTy = WideTy.getElementType();
3311
3312 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3313 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3314 widenScalarDst(MI, WideTy, 0);
3315 Observer.changedInstr(MI);
3316 return Legalized;
3317 }
3318
3319 if (TypeIdx == 1) {
3320 Observer.changingInstr(MI);
3321
3322 Register VecReg = MI.getOperand(1).getReg();
3323 LLT VecTy = MRI.getType(VecReg);
3324 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
3325
3326 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3327 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3328 widenScalarDst(MI, WideVecTy, 0);
3329 Observer.changedInstr(MI);
3330 return Legalized;
3331 }
3332
3333 if (TypeIdx == 2) {
3334 Observer.changingInstr(MI);
3335 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3336 Observer.changedInstr(MI);
3337 return Legalized;
3338 }
3339
3340 return UnableToLegalize;
3341 }
3342 case TargetOpcode::G_FADD:
3343 case TargetOpcode::G_FMUL:
3344 case TargetOpcode::G_FSUB:
3345 case TargetOpcode::G_FMA:
3346 case TargetOpcode::G_FMAD:
3347 case TargetOpcode::G_FNEG:
3348 case TargetOpcode::G_FABS:
3349 case TargetOpcode::G_FCANONICALIZE:
3350 case TargetOpcode::G_FMINNUM:
3351 case TargetOpcode::G_FMAXNUM:
3352 case TargetOpcode::G_FMINNUM_IEEE:
3353 case TargetOpcode::G_FMAXNUM_IEEE:
3354 case TargetOpcode::G_FMINIMUM:
3355 case TargetOpcode::G_FMAXIMUM:
3356 case TargetOpcode::G_FMINIMUMNUM:
3357 case TargetOpcode::G_FMAXIMUMNUM:
3358 case TargetOpcode::G_FDIV:
3359 case TargetOpcode::G_FREM:
3360 case TargetOpcode::G_FCEIL:
3361 case TargetOpcode::G_FFLOOR:
3362 case TargetOpcode::G_FCOS:
3363 case TargetOpcode::G_FSIN:
3364 case TargetOpcode::G_FTAN:
3365 case TargetOpcode::G_FACOS:
3366 case TargetOpcode::G_FASIN:
3367 case TargetOpcode::G_FATAN:
3368 case TargetOpcode::G_FATAN2:
3369 case TargetOpcode::G_FCOSH:
3370 case TargetOpcode::G_FSINH:
3371 case TargetOpcode::G_FTANH:
3372 case TargetOpcode::G_FLOG10:
3373 case TargetOpcode::G_FLOG:
3374 case TargetOpcode::G_FLOG2:
3375 case TargetOpcode::G_FRINT:
3376 case TargetOpcode::G_FNEARBYINT:
3377 case TargetOpcode::G_FSQRT:
3378 case TargetOpcode::G_FEXP:
3379 case TargetOpcode::G_FEXP2:
3380 case TargetOpcode::G_FEXP10:
3381 case TargetOpcode::G_FPOW:
3382 case TargetOpcode::G_INTRINSIC_TRUNC:
3383 case TargetOpcode::G_INTRINSIC_ROUND:
3384 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3385 assert(TypeIdx == 0);
3386 Observer.changingInstr(MI);
3387
3388 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3389 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3390
3391 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3392 Observer.changedInstr(MI);
3393 return Legalized;
3394 case TargetOpcode::G_FMODF: {
3395 Observer.changingInstr(MI);
3396 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3397
3398 widenScalarDst(MI, WideTy, 1, TargetOpcode::G_FPTRUNC);
3399 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3400 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3401 Observer.changedInstr(MI);
3402 return Legalized;
3403 }
3404 case TargetOpcode::G_FPOWI:
3405 case TargetOpcode::G_FLDEXP:
3406 case TargetOpcode::G_STRICT_FLDEXP: {
3407 if (TypeIdx == 0) {
3408 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3409 return UnableToLegalize;
3410
3411 Observer.changingInstr(MI);
3412 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3413 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3414 Observer.changedInstr(MI);
3415 return Legalized;
3416 }
3417
3418 if (TypeIdx == 1) {
3419 // For some reason SelectionDAG tries to promote to a libcall without
3420 // actually changing the integer type for promotion.
3421 Observer.changingInstr(MI);
3422 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3423 Observer.changedInstr(MI);
3424 return Legalized;
3425 }
3426
3427 return UnableToLegalize;
3428 }
3429 case TargetOpcode::G_FFREXP: {
3430 Observer.changingInstr(MI);
3431
3432 if (TypeIdx == 0) {
3433 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3434 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3435 } else {
3436 widenScalarDst(MI, WideTy, 1);
3437 }
3438
3439 Observer.changedInstr(MI);
3440 return Legalized;
3441 }
3442 case TargetOpcode::G_INTTOPTR:
3443 if (TypeIdx != 1)
3444 return UnableToLegalize;
3445
3446 Observer.changingInstr(MI);
3447 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3448 Observer.changedInstr(MI);
3449 return Legalized;
3450 case TargetOpcode::G_PTRTOINT:
3451 if (TypeIdx != 0)
3452 return UnableToLegalize;
3453
3454 Observer.changingInstr(MI);
3455 widenScalarDst(MI, WideTy, 0);
3456 Observer.changedInstr(MI);
3457 return Legalized;
3458 case TargetOpcode::G_BUILD_VECTOR: {
3459 Observer.changingInstr(MI);
3460
3461 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3462 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3463 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3464
3465 // Avoid changing the result vector type if the source element type was
3466 // requested.
3467 if (TypeIdx == 1) {
3468 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3469 } else {
3470 widenScalarDst(MI, WideTy, 0);
3471 }
3472
3473 Observer.changedInstr(MI);
3474 return Legalized;
3475 }
3476 case TargetOpcode::G_SEXT_INREG:
3477 if (TypeIdx != 0)
3478 return UnableToLegalize;
3479
3480 Observer.changingInstr(MI);
3481 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3482 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3483 Observer.changedInstr(MI);
3484 return Legalized;
3485 case TargetOpcode::G_PTRMASK: {
3486 if (TypeIdx != 1)
3487 return UnableToLegalize;
3488 Observer.changingInstr(MI);
3489 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3490 Observer.changedInstr(MI);
3491 return Legalized;
3492 }
3493 case TargetOpcode::G_VECREDUCE_ADD: {
3494 if (TypeIdx != 1)
3495 return UnableToLegalize;
3496 Observer.changingInstr(MI);
3497 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3498 widenScalarDst(MI, WideTy.getScalarType(), 0, TargetOpcode::G_TRUNC);
3499 Observer.changedInstr(MI);
3500 return Legalized;
3501 }
3502 case TargetOpcode::G_VECREDUCE_FADD:
3503 case TargetOpcode::G_VECREDUCE_FMUL:
3504 case TargetOpcode::G_VECREDUCE_FMIN:
3505 case TargetOpcode::G_VECREDUCE_FMAX:
3506 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3507 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3508 if (TypeIdx != 0)
3509 return UnableToLegalize;
3510 Observer.changingInstr(MI);
3511 Register VecReg = MI.getOperand(1).getReg();
3512 LLT VecTy = MRI.getType(VecReg);
3513 LLT WideVecTy = VecTy.isVector()
3514 ? LLT::vector(VecTy.getElementCount(), WideTy)
3515 : WideTy;
3516 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3517 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3518 Observer.changedInstr(MI);
3519 return Legalized;
3520 }
3521 case TargetOpcode::G_VSCALE: {
3522 MachineOperand &SrcMO = MI.getOperand(1);
3523 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3524 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3525 // The CImm is always a signed value
3526 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3527 Observer.changingInstr(MI);
3528 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3529 widenScalarDst(MI, WideTy);
3530 Observer.changedInstr(MI);
3531 return Legalized;
3532 }
3533 case TargetOpcode::G_SPLAT_VECTOR: {
3534 if (TypeIdx != 1)
3535 return UnableToLegalize;
3536
3537 Observer.changingInstr(MI);
3538 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3539 Observer.changedInstr(MI);
3540 return Legalized;
3541 }
3542 case TargetOpcode::G_INSERT_SUBVECTOR: {
3543 if (TypeIdx != 0)
3544 return UnableToLegalize;
3545
3546 GInsertSubvector &IS = cast<GInsertSubvector>(MI);
3547 Register BigVec = IS.getBigVec();
3548 Register SubVec = IS.getSubVec();
3549
3550 LLT SubVecTy = MRI.getType(SubVec);
3551 LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
3552
3553 // Widen the G_INSERT_SUBVECTOR
3554 auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
3555 auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3556 auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3557 IS.getIndexImm());
3558
3559 // Truncate back down
3560 auto SplatZero = MIRBuilder.buildSplatVector(
3561 WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
3562 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, IS.getReg(0), WideInsert,
3563 SplatZero);
3564
3565 MI.eraseFromParent();
3566
3567 return Legalized;
3568 }
3569 }
3570}
3571
3572static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3573 MachineIRBuilder &B, Register Src, LLT Ty) {
3574 auto Unmerge = B.buildUnmerge(Ty, Src);
3575 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3576 Pieces.push_back(Unmerge.getReg(I));
3577}
3578
3579static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3580 MachineIRBuilder &MIRBuilder) {
3581 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3582 MachineFunction &MF = MIRBuilder.getMF();
3583 const DataLayout &DL = MIRBuilder.getDataLayout();
3584 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3585 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3586 LLT DstLLT = MRI.getType(DstReg);
3587
3588 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3589
3590 auto Addr = MIRBuilder.buildConstantPool(
3591 AddrPtrTy,
3592 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3593
3594 MachineMemOperand *MMO =
3595 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3596 MachineMemOperand::MOLoad, DstLLT, Alignment);
3597
3598 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3599}
3600
3601LegalizerHelper::LegalizeResult
3602LegalizerHelper::lowerConstant(MachineInstr &MI) {
3603 const MachineOperand &ConstOperand = MI.getOperand(1);
3604 const Constant *ConstantVal = ConstOperand.getCImm();
3605
3606 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3607 MI.eraseFromParent();
3608
3609 return Legalized;
3610}
3611
3612LegalizerHelper::LegalizeResult
3613LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3614 const MachineOperand &ConstOperand = MI.getOperand(1);
3615 const Constant *ConstantVal = ConstOperand.getFPImm();
3616
3617 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3618 MI.eraseFromParent();
3619
3620 return Legalized;
3621}
3622
3623LegalizerHelper::LegalizeResult
3624LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3625 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3626 if (SrcTy.isVector()) {
3627 LLT SrcEltTy = SrcTy.getElementType();
3628 SmallVector<Register, 8> SrcRegs;
3629
3630 if (DstTy.isVector()) {
3631 int NumDstElt = DstTy.getNumElements();
3632 int NumSrcElt = SrcTy.getNumElements();
3633
3634 LLT DstEltTy = DstTy.getElementType();
3635 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3636 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3637
3638 // If there's an element size mismatch, insert intermediate casts to match
3639 // the result element type.
3640 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3641 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3642 //
3643 // =>
3644 //
3645 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3646 // %4:_(<2 x s8>) = G_BITCAST %2
3647 // %5:_(<2 x s8>) = G_BITCAST %3
3648 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3649 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
3650 SrcPartTy = SrcEltTy;
3651 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3652 //
3653 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3654 //
3655 // =>
3656 //
3657 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3658 // %4:_(s16) = G_BITCAST %2
3659 // %5:_(s16) = G_BITCAST %3
3660 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3661 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
3662 DstCastTy = DstEltTy;
3663 }
3664
3665 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3666 for (Register &SrcReg : SrcRegs)
3667 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3668 } else
3669 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3670
3671 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3672 MI.eraseFromParent();
3673 return Legalized;
3674 }
3675
3676 if (DstTy.isVector()) {
3677 SmallVector<Register, 8> SrcRegs;
3678 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3679 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3680 MI.eraseFromParent();
3681 return Legalized;
3682 }
3683
3684 return UnableToLegalize;
3685}
3686
3687/// Figure out the bit offset into a register when coercing a vector index for
3688/// the wide element type. This is only for the case when promoting vector to
3689/// one with larger elements.
3690///
3691///
3692/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3693/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
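///
/// As an illustrative example (values chosen here, not taken from any target):
/// widening a <8 x s8> index space to <2 x s32> gives NewEltSize = 32 and
/// OldEltSize = 8, so Log2EltRatio = 2. For %idx = 5 this computes
/// %offset_idx = 5 & 3 = 1 and %offset_bits = 1 << 3 = 8, i.e. the s8 element
/// starts at bit 8 of the s32 element that contains it.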
3694static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3695 Register Idx,
3696 unsigned NewEltSize,
3697 unsigned OldEltSize) {
3698 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3699 LLT IdxTy = B.getMRI()->getType(Idx);
3700
3701 // Now figure out the amount we need to shift to get the target bits.
3702 auto OffsetMask = B.buildConstant(
3703 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3704 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3705 return B.buildShl(IdxTy, OffsetIdx,
3706 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3707}
3708
3709/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3710/// is casting to a vector with a smaller element size, perform multiple element
3711/// extracts and merge the results. If this is coercing to a vector with larger
3712/// elements, index the bitcasted vector and extract the target element with bit
3713/// operations. This is intended to force the indexing in the native register
3714/// size for architectures that can dynamically index the register file.
3715LegalizerHelper::LegalizeResult
3716LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3717 LLT CastTy) {
3718 if (TypeIdx != 1)
3719 return UnableToLegalize;
3720
3721 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3722
3723 LLT SrcEltTy = SrcVecTy.getElementType();
3724 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3725 unsigned OldNumElts = SrcVecTy.getNumElements();
3726
3727 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3728 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3729
3730 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3731 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3732 if (NewNumElts > OldNumElts) {
3733 // Decreasing the vector element size
3734 //
3735 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3736 // =>
3737 // v4i32:castx = bitcast x:v2i64
3738 //
3739 // i64 = bitcast
3740 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3741 // (i32 (extract_vector_elt castx, (2 * y + 1)))
3742 //
3743 if (NewNumElts % OldNumElts != 0)
3744 return UnableToLegalize;
3745
3746 // Type of the intermediate result vector.
3747 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3748 LLT MidTy =
3749 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3750
3751 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3752
3753 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3754 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3755
3756 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3757 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3758 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3759 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3760 NewOps[I] = Elt.getReg(0);
3761 }
3762
3763 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3764 MIRBuilder.buildBitcast(Dst, NewVec);
3765 MI.eraseFromParent();
3766 return Legalized;
3767 }
3768
3769 if (NewNumElts < OldNumElts) {
3770 if (NewEltSize % OldEltSize != 0)
3771 return UnableToLegalize;
3772
3773 // This only depends on powers of 2 because we use bit tricks to figure out
3774 // the bit offset we need to shift to get the target element. A general
3775 // expansion could emit division/multiply.
3776 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3777 return UnableToLegalize;
3778
3779 // Increasing the vector element size.
3780 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3781 //
3782 // =>
3783 //
3784 // %cast = G_BITCAST %vec
3785 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3786 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3787 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3788 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3789 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3790 // %elt = G_TRUNC %elt_bits
3791
3792 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3793 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3794
3795 // Divide to get the index in the wider element type.
3796 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3797
3798 Register WideElt = CastVec;
3799 if (CastTy.isVector()) {
3800 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3801 ScaledIdx).getReg(0);
3802 }
3803
3804 // Compute the bit offset into the register of the target element.
3805 Register OffsetBits = getBitcastWiderVectorElementOffset(
3806 MIRBuilder, Idx, NewEltSize, OldEltSize);
3807
3808 // Shift the wide element to get the target element.
3809 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3810 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3811 MI.eraseFromParent();
3812 return Legalized;
3813 }
3814
3815 return UnableToLegalize;
3816}
3817
3818/// Emit code to insert \p InsertReg into \p TargetReg at bit offset \p
3819/// OffsetBits, while preserving the other bits in \p TargetReg.
3820///
3821/// (ZExt(InsertReg) << OffsetBits) | (TargetReg & ~(LowBitsSet(InsertReg.size()) << OffsetBits))
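///
/// A small worked example (illustrative values only): with a 32-bit TargetReg,
/// an 8-bit InsertReg and OffsetBits = 16, the mask is 0xFF << 16, so the
/// result is (ZExt(InsertReg) << 16) | (TargetReg & 0xFF00FFFF).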
3822static Register buildBitFieldInsert(MachineIRBuilder &B,
3823 Register TargetReg, Register InsertReg,
3824 Register OffsetBits) {
3825 LLT TargetTy = B.getMRI()->getType(TargetReg);
3826 LLT InsertTy = B.getMRI()->getType(InsertReg);
3827 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3828 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3829
3830 // Produce a bitmask of the value to insert
3831 auto EltMask = B.buildConstant(
3832 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3833 InsertTy.getSizeInBits()));
3834 // Shift it into position
3835 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3836 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3837
3838 // Clear out the bits in the wide element
3839 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3840
3841 // The value to insert has all zeros already, so stick it into the masked
3842 // wide element.
3843 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3844}
3845
3846/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3847/// is increasing the element size, perform the indexing in the target element
3848/// type, and use bit operations to insert at the element position. This is
3849/// intended for architectures that can dynamically index the register file and
3850/// want to force indexing in the native register size.
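///
/// An illustrative case (values chosen here): inserting an s8 element at
/// %idx = 5 into a <8 x s8> vector viewed as <2 x s32> extracts the s32 at
/// index 5 >> 2 = 1, bit-field-inserts the new byte at bit (5 & 3) * 8 = 8,
/// and writes the s32 back at the same index before bitcasting the result.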
3851LegalizerHelper::LegalizeResult
3852LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3853 LLT CastTy) {
3854 if (TypeIdx != 0)
3855 return UnableToLegalize;
3856
3857 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3858 MI.getFirst4RegLLTs();
3859 LLT VecTy = DstTy;
3860
3861 LLT VecEltTy = VecTy.getElementType();
3862 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3863 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3864 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3865
3866 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3867 unsigned OldNumElts = VecTy.getNumElements();
3868
3869 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3870 if (NewNumElts < OldNumElts) {
3871 if (NewEltSize % OldEltSize != 0)
3872 return UnableToLegalize;
3873
3874 // This only depends on powers of 2 because we use bit tricks to figure out
3875 // the bit offset we need to shift to get the target element. A general
3876 // expansion could emit division/multiply.
3877 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3878 return UnableToLegalize;
3879
3880 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3881 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3882
3883 // Divide to get the index in the wider element type.
3884 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3885
3886 Register ExtractedElt = CastVec;
3887 if (CastTy.isVector()) {
3888 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3889 ScaledIdx).getReg(0);
3890 }
3891
3892 // Compute the bit offset into the register of the target element.
3893 Register OffsetBits = getBitcastWiderVectorElementOffset(
3894 MIRBuilder, Idx, NewEltSize, OldEltSize);
3895
3896 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3897 Val, OffsetBits);
3898 if (CastTy.isVector()) {
3899 InsertedElt = MIRBuilder.buildInsertVectorElement(
3900 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3901 }
3902
3903 MIRBuilder.buildBitcast(Dst, InsertedElt);
3904 MI.eraseFromParent();
3905 return Legalized;
3906 }
3907
3908 return UnableToLegalize;
3909}
3910
3911// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3912// those whose operand types are smaller than the legal vector types.
3913//
3914// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3915//
3916// ===>
3917//
3918// s32 = G_BITCAST <4 x s8>
3919// s32 = G_BITCAST <4 x s8>
3920// s32 = G_BITCAST <4 x s8>
3921// s32 = G_BITCAST <4 x s8>
3922// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3923// <16 x s8> = G_BITCAST <4 x s32>
3924LegalizerHelper::LegalizeResult
3925LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
3926 LLT CastTy) {
3927 // This lowering only applies to G_CONCAT_VECTORS.
3928 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3929 if (!ConcatMI) {
3930 return UnableToLegalize;
3931 }
3932
3933 // Compute the scalar type each source will be bitcast to.
3934 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3935 LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
3936
3937 // Check if the build vector is Legal
3938 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3939 return UnableToLegalize;
3940 }
3941
3942 // Bitcast the sources
3943 SmallVector<Register> BitcastRegs;
3944 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3945 BitcastRegs.push_back(
3946 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3947 .getReg(0));
3948 }
3949
3950 // Build the scalar values into a vector
3951 Register BuildReg =
3952 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
3953 MIRBuilder.buildBitcast(DstReg, BuildReg);
3954
3955 MI.eraseFromParent();
3956 return Legalized;
3957}
3958
3959// This bitcasts a shuffle vector to a different type currently of the same
3960// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
3961// will be used instead.
3962//
3963// <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
3964// ===>
3965// <4 x s64> = G_PTRTOINT <4 x p0>
3966// <4 x s64> = G_PTRTOINT <4 x p0>
3967// <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
3968// <16 x p0> = G_INTTOPTR <16 x s64>
3969LegalizerHelper::LegalizeResult
3970LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
3971 LLT CastTy) {
3972 auto ShuffleMI = cast<GShuffleVector>(&MI);
3973 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
3974 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
3975
3976 // We currently only handle vectors of the same size.
3977 if (TypeIdx != 0 ||
3978 CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
3979 CastTy.getElementCount() != DstTy.getElementCount())
3980 return UnableToLegalize;
3981
3982 LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
3983
3984 auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
3985 auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
3986 auto Shuf =
3987 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
3988 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
3989
3990 MI.eraseFromParent();
3991 return Legalized;
3992}
3993
3994/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
3995///
3996/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
3997///
3998/// ===>
3999///
4000/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
4001/// <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i8>, N / 8
4002/// <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
4003LegalizerHelper::LegalizeResult
4004LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
4005 LLT CastTy) {
4006 auto ES = cast<GExtractSubvector>(&MI);
4007
4008 if (!CastTy.isVector())
4009 return UnableToLegalize;
4010
4011 if (TypeIdx != 0)
4012 return UnableToLegalize;
4013
4014 Register Dst = ES->getReg(0);
4015 Register Src = ES->getSrcVec();
4016 uint64_t Idx = ES->getIndexImm();
4017
4018 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4019
4020 LLT DstTy = MRI.getType(Dst);
4021 LLT SrcTy = MRI.getType(Src);
4022 ElementCount DstTyEC = DstTy.getElementCount();
4023 ElementCount SrcTyEC = SrcTy.getElementCount();
4024 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4025 auto SrcTyMinElts = SrcTyEC.getKnownMinValue();
4026
4027 if (DstTy == CastTy)
4028 return Legalized;
4029
4030 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4031 return UnableToLegalize;
4032
4033 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4034 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4035 if (CastEltSize < DstEltSize)
4036 return UnableToLegalize;
4037
4038 auto AdjustAmt = CastEltSize / DstEltSize;
4039 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4040 SrcTyMinElts % AdjustAmt != 0)
4041 return UnableToLegalize;
4042
4043 Idx /= AdjustAmt;
4044 SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4045 auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
4046 auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
4047 MIRBuilder.buildBitcast(Dst, PromotedES);
4048
4049 ES->eraseFromParent();
4050 return Legalized;
4051}
4052
4053/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
4054///
4055/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
4056/// <vscale x 8 x i1>,
4057/// N
4058///
4059/// ===>
4060///
4061/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
4062/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
4063/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
4064/// <vscale x 1 x i8>, N / 8
4065/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
4066LegalizerHelper::LegalizeResult
4067LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
4068 LLT CastTy) {
4069 auto ES = cast<GInsertSubvector>(&MI);
4070
4071 if (!CastTy.isVector())
4072 return UnableToLegalize;
4073
4074 if (TypeIdx != 0)
4075 return UnableToLegalize;
4076
4077 Register Dst = ES->getReg(0);
4078 Register BigVec = ES->getBigVec();
4079 Register SubVec = ES->getSubVec();
4080 uint64_t Idx = ES->getIndexImm();
4081
4082 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4083
4084 LLT DstTy = MRI.getType(Dst);
4085 LLT BigVecTy = MRI.getType(BigVec);
4086 LLT SubVecTy = MRI.getType(SubVec);
4087
4088 if (DstTy == CastTy)
4089 return Legalized;
4090
4091 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4092 return UnableToLegalize;
4093
4094 ElementCount DstTyEC = DstTy.getElementCount();
4095 ElementCount BigVecTyEC = BigVecTy.getElementCount();
4096 ElementCount SubVecTyEC = SubVecTy.getElementCount();
4097 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4098 auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
4099 auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();
4100
4101 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4102 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4103 if (CastEltSize < DstEltSize)
4104 return UnableToLegalize;
4105
4106 auto AdjustAmt = CastEltSize / DstEltSize;
4107 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4108 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4109 return UnableToLegalize;
4110
4111 Idx /= AdjustAmt;
4112 BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4113 SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4114 auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
4115 auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
4116 auto PromotedIS =
4117 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4118 MIRBuilder.buildBitcast(Dst, PromotedIS);
4119
4120 ES->eraseFromParent();
4121 return Legalized;
4122}
4123
4124LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
4125 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
4126 Register DstReg = LoadMI.getDstReg();
4127 Register PtrReg = LoadMI.getPointerReg();
4128 LLT DstTy = MRI.getType(DstReg);
4129 MachineMemOperand &MMO = LoadMI.getMMO();
4130 LLT MemTy = MMO.getMemoryType();
4131 MachineFunction &MF = MIRBuilder.getMF();
4132
4133 unsigned MemSizeInBits = MemTy.getSizeInBits();
4134 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
4135
4136 if (MemSizeInBits != MemStoreSizeInBits) {
4137 if (MemTy.isVector())
4138 return UnableToLegalize;
4139
4140 // Promote to a byte-sized load if not loading an integral number of
4141 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
4142 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
4143 MachineMemOperand *NewMMO =
4144 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
4145
4146 Register LoadReg = DstReg;
4147 LLT LoadTy = DstTy;
4148
4149 // If this wasn't already an extending load, we need to widen the result
4150 // register to avoid creating a load with a narrower result than the source.
4151 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
4152 LoadTy = WideMemTy;
4153 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4154 }
4155
4156 if (isa<GSExtLoad>(LoadMI)) {
4157 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4158 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4159 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
4160 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4161 // The extra bits are guaranteed to be zero, since we stored them that
4162 // way. A zext load from Wide thus automatically gives zext from MemVT.
4163 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4164 } else {
4165 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4166 }
4167
4168 if (DstTy != LoadTy)
4169 MIRBuilder.buildTrunc(DstReg, LoadReg);
4170
4171 LoadMI.eraseFromParent();
4172 return Legalized;
4173 }
4174
4175 // Big endian lowering not implemented.
4176 if (MIRBuilder.getDataLayout().isBigEndian())
4177 return UnableToLegalize;
4178
4179 // This load needs splitting into power of 2 sized loads.
4180 //
4181 // Our strategy here is to generate anyextending loads for the smaller
4182 // types up to next power-2 result type, and then combine the two larger
4183 // result values together, before truncating back down to the non-pow-2
4184 // type.
4185 // E.g. v1 = i24 load =>
4186 // v2 = i32 zextload (2 byte)
4187 // v3 = i32 load (1 byte)
4188 // v4 = i32 shl v3, 16
4189 // v5 = i32 or v4, v2
4190 // v1 = i24 trunc v5
4191 // By doing this we generate the correct truncate which should get
4192 // combined away as an artifact with a matching extend.
4193
4194 uint64_t LargeSplitSize, SmallSplitSize;
4195
4196 if (!isPowerOf2_32(MemSizeInBits)) {
4197 // This load needs splitting into power of 2 sized loads.
4198 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
4199 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4200 } else {
4201 // This is already a power of 2, but we still need to split this in half.
4202 //
4203 // Assume we're being asked to decompose an unaligned load.
4204 // TODO: If this requires multiple splits, handle them all at once.
4205 auto &Ctx = MF.getFunction().getContext();
4206 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4207 return UnableToLegalize;
4208
4209 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4210 }
4211
4212 if (MemTy.isVector()) {
4213 // TODO: Handle vector extloads
4214 if (MemTy != DstTy)
4215 return UnableToLegalize;
4216
4217 Align Alignment = LoadMI.getAlign();
4218 // Given an alignment larger than the size of the memory, we can increase
4219 // the size of the load without needing to scalarize it.
4220 if (Alignment.value() * 8 > MemSizeInBits &&
4223 DstTy.getElementType());
4224 MachineMemOperand *NewMMO = MF.getMachineMemOperand(&MMO, 0, MoreTy);
4225 auto NewLoad = MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
4226 MIRBuilder.buildDeleteTrailingVectorElements(LoadMI.getReg(0),
4227 NewLoad.getReg(0));
4228 LoadMI.eraseFromParent();
4229 return Legalized;
4230 }
4231
4232 // TODO: We can do better than scalarizing the vector and at least split it
4233 // in half.
4234 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
4235 }
4236
4237 MachineMemOperand *LargeMMO =
4238 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4239 MachineMemOperand *SmallMMO =
4240 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4241
4242 LLT PtrTy = MRI.getType(PtrReg);
4243 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
4244 LLT AnyExtTy = LLT::scalar(AnyExtSize);
4245 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4246 PtrReg, *LargeMMO);
4247
4248 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
4249 LargeSplitSize / 8);
4250 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4251 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
4252 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
4253 SmallPtr, *SmallMMO);
4254
4255 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4256 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4257
4258 if (AnyExtTy == DstTy)
4259 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4260 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
4261 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4262 MIRBuilder.buildTrunc(DstReg, {Or});
4263 } else {
4264 assert(DstTy.isPointer() && "expected pointer");
4265 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4266
4267 // FIXME: We currently consider this to be illegal for non-integral address
4268 // spaces, but we still need a way to reinterpret the bits.
4269 MIRBuilder.buildIntToPtr(DstReg, Or);
4270 }
4271
4272 LoadMI.eraseFromParent();
4273 return Legalized;
4274}
4275
4276LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
4277 // Lower a non-power of 2 store into multiple pow-2 stores.
4278 // E.g. split an i24 store into an i16 store + i8 store.
4279 // We do this by first extending the stored value to the next largest power
4280 // of 2 type, and then using truncating stores to store the components.
4281 // By doing this, likewise with G_LOAD, generate an extend that can be
4282 // artifact-combined away instead of leaving behind extracts.
4283 Register SrcReg = StoreMI.getValueReg();
4284 Register PtrReg = StoreMI.getPointerReg();
4285 LLT SrcTy = MRI.getType(SrcReg);
4286 MachineFunction &MF = MIRBuilder.getMF();
4287 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4288 LLT MemTy = MMO.getMemoryType();
4289
4290 unsigned StoreWidth = MemTy.getSizeInBits();
4291 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
4292
4293 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4294 // Promote to a byte-sized store with upper bits zero if not
4295 // storing an integral number of bytes. For example, promote
4296 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
4297 LLT WideTy = LLT::scalar(StoreSizeInBits);
4298
4299 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4300 // Avoid creating a store with a narrower source than result.
4301 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4302 SrcTy = WideTy;
4303 }
4304
4305 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4306
4307 MachineMemOperand *NewMMO =
4308 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
4309 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4310 StoreMI.eraseFromParent();
4311 return Legalized;
4312 }
4313
4314 if (MemTy.isVector()) {
4315 if (MemTy != SrcTy)
4316 return scalarizeVectorBooleanStore(StoreMI);
4317
4318 // TODO: We can do better than scalarizing the vector and at least split it
4319 // in half.
4320 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
4321 }
4322
4323 unsigned MemSizeInBits = MemTy.getSizeInBits();
4324 uint64_t LargeSplitSize, SmallSplitSize;
4325
4326 if (!isPowerOf2_32(MemSizeInBits)) {
4327 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
4328 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
4329 } else {
4330 auto &Ctx = MF.getFunction().getContext();
4331 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4332 return UnableToLegalize; // Don't know what we're being asked to do.
4333
4334 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4335 }
4336
4337 // Extend to the next pow-2. If this store was itself the result of lowering,
4338 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
4339 // that's wider than the stored size.
4340 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
4341 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
4342
4343 if (SrcTy.isPointer()) {
4344 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
4345 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4346 }
4347
4348 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4349
4350 // Obtain the smaller value by shifting away the larger value.
4351 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4352 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4353
4354 // Generate the PtrAdd and truncating stores.
4355 LLT PtrTy = MRI.getType(PtrReg);
4356 auto OffsetCst = MIRBuilder.buildConstant(
4357 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
4358 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4359
4360 MachineMemOperand *LargeMMO =
4361 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4362 MachineMemOperand *SmallMMO =
4363 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4364 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4365 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4366 StoreMI.eraseFromParent();
4367 return Legalized;
4368}
4369
4370LegalizerHelper::LegalizeResult
4371LegalizerHelper::scalarizeVectorBooleanStore(GStore &StoreMI) {
4372 Register SrcReg = StoreMI.getValueReg();
4373 Register PtrReg = StoreMI.getPointerReg();
4374 LLT SrcTy = MRI.getType(SrcReg);
4375 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4376 LLT MemTy = MMO.getMemoryType();
4377 LLT MemScalarTy = MemTy.getElementType();
4378 MachineFunction &MF = MIRBuilder.getMF();
4379
4380 assert(SrcTy.isVector() && "Expect a vector store type");
4381
4382 if (!MemScalarTy.isByteSized()) {
4383 // We need to build an integer scalar of the vector bit pattern.
4384 // It's not legal for us to add padding when storing a vector.
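// For example (illustrative only): a <4 x s1> memory type is packed into an
// s4 integer, with element I landing at bit I on little-endian targets; the
// ShiftIntoIdx computation below reverses the element order for big-endian.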
4385 unsigned NumBits = MemTy.getSizeInBits();
4386 LLT IntTy = LLT::scalar(NumBits);
4387 auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
4388 LLT IdxTy = TLI.getVectorIdxLLT(MF.getDataLayout());
4389
4390 for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
4391 auto Elt = MIRBuilder.buildExtractVectorElement(
4392 SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
4393 auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
4394 auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
4395 unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
4396 ? (MemTy.getNumElements() - 1) - I
4397 : I;
4398 auto ShiftAmt = MIRBuilder.buildConstant(
4399 IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
4400 auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4401 CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4402 }
4403 auto PtrInfo = MMO.getPointerInfo();
4404 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
4405 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4406 StoreMI.eraseFromParent();
4407 return Legalized;
4408 }
4409
4410 // TODO: implement simple scalarization.
4411 return UnableToLegalize;
4412}
4413
4414LegalizerHelper::LegalizeResult
4415LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
4416 switch (MI.getOpcode()) {
4417 case TargetOpcode::G_LOAD: {
4418 if (TypeIdx != 0)
4419 return UnableToLegalize;
4420 MachineMemOperand &MMO = **MI.memoperands_begin();
4421
4422 // Not sure how to interpret a bitcast of an extending load.
4423 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4424 return UnableToLegalize;
4425
4426 Observer.changingInstr(MI);
4427 bitcastDst(MI, CastTy, 0);
4428 MMO.setType(CastTy);
4429 // The range metadata is no longer valid when reinterpreted as a different
4430 // type.
4431 MMO.clearRanges();
4432 Observer.changedInstr(MI);
4433 return Legalized;
4434 }
4435 case TargetOpcode::G_STORE: {
4436 if (TypeIdx != 0)
4437 return UnableToLegalize;
4438
4439 MachineMemOperand &MMO = **MI.memoperands_begin();
4440
4441 // Not sure how to interpret a bitcast of a truncating store.
4442 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4443 return UnableToLegalize;
4444
4445 Observer.changingInstr(MI);
4446 bitcastSrc(MI, CastTy, 0);
4447 MMO.setType(CastTy);
4448 Observer.changedInstr(MI);
4449 return Legalized;
4450 }
4451 case TargetOpcode::G_SELECT: {
4452 if (TypeIdx != 0)
4453 return UnableToLegalize;
4454
4455 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
4456 LLVM_DEBUG(
4457 dbgs() << "bitcast action not implemented for vector select\n");
4458 return UnableToLegalize;
4459 }
4460
4461 Observer.changingInstr(MI);
4462 bitcastSrc(MI, CastTy, 2);
4463 bitcastSrc(MI, CastTy, 3);
4464 bitcastDst(MI, CastTy, 0);
4465 Observer.changedInstr(MI);
4466 return Legalized;
4467 }
4468 case TargetOpcode::G_AND:
4469 case TargetOpcode::G_OR:
4470 case TargetOpcode::G_XOR: {
4471 Observer.changingInstr(MI);
4472 bitcastSrc(MI, CastTy, 1);
4473 bitcastSrc(MI, CastTy, 2);
4474 bitcastDst(MI, CastTy, 0);
4475 Observer.changedInstr(MI);
4476 return Legalized;
4477 }
4478 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4479 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
4480 case TargetOpcode::G_INSERT_VECTOR_ELT:
4481 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
4482 case TargetOpcode::G_CONCAT_VECTORS:
4483 return bitcastConcatVector(MI, TypeIdx, CastTy);
4484 case TargetOpcode::G_SHUFFLE_VECTOR:
4485 return bitcastShuffleVector(MI, TypeIdx, CastTy);
4486 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4487 return bitcastExtractSubvector(MI, TypeIdx, CastTy);
4488 case TargetOpcode::G_INSERT_SUBVECTOR:
4489 return bitcastInsertSubvector(MI, TypeIdx, CastTy);
4490 default:
4491 return UnableToLegalize;
4492 }
4493}
4494
4495// Legalize an instruction by changing the opcode in place.
4496void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
4497 Observer.changingInstr(MI);
4498 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
4499 Observer.changedInstr(MI);
4500}
4501
4502LegalizerHelper::LegalizeResult
4503LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4504 using namespace TargetOpcode;
4505
4506 switch(MI.getOpcode()) {
4507 default:
4508 return UnableToLegalize;
4509 case TargetOpcode::G_FCONSTANT:
4510 return lowerFConstant(MI);
4511 case TargetOpcode::G_BITCAST:
4512 return lowerBitcast(MI);
4513 case TargetOpcode::G_SREM:
4514 case TargetOpcode::G_UREM: {
4515 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4516 auto Quot =
4517 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4518 {MI.getOperand(1), MI.getOperand(2)});
4519
4520 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
4521 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
4522 MI.eraseFromParent();
4523 return Legalized;
4524 }
4525 case TargetOpcode::G_SADDO:
4526 case TargetOpcode::G_SSUBO:
4527 return lowerSADDO_SSUBO(MI);
4528 case TargetOpcode::G_SADDE:
4529 return lowerSADDE(MI);
4530 case TargetOpcode::G_SSUBE:
4531 return lowerSSUBE(MI);
4532 case TargetOpcode::G_UMULH:
4533 case TargetOpcode::G_SMULH:
4534 return lowerSMULH_UMULH(MI);
4535 case TargetOpcode::G_SMULO:
4536 case TargetOpcode::G_UMULO: {
4537 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4538 // result.
4539 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
4540 LLT Ty = MRI.getType(Res);
4541
4542 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4543 ? TargetOpcode::G_SMULH
4544 : TargetOpcode::G_UMULH;
4545
4546 Observer.changingInstr(MI);
4547 const auto &TII = MIRBuilder.getTII();
4548 MI.setDesc(TII.get(TargetOpcode::G_MUL));
4549 MI.removeOperand(1);
4550 Observer.changedInstr(MI);
4551
4552 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4553 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4554
4555 // Move insert point forward so we can use the Res register if needed.
4556 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
4557
4558 // For *signed* multiply, overflow is detected by checking:
4559 // (hi != (lo >> bitwidth-1))
4560 if (Opcode == TargetOpcode::G_SMULH) {
4561 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4562 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4563 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
4564 } else {
4565 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
4566 }
4567 return Legalized;
4568 }
4569 case TargetOpcode::G_FNEG: {
4570 auto [Res, SubByReg] = MI.getFirst2Regs();
4571 LLT Ty = MRI.getType(Res);
4572
4573 auto SignMask = MIRBuilder.buildConstant(
4574 Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
4575 MIRBuilder.buildXor(Res, SubByReg, SignMask);
4576 MI.eraseFromParent();
4577 return Legalized;
4578 }
4579 case TargetOpcode::G_FSUB:
4580 case TargetOpcode::G_STRICT_FSUB: {
4581 auto [Res, LHS, RHS] = MI.getFirst3Regs();
4582 LLT Ty = MRI.getType(Res);
4583
4584 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4585 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4586
4587 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4588 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4589 else
4590 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4591
4592 MI.eraseFromParent();
4593 return Legalized;
4594 }
4595 case TargetOpcode::G_FMAD:
4596 return lowerFMad(MI);
4597 case TargetOpcode::G_FFLOOR:
4598 return lowerFFloor(MI);
4599 case TargetOpcode::G_LROUND:
4600 case TargetOpcode::G_LLROUND: {
4601 Register DstReg = MI.getOperand(0).getReg();
4602 Register SrcReg = MI.getOperand(1).getReg();
4603 LLT SrcTy = MRI.getType(SrcReg);
4604 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4605 {SrcReg});
4606 MIRBuilder.buildFPTOSI(DstReg, Round);
4607 MI.eraseFromParent();
4608 return Legalized;
4609 }
4610 case TargetOpcode::G_INTRINSIC_ROUND:
4611 return lowerIntrinsicRound(MI);
4612 case TargetOpcode::G_FRINT: {
4613 // Since round even is the assumed rounding mode for unconstrained FP
4614 // operations, rint and roundeven are the same operation.
4615 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4616 return Legalized;
4617 }
4618 case TargetOpcode::G_INTRINSIC_LRINT:
4619 case TargetOpcode::G_INTRINSIC_LLRINT: {
4620 Register DstReg = MI.getOperand(0).getReg();
4621 Register SrcReg = MI.getOperand(1).getReg();
4622 LLT SrcTy = MRI.getType(SrcReg);
4623 auto Round =
4624 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4625 MIRBuilder.buildFPTOSI(DstReg, Round);
4626 MI.eraseFromParent();
4627 return Legalized;
4628 }
4629 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4630 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
4631 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4632 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4633 **MI.memoperands_begin());
4634 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4635 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4636 MI.eraseFromParent();
4637 return Legalized;
4638 }
4639 case TargetOpcode::G_LOAD:
4640 case TargetOpcode::G_SEXTLOAD:
4641 case TargetOpcode::G_ZEXTLOAD:
4642 return lowerLoad(cast<GAnyLoad>(MI));
4643 case TargetOpcode::G_STORE:
4644 return lowerStore(cast<GStore>(MI));
4645 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4646 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4647 case TargetOpcode::G_CTLZ:
4648 case TargetOpcode::G_CTTZ:
4649 case TargetOpcode::G_CTPOP:
4650 return lowerBitCount(MI);
4651 case G_UADDO: {
4652 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
4653
4654 Register NewRes = MRI.cloneVirtualRegister(Res);
4655
4656 MIRBuilder.buildAdd(NewRes, LHS, RHS);
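// The wrapped sum is smaller than either operand exactly when unsigned
// overflow occurred, so comparing against RHS alone is sufficient here.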
4657 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4658
4659 MIRBuilder.buildCopy(Res, NewRes);
4660
4661 MI.eraseFromParent();
4662 return Legalized;
4663 }
4664 case G_UADDE: {
4665 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
4666 const LLT CondTy = MRI.getType(CarryOut);
4667 const LLT Ty = MRI.getType(Res);
4668
4669 Register NewRes = MRI.cloneVirtualRegister(Res);
4670
4671 // Initial add of the two operands.
4672 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
4673
4674 // Initial check for carry.
4675 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4676
4677 // Add the sum and the carry.
4678 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
4679 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4680
4681 // Second check for carry. We can only carry if the initial sum is all 1s
4682 // and the carry is set, resulting in a new sum of 0.
4683 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4684 auto ResEqZero =
4685 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
4686 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4687 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
4688
4689 MIRBuilder.buildCopy(Res, NewRes);
4690
4691 MI.eraseFromParent();
4692 return Legalized;
4693 }
4694 case G_USUBO: {
4695 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
4696
4697 MIRBuilder.buildSub(Res, LHS, RHS);
4698 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
4699
4700 MI.eraseFromParent();
4701 return Legalized;
4702 }
4703 case G_USUBE: {
4704 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
4705 const LLT CondTy = MRI.getType(BorrowOut);
4706 const LLT Ty = MRI.getType(Res);
4707
4708 // Initial subtract of the two operands.
4709 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
4710
4711 // Initial check for borrow.
4712 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4713
4714 // Subtract the borrow from the first subtract.
4715 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
4716 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4717
4718 // Second check for borrow. We can only borrow if the initial difference is
4719 // 0 and the borrow is set, resulting in a new difference of all 1s.
4720 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4721 auto TmpResEqZero =
4722 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4723 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4724 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4725
4726 MI.eraseFromParent();
4727 return Legalized;
4728 }
4729 case G_UITOFP:
4730 return lowerUITOFP(MI);
4731 case G_SITOFP:
4732 return lowerSITOFP(MI);
4733 case G_FPTOUI:
4734 return lowerFPTOUI(MI);
4735 case G_FPTOSI:
4736 return lowerFPTOSI(MI);
4737 case G_FPTOUI_SAT:
4738 case G_FPTOSI_SAT:
4739 return lowerFPTOINT_SAT(MI);
4740 case G_FPTRUNC:
4741 return lowerFPTRUNC(MI);
4742 case G_FPOWI:
4743 return lowerFPOWI(MI);
4744 case G_SMIN:
4745 case G_SMAX:
4746 case G_UMIN:
4747 case G_UMAX:
4748 return lowerMinMax(MI);
4749 case G_SCMP:
4750 case G_UCMP:
4751 return lowerThreewayCompare(MI);
4752 case G_FCOPYSIGN:
4753 return lowerFCopySign(MI);
4754 case G_FMINNUM:
4755 case G_FMAXNUM:
4756 case G_FMINIMUMNUM:
4757 case G_FMAXIMUMNUM:
4758 return lowerFMinNumMaxNum(MI);
4759 case G_FMINIMUM:
4760 case G_FMAXIMUM:
4761 return lowerFMinimumMaximum(MI);
4762 case G_MERGE_VALUES:
4763 return lowerMergeValues(MI);
4764 case G_UNMERGE_VALUES:
4765 return lowerUnmergeValues(MI);
4766 case TargetOpcode::G_SEXT_INREG: {
4767 assert(MI.getOperand(2).isImm() && "Expected immediate");
4768 int64_t SizeInBits = MI.getOperand(2).getImm();
4769
4770 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4771 LLT DstTy = MRI.getType(DstReg);
4772 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4773
4774 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4775 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4776 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4777 MI.eraseFromParent();
4778 return Legalized;
4779 }
4780 case G_EXTRACT_VECTOR_ELT:
4781 case G_INSERT_VECTOR_ELT:
4782 return lowerExtractInsertVectorElt(MI);
4783 case G_SHUFFLE_VECTOR:
4784 return lowerShuffleVector(MI);
4785 case G_VECTOR_COMPRESS:
4786 return lowerVECTOR_COMPRESS(MI);
4787 case G_DYN_STACKALLOC:
4788 return lowerDynStackAlloc(MI);
4789 case G_STACKSAVE:
4790 return lowerStackSave(MI);
4791 case G_STACKRESTORE:
4792 return lowerStackRestore(MI);
4793 case G_EXTRACT:
4794 return lowerExtract(MI);
4795 case G_INSERT:
4796 return lowerInsert(MI);
4797 case G_BSWAP:
4798 return lowerBswap(MI);
4799 case G_BITREVERSE:
4800 return lowerBitreverse(MI);
4801 case G_READ_REGISTER:
4802 case G_WRITE_REGISTER:
4803 return lowerReadWriteRegister(MI);
4804 case G_UADDSAT:
4805 case G_USUBSAT: {
4806 // Try to make a reasonable guess about which lowering strategy to use. The
4807 // target can override this with custom lowering and calling the
4808 // implementation functions.
4809 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4810 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4811 return lowerAddSubSatToMinMax(MI);
4812 return lowerAddSubSatToAddoSubo(MI);
4813 }
4814 case G_SADDSAT:
4815 case G_SSUBSAT: {
4816 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4817
4818 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4819 // since it's a shorter expansion. However, we would need to figure out the
4820 // preferred boolean type for the carry out for the query.
4821 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4822 return lowerAddSubSatToMinMax(MI);
4823 return lowerAddSubSatToAddoSubo(MI);
4824 }
4825 case G_SSHLSAT:
4826 case G_USHLSAT:
4827 return lowerShlSat(MI);
4828 case G_ABS:
4829 return lowerAbsToAddXor(MI);
4830 case G_ABDS:
4831 case G_ABDU: {
4832 bool IsSigned = MI.getOpcode() == G_ABDS;
4833 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4834 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4835 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4836 return lowerAbsDiffToMinMax(MI);
4837 }
4838 return lowerAbsDiffToSelect(MI);
4839 }
4840 case G_FABS:
4841 return lowerFAbs(MI);
4842 case G_SELECT:
4843 return lowerSelect(MI);
4844 case G_IS_FPCLASS:
4845 return lowerISFPCLASS(MI);
4846 case G_SDIVREM:
4847 case G_UDIVREM:
4848 return lowerDIVREM(MI);
4849 case G_FSHL:
4850 case G_FSHR:
4851 return lowerFunnelShift(MI);
4852 case G_ROTL:
4853 case G_ROTR:
4854 return lowerRotate(MI);
4855 case G_MEMSET:
4856 case G_MEMCPY:
4857 case G_MEMMOVE:
4858 return lowerMemCpyFamily(MI);
4859 case G_MEMCPY_INLINE:
4860 return lowerMemcpyInline(MI);
4861 case G_ZEXT:
4862 case G_SEXT:
4863 case G_ANYEXT:
4864 return lowerEXT(MI);
4865 case G_TRUNC:
4866 return lowerTRUNC(MI);
4867 GISEL_VECREDUCE_CASES_NONSEQ
4868 return lowerVectorReduction(MI);
4869 case G_VAARG:
4870 return lowerVAArg(MI);
4871 case G_ATOMICRMW_SUB: {
4872 auto [Ret, Mem, Val] = MI.getFirst3Regs();
4873 const LLT ValTy = MRI.getType(Val);
4874 MachineMemOperand *MMO = *MI.memoperands_begin();
4875
4876 auto VNeg = MIRBuilder.buildNeg(ValTy, Val);
4877 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4878 MI.eraseFromParent();
4879 return Legalized;
4880 }
4881 }
4882}
4883
4884Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4885 Align MinAlign) const {
4886 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4887 // datalayout for the preferred alignment. Also there should be a target hook
4888 // for this to allow targets to reduce the alignment and ignore the
4889 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4890 // the type.
4891 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4892}
4893
4894MachineInstrBuilder
4895LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4896 MachinePointerInfo &PtrInfo) {
4897 MachineFunction &MF = MIRBuilder.getMF();
4898 const DataLayout &DL = MIRBuilder.getDataLayout();
4899 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4900
4901 unsigned AddrSpace = DL.getAllocaAddrSpace();
4902 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4903
4904 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4905 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4906}
4907
4908MachineInstrBuilder LegalizerHelper::createStackStoreLoad(const DstOp &Res,
4909 const SrcOp &Val) {
4910 LLT SrcTy = Val.getLLTTy(MRI);
4911 Align StackTypeAlign =
4912 std::max(getStackTemporaryAlignment(SrcTy),
4913 getStackTemporaryAlignment(Res.getLLTTy(MRI)));
4914 MachinePointerInfo PtrInfo;
4915 auto StackTemp =
4916 createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo);
4917
4918 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4919 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4920}
4921
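/// Clamp \p IdxReg to a valid index for \p VecTy. A constant index that is
/// already in bounds is returned unchanged; for a power-of-2 element count the
/// clamp is a cheap AND mask (e.g. 8 elements gives idx & 7), and otherwise a
/// umin against NElts - 1 is used.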
4922static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
4923 LLT VecTy) {
4924 LLT IdxTy = B.getMRI()->getType(IdxReg);
4925 unsigned NElts = VecTy.getNumElements();
4926
4927 int64_t IdxVal;
4928 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4929 if (IdxVal < VecTy.getNumElements())
4930 return IdxReg;
4931 // If a constant index would be out of bounds, clamp it as well.
4932 }
4933
4934 if (isPowerOf2_32(NElts)) {
4935 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4936 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4937 }
4938
4939 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4940 .getReg(0);
4941}
4942
4943 Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4944 Register Index) {
4945 LLT EltTy = VecTy.getElementType();
4946
4947 // Calculate the element offset and add it to the pointer.
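 // e.g. for a <4 x s32> vector at %vec.ptr, element %i ends up at
 // %vec.ptr + 4 * clamp(%i).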
4948 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4949 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4950 "Converting bits to bytes lost precision");
4951
4952 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4953
4954 // Convert index to the correct size for the address space.
4955 const DataLayout &DL = MIRBuilder.getDataLayout();
4956 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4957 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4958 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4959 if (IdxTy != MRI.getType(Index))
4960 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4961
4962 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4963 MIRBuilder.buildConstant(IdxTy, EltSize));
4964
4965 LLT PtrTy = MRI.getType(VecPtr);
4966 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4967}
4968
4969#ifndef NDEBUG
4970 /// Check that all vector operands have the same number of elements. Other
4971 /// operands should be listed in NonVecOpIndices.
4972 static bool hasSameNumEltsOnAllVectorOperands(
4973 GenericMachineInstr &MI, MachineRegisterInfo &MRI,
4974 std::initializer_list<unsigned> NonVecOpIndices) {
4975 if (MI.getNumMemOperands() != 0)
4976 return false;
4977
4978 LLT VecTy = MRI.getType(MI.getReg(0));
4979 if (!VecTy.isVector())
4980 return false;
4981 unsigned NumElts = VecTy.getNumElements();
4982
4983 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4984 MachineOperand &Op = MI.getOperand(OpIdx);
4985 if (!Op.isReg()) {
4986 if (!is_contained(NonVecOpIndices, OpIdx))
4987 return false;
4988 continue;
4989 }
4990
4991 LLT Ty = MRI.getType(Op.getReg());
4992 if (!Ty.isVector()) {
4993 if (!is_contained(NonVecOpIndices, OpIdx))
4994 return false;
4995 continue;
4996 }
4997
4998 if (Ty.getNumElements() != NumElts)
4999 return false;
5000 }
5001
5002 return true;
5003}
5004#endif
5005
5006 /// Fill \p DstOps with DstOps that combined cover the same number of elements
5007 /// as \p Ty. These DstOps have either scalar type when \p NumElts = 1 or are
5008 /// vectors with \p NumElts elements. When Ty.getNumElements() is not a multiple
5009 /// of \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
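 /// e.g. Ty = <7 x s16> and NumElts = 4 gives DstOps = { <4 x s16>, <3 x s16> }.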
5010static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
5011 unsigned NumElts) {
5012 LLT LeftoverTy;
5013 assert(Ty.isVector() && "Expected vector type");
5014 LLT EltTy = Ty.getElementType();
5015 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
5016 int NumParts, NumLeftover;
5017 std::tie(NumParts, NumLeftover) =
5018 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
5019
5020 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
5021 for (int i = 0; i < NumParts; ++i) {
5022 DstOps.push_back(NarrowTy);
5023 }
5024
5025 if (LeftoverTy.isValid()) {
5026 assert(NumLeftover == 1 && "expected exactly one leftover");
5027 DstOps.push_back(LeftoverTy);
5028 }
5029}
5030
5031 /// Operand \p Op is used by \p N sub-instructions. Fill \p Ops with \p N SrcOps
5032/// made from \p Op depending on operand type.
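 /// e.g. the predicate operand of a split G_ICMP is replicated once for each
 /// narrow compare produced by the split.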
5033 static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
5034 MachineOperand &Op) {
5035 for (unsigned i = 0; i < N; ++i) {
5036 if (Op.isReg())
5037 Ops.push_back(Op.getReg());
5038 else if (Op.isImm())
5039 Ops.push_back(Op.getImm());
5040 else if (Op.isPredicate())
5041 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
5042 else
5043 llvm_unreachable("Unsupported type");
5044 }
5045}
5046
5047// Handle splitting vector operations which need to have the same number of
5048// elements in each type index, but each type index may have a different element
5049// type.
5050//
5051// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
5052// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5053// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5054//
5055 // Also handles some irregular breakdown cases, e.g.
5056 // <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
5057// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5058// s64 = G_SHL s64, s32
5059 LegalizerHelper::LegalizeResult
5060 LegalizerHelper::fewerElementsVectorMultiEltType(
5061 GenericMachineInstr &MI, unsigned NumElts,
5062 std::initializer_list<unsigned> NonVecOpIndices) {
5063 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
5064 "Non-compatible opcode or not specified non-vector operands");
5065 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5066
5067 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5068 unsigned NumDefs = MI.getNumDefs();
5069
5070 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
5071 // Build instructions with DstOps so an instruction found by CSE is used
5072 // directly. CSE copies the found instruction into the given vreg when building with a vreg dest.
5073 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
5074 // Output registers will be taken from created instructions.
5075 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
5076 for (unsigned i = 0; i < NumDefs; ++i) {
5077 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
5078 }
5079
5080 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
5081 // Operands listed in NonVecOpIndices will be used as is without splitting;
5082 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
5083 // scalar condition (op 1), immediate in sext_inreg (op 2).
5084 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
5085 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5086 ++UseIdx, ++UseNo) {
5087 if (is_contained(NonVecOpIndices, UseIdx)) {
5088 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
5089 MI.getOperand(UseIdx));
5090 } else {
5091 SmallVector<Register, 8> SplitPieces;
5092 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
5093 MRI);
5094 llvm::append_range(InputOpsPieces[UseNo], SplitPieces);
5095 }
5096 }
5097
5098 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5099
5100 // Take i-th piece of each input operand split and build sub-vector/scalar
5101 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
5102 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5103 SmallVector<DstOp, 2> Defs;
5104 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5105 Defs.push_back(OutputOpsPieces[DstNo][i]);
5106
5107 SmallVector<SrcOp, 3> Uses;
5108 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5109 Uses.push_back(InputOpsPieces[InputNo][i]);
5110
5111 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
5112 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5113 OutputRegs[DstNo].push_back(I.getReg(DstNo));
5114 }
5115
5116 // Merge small outputs into MI's output for each def operand.
5117 if (NumLeftovers) {
5118 for (unsigned i = 0; i < NumDefs; ++i)
5119 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
5120 } else {
5121 for (unsigned i = 0; i < NumDefs; ++i)
5122 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
5123 }
5124
5125 MI.eraseFromParent();
5126 return Legalized;
5127}
5128
5129 LegalizerHelper::LegalizeResult
5130 LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
5131 unsigned NumElts) {
5132 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5133
5134 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5135 unsigned NumDefs = MI.getNumDefs();
5136
5137 SmallVector<DstOp, 8> OutputOpsPieces;
5138 SmallVector<Register, 8> OutputRegs;
5139 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
5140
5141 // Instructions that perform the register split will be inserted in the basic
5142 // block where the register is defined (the basic block is in the next operand).
5143 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
5144 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5145 UseIdx += 2, ++UseNo) {
5146 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
5147 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5148 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
5149 MIRBuilder, MRI);
5150 }
5151
5152 // Build PHIs with fewer elements.
5153 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5154 MIRBuilder.setInsertPt(*MI.getParent(), MI);
5155 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5156 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5157 Phi.addDef(
5158 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5159 OutputRegs.push_back(Phi.getReg(0));
5160
5161 for (unsigned j = 0; j < NumInputs / 2; ++j) {
5162 Phi.addUse(InputOpsPieces[j][i]);
5163 Phi.add(MI.getOperand(1 + j * 2 + 1));
5164 }
5165 }
5166
5167 // Set the insert point after the existing PHIs
5168 MachineBasicBlock &MBB = *MI.getParent();
5169 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
5170
5171 // Merge small outputs into MI's def.
5172 if (NumLeftovers) {
5173 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
5174 } else {
5175 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
5176 }
5177
5178 MI.eraseFromParent();
5179 return Legalized;
5180}
5181
5182 LegalizerHelper::LegalizeResult
5183 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
5184 unsigned TypeIdx,
5185 LLT NarrowTy) {
5186 const int NumDst = MI.getNumOperands() - 1;
5187 const Register SrcReg = MI.getOperand(NumDst).getReg();
5188 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5189 LLT SrcTy = MRI.getType(SrcReg);
5190
5191 if (TypeIdx != 1 || NarrowTy == DstTy)
5192 return UnableToLegalize;
5193
5194 // Requires compatible types. Otherwise SrcReg should have been defined by a
5195 // merge-like instruction that would get artifact-combined. Most likely the
5196 // instruction that defines SrcReg has to perform more/fewer elements
5197 // legalization compatible with NarrowTy.
5198 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5199 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5200
5201 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5202 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
5203 return UnableToLegalize;
5204
5205 // This is most likely DstTy (smaller than register size) packed in SrcTy
5206 // (larger than register size), and since the unmerge was not combined it will
5207 // be lowered to bit-sequence extracts from a register. Unpack SrcTy into
5208 // NarrowTy (register size) pieces first, then unpack each NarrowTy piece to DstTy.
5209
5210 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
5211 //
5212 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5213 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5214 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5215 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5216 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5217 const int PartsPerUnmerge = NumDst / NumUnmerge;
5218
5219 for (int I = 0; I != NumUnmerge; ++I) {
5220 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5221
5222 for (int J = 0; J != PartsPerUnmerge; ++J)
5223 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
5224 MIB.addUse(Unmerge.getReg(I));
5225 }
5226
5227 MI.eraseFromParent();
5228 return Legalized;
5229}
5230
5231 LegalizerHelper::LegalizeResult
5232 LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
5233 LLT NarrowTy) {
5234 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5235 // Requires compatible types. Otherwise the user of DstReg did not perform the
5236 // unmerge that should have been artifact-combined. Most likely the instruction
5237 // that uses DstReg has to do more/fewer elements legalization compatible with NarrowTy.
5238 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5239 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5240 if (NarrowTy == SrcTy)
5241 return UnableToLegalize;
5242
5243 // This attempts to lower part of an LCMTy merge/unmerge sequence. The intended
5244 // use is for old MIR tests. Since the changes to more/fewer elements legalization
5245 // it should no longer be possible to generate MIR like this when starting from
5246 // LLVM IR, because the LCMTy approach was replaced with merge/unmerge to vector elements.
5247 if (TypeIdx == 1) {
5248 assert(SrcTy.isVector() && "Expected vector types");
5249 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5250 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5251 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
5252 return UnableToLegalize;
5253 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
5254 //
5255 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
5256 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
5257 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
5258 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
5259 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
5260 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
5261
5262 SmallVector<Register, 8> Elts;
5263 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
5264 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
5265 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
5266 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5267 Elts.push_back(Unmerge.getReg(j));
5268 }
5269
5270 SmallVector<Register, 8> NarrowTyElts;
5271 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5272 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5273 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
5274 ++i, Offset += NumNarrowTyElts) {
5275 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
5276 NarrowTyElts.push_back(
5277 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5278 }
5279
5280 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5281 MI.eraseFromParent();
5282 return Legalized;
5283 }
5284
5285 assert(TypeIdx == 0 && "Bad type index");
5286 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5287 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5288 return UnableToLegalize;
5289
5290 // This is most likely SrcTy (smaller than register size) packed in DstTy
5291 // (larger than register size), and since the merge was not combined it will be
5292 // lowered to bit-sequence packing into a register. Merge SrcTy into NarrowTy
5293 // (register size) pieces first, then merge each NarrowTy piece into DstTy.
5294
5295 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
5296 //
5297 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5298 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5299 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
5300 SmallVector<Register, 8> NarrowTyElts;
5301 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5302 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5303 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
5304 for (unsigned i = 0; i < NumParts; ++i) {
5305 SmallVector<Register, 8> Sources;
5306 for (unsigned j = 0; j < NumElts; ++j)
5307 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
5308 NarrowTyElts.push_back(
5309 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5310 }
5311
5312 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5313 MI.eraseFromParent();
5314 return Legalized;
5315}
5316
5317 LegalizerHelper::LegalizeResult
5318 LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
5319 unsigned TypeIdx,
5320 LLT NarrowVecTy) {
5321 auto [DstReg, SrcVec] = MI.getFirst2Regs();
5322 Register InsertVal;
5323 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5324
5325 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
5326 if (IsInsert)
5327 InsertVal = MI.getOperand(2).getReg();
5328
5329 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
5330 LLT VecTy = MRI.getType(SrcVec);
5331
5332 // If the index is a constant, we can really break this down as you would
5333 // expect, and index into the target size pieces.
5334 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
5335 if (MaybeCst) {
5336 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5337 // Avoid out of bounds indexing the pieces.
5338 if (IdxVal >= VecTy.getNumElements()) {
5339 MIRBuilder.buildUndef(DstReg);
5340 MI.eraseFromParent();
5341 return Legalized;
5342 }
5343
5344 if (!NarrowVecTy.isVector()) {
5345 SmallVector<Register, 8> SplitPieces;
5346 extractParts(MI.getOperand(1).getReg(), NarrowVecTy,
5347 VecTy.getNumElements(), SplitPieces, MIRBuilder, MRI);
5348 if (IsInsert) {
5349 SplitPieces[IdxVal] = InsertVal;
5350 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), SplitPieces);
5351 } else {
5352 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5353 }
5354 } else {
5355 SmallVector<Register, 8> VecParts;
5356 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5357
5358 // Build a sequence of NarrowTy pieces in VecParts for this operand.
5359 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5360 TargetOpcode::G_ANYEXT);
5361
5362 unsigned NewNumElts = NarrowVecTy.getNumElements();
5363
5364 LLT IdxTy = MRI.getType(Idx);
5365 int64_t PartIdx = IdxVal / NewNumElts;
5366 auto NewIdx =
5367 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5368
5369 if (IsInsert) {
5370 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5371
5372 // Use the adjusted index to insert into one of the subvectors.
5373 auto InsertPart = MIRBuilder.buildInsertVectorElement(
5374 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5375 VecParts[PartIdx] = InsertPart.getReg(0);
5376
5377 // Recombine the inserted subvector with the others to reform the result
5378 // vector.
5379 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5380 } else {
5381 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5382 }
5383 }
5384
5385 MI.eraseFromParent();
5386 return Legalized;
5387 }
5388
5389 // With a variable index, we can't perform the operation in a smaller type, so
5390 // we're forced to expand this.
5391 //
5392 // TODO: We could emit a chain of compare/select to figure out which piece to
5393 // index.
5395}
5396
5397 LegalizerHelper::LegalizeResult
5398 LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
5399 LLT NarrowTy) {
5400 // FIXME: Don't know how to handle secondary types yet.
5401 if (TypeIdx != 0)
5402 return UnableToLegalize;
5403
5404 if (!NarrowTy.isByteSized()) {
5405 LLVM_DEBUG(dbgs() << "Can't narrow load/store to non-byte-sized type\n");
5406 return UnableToLegalize;
5407 }
5408
5409 // This implementation doesn't work for atomics. Give up instead of doing
5410 // something invalid.
5411 if (LdStMI.isAtomic())
5412 return UnableToLegalize;
5413
5414 bool IsLoad = isa<GLoad>(LdStMI);
5415 Register ValReg = LdStMI.getReg(0);
5416 Register AddrReg = LdStMI.getPointerReg();
5417 LLT ValTy = MRI.getType(ValReg);
5418
5419 // FIXME: Do we need a distinct NarrowMemory legalize action?
5420 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
5421 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
5422 return UnableToLegalize;
5423 }
5424
5425 int NumParts = -1;
5426 int NumLeftover = -1;
5427 LLT LeftoverTy;
5428 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
5429 if (IsLoad) {
5430 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
5431 } else {
5432 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5433 NarrowLeftoverRegs, MIRBuilder, MRI)) {
5434 NumParts = NarrowRegs.size();
5435 NumLeftover = NarrowLeftoverRegs.size();
5436 }
5437 }
5438
5439 if (NumParts == -1)
5440 return UnableToLegalize;
5441
5442 LLT PtrTy = MRI.getType(AddrReg);
5443 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
5444
5445 unsigned TotalSize = ValTy.getSizeInBits();
5446
5447 // Split the load/store into PartTy sized pieces starting at Offset. If this
5448 // is a load, return the new registers in ValRegs. For a store, each element
5449 // of ValRegs should have type PartTy. Returns the next offset that needs to
5450 // be handled.
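 // For example, narrowing an s96 load with NarrowTy = s32 (little-endian)
 // produces three s32 loads at byte offsets 0, 4 and 8 from AddrReg.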
5451 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
5452 auto MMO = LdStMI.getMMO();
5453 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
5454 unsigned NumParts, unsigned Offset) -> unsigned {
5455 MachineFunction &MF = MIRBuilder.getMF();
5456 unsigned PartSize = PartTy.getSizeInBits();
5457 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
5458 ++Idx) {
5459 unsigned ByteOffset = Offset / 8;
5460 Register NewAddrReg;
5461
5462 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5463 ByteOffset);
5464
5465 MachineMemOperand *NewMMO =
5466 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
5467
5468 if (IsLoad) {
5469 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5470 ValRegs.push_back(Dst);
5471 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5472 } else {
5473 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5474 }
5475 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
5476 }
5477
5478 return Offset;
5479 };
5480
5481 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
5482 unsigned HandledOffset =
5483 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
5484
5485 // Handle the rest of the register if this isn't an even type breakdown.
5486 if (LeftoverTy.isValid())
5487 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5488
5489 if (IsLoad) {
5490 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5491 LeftoverTy, NarrowLeftoverRegs);
5492 }
5493
5494 LdStMI.eraseFromParent();
5495 return Legalized;
5496}
5497
5498 LegalizerHelper::LegalizeResult
5499 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
5500 LLT NarrowTy) {
5501 using namespace TargetOpcode;
5502 GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
5503 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5504
5505 switch (MI.getOpcode()) {
5506 case G_IMPLICIT_DEF:
5507 case G_TRUNC:
5508 case G_AND:
5509 case G_OR:
5510 case G_XOR:
5511 case G_ADD:
5512 case G_SUB:
5513 case G_MUL:
5514 case G_PTR_ADD:
5515 case G_SMULH:
5516 case G_UMULH:
5517 case G_FADD:
5518 case G_FMUL:
5519 case G_FSUB:
5520 case G_FNEG:
5521 case G_FABS:
5522 case G_FCANONICALIZE:
5523 case G_FDIV:
5524 case G_FREM:
5525 case G_FMA:
5526 case G_FMAD:
5527 case G_FPOW:
5528 case G_FEXP:
5529 case G_FEXP2:
5530 case G_FEXP10:
5531 case G_FLOG:
5532 case G_FLOG2:
5533 case G_FLOG10:
5534 case G_FLDEXP:
5535 case G_FNEARBYINT:
5536 case G_FCEIL:
5537 case G_FFLOOR:
5538 case G_FRINT:
5539 case G_INTRINSIC_LRINT:
5540 case G_INTRINSIC_LLRINT:
5541 case G_INTRINSIC_ROUND:
5542 case G_INTRINSIC_ROUNDEVEN:
5543 case G_LROUND:
5544 case G_LLROUND:
5545 case G_INTRINSIC_TRUNC:
5546 case G_FMODF:
5547 case G_FCOS:
5548 case G_FSIN:
5549 case G_FTAN:
5550 case G_FACOS:
5551 case G_FASIN:
5552 case G_FATAN:
5553 case G_FATAN2:
5554 case G_FCOSH:
5555 case G_FSINH:
5556 case G_FTANH:
5557 case G_FSQRT:
5558 case G_BSWAP:
5559 case G_BITREVERSE:
5560 case G_SDIV:
5561 case G_UDIV:
5562 case G_SREM:
5563 case G_UREM:
5564 case G_SDIVREM:
5565 case G_UDIVREM:
5566 case G_SMIN:
5567 case G_SMAX:
5568 case G_UMIN:
5569 case G_UMAX:
5570 case G_ABS:
5571 case G_FMINNUM:
5572 case G_FMAXNUM:
5573 case G_FMINNUM_IEEE:
5574 case G_FMAXNUM_IEEE:
5575 case G_FMINIMUM:
5576 case G_FMAXIMUM:
5577 case G_FMINIMUMNUM:
5578 case G_FMAXIMUMNUM:
5579 case G_FSHL:
5580 case G_FSHR:
5581 case G_ROTL:
5582 case G_ROTR:
5583 case G_FREEZE:
5584 case G_SADDSAT:
5585 case G_SSUBSAT:
5586 case G_UADDSAT:
5587 case G_USUBSAT:
5588 case G_UMULO:
5589 case G_SMULO:
5590 case G_SHL:
5591 case G_LSHR:
5592 case G_ASHR:
5593 case G_SSHLSAT:
5594 case G_USHLSAT:
5595 case G_CTLZ:
5596 case G_CTLZ_ZERO_UNDEF:
5597 case G_CTTZ:
5598 case G_CTTZ_ZERO_UNDEF:
5599 case G_CTPOP:
5600 case G_FCOPYSIGN:
5601 case G_ZEXT:
5602 case G_SEXT:
5603 case G_ANYEXT:
5604 case G_FPEXT:
5605 case G_FPTRUNC:
5606 case G_SITOFP:
5607 case G_UITOFP:
5608 case G_FPTOSI:
5609 case G_FPTOUI:
5610 case G_FPTOSI_SAT:
5611 case G_FPTOUI_SAT:
5612 case G_INTTOPTR:
5613 case G_PTRTOINT:
5614 case G_ADDRSPACE_CAST:
5615 case G_UADDO:
5616 case G_USUBO:
5617 case G_UADDE:
5618 case G_USUBE:
5619 case G_SADDO:
5620 case G_SSUBO:
5621 case G_SADDE:
5622 case G_SSUBE:
5623 case G_STRICT_FADD:
5624 case G_STRICT_FSUB:
5625 case G_STRICT_FMUL:
5626 case G_STRICT_FMA:
5627 case G_STRICT_FLDEXP:
5628 case G_FFREXP:
5629 return fewerElementsVectorMultiEltType(GMI, NumElts);
5630 case G_ICMP:
5631 case G_FCMP:
5632 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
5633 case G_IS_FPCLASS:
5634 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
5635 case G_SELECT:
5636 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
5637 return fewerElementsVectorMultiEltType(GMI, NumElts);
5638 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
5639 case G_PHI:
5640 return fewerElementsVectorPhi(GMI, NumElts);
5641 case G_UNMERGE_VALUES:
5642 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
5643 case G_BUILD_VECTOR:
5644 assert(TypeIdx == 0 && "not a vector type index");
5645 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5646 case G_CONCAT_VECTORS:
5647 if (TypeIdx != 1) // TODO: This probably does work as expected already.
5648 return UnableToLegalize;
5649 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5650 case G_EXTRACT_VECTOR_ELT:
5651 case G_INSERT_VECTOR_ELT:
5652 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
5653 case G_LOAD:
5654 case G_STORE:
5655 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
5656 case G_SEXT_INREG:
5657 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
5658 GISEL_VECREDUCE_CASES_NONSEQ
5659 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
5660 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5661 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5662 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
5663 case G_SHUFFLE_VECTOR:
5664 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
5665 case G_FPOWI:
5666 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
5667 case G_BITCAST:
5668 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
5669 case G_INTRINSIC_FPTRUNC_ROUND:
5670 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
5671 default:
5672 return UnableToLegalize;
5673 }
5674}
5675
5676 LegalizerHelper::LegalizeResult
5677 LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned int TypeIdx,
5678 LLT NarrowTy) {
5679 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5680 "Not a bitcast operation");
5681
5682 if (TypeIdx != 0)
5683 return UnableToLegalize;
5684
5685 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5686
5687 unsigned NewElemCount =
5688 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5689 SmallVector<Register> SrcVRegs, BitcastVRegs;
5690 if (NewElemCount == 1) {
5691 LLT SrcNarrowTy = SrcTy.getElementType();
5692
5693 auto Unmerge = MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5694 getUnmergeResults(SrcVRegs, *Unmerge);
5695 } else {
5696 LLT SrcNarrowTy = LLT::fixed_vector(NewElemCount, SrcTy.getElementType());
5697
5698 // Split the Src and Dst Reg into smaller registers
5699 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5700 return UnableToLegalize;
5701 }
5702
5703 // Build new smaller bitcast instructions
5704 // Leftover types are not supported for now, but will need to be.
5705 for (Register Reg : SrcVRegs)
5706 BitcastVRegs.push_back(MIRBuilder.buildBitcast(NarrowTy, Reg).getReg(0));
5707
5708 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5709 MI.eraseFromParent();
5710 return Legalized;
5711}
5712
5713 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
5714 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5715 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5716 if (TypeIdx != 0)
5717 return UnableToLegalize;
5718
5719 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5720 MI.getFirst3RegLLTs();
5721 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5722 // The shuffle should be canonicalized by now.
5723 if (DstTy != Src1Ty)
5724 return UnableToLegalize;
5725 if (DstTy != Src2Ty)
5726 return UnableToLegalize;
5727
5728 if (!isPowerOf2_32(DstTy.getNumElements()))
5729 return UnableToLegalize;
5730
5731 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5732 // Further legalization attempts will be needed to split further.
5733 NarrowTy =
5734 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
5735 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5736
5737 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
5738 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
5739 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
5740 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5741 SplitSrc2Regs[1]};
5742
5743 Register Hi, Lo;
5744
5745 // If Lo or Hi uses elements from at most two of the four input vectors, then
5746 // express it as a vector shuffle of those two inputs. Otherwise extract the
5747 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
5748 SmallVector<int, 16> Ops;
5749 for (unsigned High = 0; High < 2; ++High) {
5750 Register &Output = High ? Hi : Lo;
5751
5752 // Build a shuffle mask for the output, discovering on the fly which
5753 // input vectors to use as shuffle operands (recorded in InputUsed).
5754 // If building a suitable shuffle vector proves too hard, then bail
5755 // out with useBuildVector set.
5756 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
5757 unsigned FirstMaskIdx = High * NewElts;
5758 bool UseBuildVector = false;
5759 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5760 // The mask element. This indexes into the input.
5761 int Idx = Mask[FirstMaskIdx + MaskOffset];
5762
5763 // The input vector this mask element indexes into.
5764 unsigned Input = (unsigned)Idx / NewElts;
5765
5766 if (Input >= std::size(Inputs)) {
5767 // The mask element does not index into any input vector.
5768 Ops.push_back(-1);
5769 continue;
5770 }
5771
5772 // Turn the index into an offset from the start of the input vector.
5773 Idx -= Input * NewElts;
5774
5775 // Find or create a shuffle vector operand to hold this input.
5776 unsigned OpNo;
5777 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5778 if (InputUsed[OpNo] == Input) {
5779 // This input vector is already an operand.
5780 break;
5781 } else if (InputUsed[OpNo] == -1U) {
5782 // Create a new operand for this input vector.
5783 InputUsed[OpNo] = Input;
5784 break;
5785 }
5786 }
5787
5788 if (OpNo >= std::size(InputUsed)) {
5789 // More than two input vectors used! Give up on trying to create a
5790 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5791 UseBuildVector = true;
5792 break;
5793 }
5794
5795 // Add the mask index for the new shuffle vector.
5796 Ops.push_back(Idx + OpNo * NewElts);
5797 }
5798
5799 if (UseBuildVector) {
5800 LLT EltTy = NarrowTy.getElementType();
5801 SmallVector<Register, 16> SVOps;
5802
5803 // Extract the input elements by hand.
5804 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5805 // The mask element. This indexes into the input.
5806 int Idx = Mask[FirstMaskIdx + MaskOffset];
5807
5808 // The input vector this mask element indexes into.
5809 unsigned Input = (unsigned)Idx / NewElts;
5810
5811 if (Input >= std::size(Inputs)) {
5812 // The mask element is "undef" or indexes off the end of the input.
5813 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5814 continue;
5815 }
5816
5817 // Turn the index into an offset from the start of the input vector.
5818 Idx -= Input * NewElts;
5819
5820 // Extract the vector element by hand.
5821 SVOps.push_back(MIRBuilder
5822 .buildExtractVectorElement(
5823 EltTy, Inputs[Input],
5824 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5825 .getReg(0));
5826 }
5827
5828 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5829 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5830 } else if (InputUsed[0] == -1U) {
5831 // No input vectors were used! The result is undefined.
5832 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5833 } else if (NewElts == 1) {
5834 Output = MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);
5835 } else {
5836 Register Op0 = Inputs[InputUsed[0]];
5837 // If only one input was used, use an undefined vector for the other.
5838 Register Op1 = InputUsed[1] == -1U
5839 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5840 : Inputs[InputUsed[1]];
5841 // At least one input vector was used. Create a new shuffle vector.
5842 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5843 }
5844
5845 Ops.clear();
5846 }
5847
5848 MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
5849 MI.eraseFromParent();
5850 return Legalized;
5851}
5852
5853 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
5854 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5855 auto &RdxMI = cast<GVecReduce>(MI);
5856
5857 if (TypeIdx != 1)
5858 return UnableToLegalize;
5859
5860 // The semantics of the normal non-sequential reductions allow us to freely
5861 // re-associate the operation.
5862 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5863
5864 if (NarrowTy.isVector() &&
5865 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5866 return UnableToLegalize;
5867
5868 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5869 SmallVector<Register> SplitSrcs;
5870 // If NarrowTy is a scalar then we're being asked to scalarize.
5871 const unsigned NumParts =
5872 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5873 : SrcTy.getNumElements();
5874
5875 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5876 if (NarrowTy.isScalar()) {
5877 if (DstTy != NarrowTy)
5878 return UnableToLegalize; // FIXME: handle implicit extensions.
5879
5880 if (isPowerOf2_32(NumParts)) {
5881 // Generate a tree of scalar operations to reduce the critical path.
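 // e.g. for four pieces a, b, c, d this builds (a op b) and (c op d) in one
 // pass, then combines the two partial results in the next pass.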
5882 SmallVector<Register> PartialResults;
5883 unsigned NumPartsLeft = NumParts;
5884 while (NumPartsLeft > 1) {
5885 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5886 PartialResults.emplace_back(
5887 MIRBuilder
5888 .buildInstr(ScalarOpc, {NarrowTy},
5889 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5890 .getReg(0));
5891 }
5892 SplitSrcs = PartialResults;
5893 PartialResults.clear();
5894 NumPartsLeft = SplitSrcs.size();
5895 }
5896 assert(SplitSrcs.size() == 1);
5897 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5898 MI.eraseFromParent();
5899 return Legalized;
5900 }
5901 // If we can't generate a tree, then just do sequential operations.
5902 Register Acc = SplitSrcs[0];
5903 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5904 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5905 .getReg(0);
5906 MIRBuilder.buildCopy(DstReg, Acc);
5907 MI.eraseFromParent();
5908 return Legalized;
5909 }
5910 SmallVector<Register> PartialReductions;
5911 for (unsigned Part = 0; Part < NumParts; ++Part) {
5912 PartialReductions.push_back(
5913 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5914 .getReg(0));
5915 }
5916
5917 // If the types involved are powers of 2, we can generate intermediate vector
5918 // ops, before generating a final reduction operation.
5919 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5920 isPowerOf2_32(NarrowTy.getNumElements())) {
5921 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5922 }
5923
5924 Register Acc = PartialReductions[0];
5925 for (unsigned Part = 1; Part < NumParts; ++Part) {
5926 if (Part == NumParts - 1) {
5927 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5928 {Acc, PartialReductions[Part]});
5929 } else {
5930 Acc = MIRBuilder
5931 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5932 .getReg(0);
5933 }
5934 }
5935 MI.eraseFromParent();
5936 return Legalized;
5937}
5938
5939 LegalizerHelper::LegalizeResult
5940 LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
5941 unsigned int TypeIdx,
5942 LLT NarrowTy) {
5943 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5944 MI.getFirst3RegLLTs();
5945 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5946 DstTy != NarrowTy)
5947 return UnableToLegalize;
5948
5949 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5950 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5951 "Unexpected vecreduce opcode");
5952 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5953 ? TargetOpcode::G_FADD
5954 : TargetOpcode::G_FMUL;
5955
5956 SmallVector<Register> SplitSrcs;
5957 unsigned NumParts = SrcTy.getNumElements();
5958 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5959 Register Acc = ScalarReg;
5960 for (unsigned i = 0; i < NumParts; i++)
5961 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5962 .getReg(0);
5963
5964 MIRBuilder.buildCopy(DstReg, Acc);
5965 MI.eraseFromParent();
5966 return Legalized;
5967}
5968
5969 LegalizerHelper::LegalizeResult
5970 LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5971 LLT SrcTy, LLT NarrowTy,
5972 unsigned ScalarOpc) {
5973 SmallVector<Register> SplitSrcs;
5974 // Split the sources into NarrowTy size pieces.
5975 extractParts(SrcReg, NarrowTy,
5976 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5977 MIRBuilder, MRI);
5978 // We're going to do a tree reduction using vector operations until we have
5979 // one NarrowTy size value left.
5980 while (SplitSrcs.size() > 1) {
5981 SmallVector<Register> PartialRdxs;
5982 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5983 Register LHS = SplitSrcs[Idx];
5984 Register RHS = SplitSrcs[Idx + 1];
5985 // Create the intermediate vector op.
5986 Register Res =
5987 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
5988 PartialRdxs.push_back(Res);
5989 }
5990 SplitSrcs = std::move(PartialRdxs);
5991 }
5992 // Finally generate the requested NarrowTy based reduction.
5993 Observer.changingInstr(MI);
5994 MI.getOperand(1).setReg(SplitSrcs[0]);
5995 Observer.changedInstr(MI);
5996 return Legalized;
5997}
5998
5999 LegalizerHelper::LegalizeResult
6000 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
6001 const LLT HalfTy, const LLT AmtTy) {
6002
6003 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6004 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6005 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6006
6007 if (Amt.isZero()) {
6008 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
6009 MI.eraseFromParent();
6010 return Legalized;
6011 }
6012
6013 LLT NVT = HalfTy;
6014 unsigned NVTBits = HalfTy.getSizeInBits();
6015 unsigned VTBits = 2 * NVTBits;
6016
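 // For example, narrowing an s64 G_SHL by a constant 40 with 32-bit halves
 // gives Lo = 0 and Hi = InL << 8 (handled by the Amt > NVTBits case below).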
6017 SrcOp Lo(Register(0)), Hi(Register(0));
6018 if (MI.getOpcode() == TargetOpcode::G_SHL) {
6019 if (Amt.ugt(VTBits)) {
6020 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6021 } else if (Amt.ugt(NVTBits)) {
6022 Lo = MIRBuilder.buildConstant(NVT, 0);
6023 Hi = MIRBuilder.buildShl(NVT, InL,
6024 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6025 } else if (Amt == NVTBits) {
6026 Lo = MIRBuilder.buildConstant(NVT, 0);
6027 Hi = InL;
6028 } else {
6029 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
6030 auto OrLHS =
6031 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
6032 auto OrRHS = MIRBuilder.buildLShr(
6033 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6034 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6035 }
6036 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6037 if (Amt.ugt(VTBits)) {
6038 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6039 } else if (Amt.ugt(NVTBits)) {
6040 Lo = MIRBuilder.buildLShr(NVT, InH,
6041 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6042 Hi = MIRBuilder.buildConstant(NVT, 0);
6043 } else if (Amt == NVTBits) {
6044 Lo = InH;
6045 Hi = MIRBuilder.buildConstant(NVT, 0);
6046 } else {
6047 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6048
6049 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6050 auto OrRHS = MIRBuilder.buildShl(
6051 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6052
6053 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6054 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
6055 }
6056 } else {
6057 if (Amt.ugt(VTBits)) {
6058 Hi = Lo = MIRBuilder.buildAShr(
6059 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6060 } else if (Amt.ugt(NVTBits)) {
6061 Lo = MIRBuilder.buildAShr(NVT, InH,
6062 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6063 Hi = MIRBuilder.buildAShr(NVT, InH,
6064 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6065 } else if (Amt == NVTBits) {
6066 Lo = InH;
6067 Hi = MIRBuilder.buildAShr(NVT, InH,
6068 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6069 } else {
6070 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6071
6072 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6073 auto OrRHS = MIRBuilder.buildShl(
6074 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6075
6076 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6077 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
6078 }
6079 }
6080
6081 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
6082 MI.eraseFromParent();
6083
6084 return Legalized;
6085}
6086
6087 LegalizerHelper::LegalizeResult
6088 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
6089 LLT RequestedTy) {
6090 if (TypeIdx == 1) {
6091 Observer.changingInstr(MI);
6092 narrowScalarSrc(MI, RequestedTy, 2);
6093 Observer.changedInstr(MI);
6094 return Legalized;
6095 }
6096
6097 Register DstReg = MI.getOperand(0).getReg();
6098 LLT DstTy = MRI.getType(DstReg);
6099 if (DstTy.isVector())
6100 return UnableToLegalize;
6101
6102 Register Amt = MI.getOperand(2).getReg();
6103 LLT ShiftAmtTy = MRI.getType(Amt);
6104 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
6105 if (DstEltSize % 2 != 0)
6106 return UnableToLegalize;
6107
6108 // Check if we should use multi-way splitting instead of recursive binary
6109 // splitting.
6110 //
6111 // Multi-way splitting directly decomposes wide shifts (e.g., 128-bit ->
6112 // 4×32-bit) in a single legalization step, avoiding the recursive overhead
6113 // and dependency chains created by the usual binary splitting approach
6114 // (128->64->32).
6115 //
6116 // The >= 8 parts threshold ensures we only use this optimization when binary
6117 // splitting would require multiple recursive passes, avoiding overhead for
6118 // simple 2-way splits where the binary approach is sufficient.
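 // For example, narrowing a 128-bit shift to s16 yields 8 parts and uses the
 // multiway path; narrowing it to s64 yields only 2 parts and keeps the binary
 // split below.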
6119 if (RequestedTy.isValid() && RequestedTy.isScalar() &&
6120 DstEltSize % RequestedTy.getSizeInBits() == 0) {
6121 const unsigned NumParts = DstEltSize / RequestedTy.getSizeInBits();
6122 // Use multiway if we have 8 or more parts (i.e., would need 3+ recursive
6123 // steps).
6124 if (NumParts >= 8)
6125 return narrowScalarShiftMultiway(MI, RequestedTy);
6126 }
6127
6128 // Fall back to binary splitting:
6129 // Ignore the input type. We can only go to exactly half the size of the
6130 // input. If that isn't small enough, the resulting pieces will be further
6131 // legalized.
6132 const unsigned NewBitSize = DstEltSize / 2;
6133 const LLT HalfTy = LLT::scalar(NewBitSize);
6134 const LLT CondTy = LLT::scalar(1);
6135
6136 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
6137 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
6138 ShiftAmtTy);
6139 }
6140
6141 // TODO: Expand with known bits.
6142
6143 // Handle the fully general expansion by an unknown amount.
6144 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6145
6146 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6147 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6148 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6149
6150 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6151 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6152
6153 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6154 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
6155 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
6156
6157 Register ResultRegs[2];
6158 switch (MI.getOpcode()) {
6159 case TargetOpcode::G_SHL: {
6160 // Short: ShAmt < NewBitSize
6161 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
6162
6163 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6164 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
6165 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6166
6167 // Long: ShAmt >= NewBitSize
6168 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
6169 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
6170
6171 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6172 auto Hi = MIRBuilder.buildSelect(
6173 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6174
6175 ResultRegs[0] = Lo.getReg(0);
6176 ResultRegs[1] = Hi.getReg(0);
6177 break;
6178 }
6179 case TargetOpcode::G_LSHR:
6180 case TargetOpcode::G_ASHR: {
6181 // Short: ShAmt < NewBitSize
6182 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
6183
6184 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
6185 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6186 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6187
6188 // Long: ShAmt >= NewBitSize
6189 MachineInstrBuilder HiL;
6190 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6191 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
6192 } else {
6193 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6194 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
6195 }
6196 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
6197 {InH, AmtExcess}); // Lo from Hi part.
6198
6199 auto Lo = MIRBuilder.buildSelect(
6200 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6201
6202 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6203
6204 ResultRegs[0] = Lo.getReg(0);
6205 ResultRegs[1] = Hi.getReg(0);
6206 break;
6207 }
6208 default:
6209 llvm_unreachable("not a shift");
6210 }
6211
6212 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6213 MI.eraseFromParent();
6214 return Legalized;
6215}
6216
6217 Register LegalizerHelper::buildConstantShiftPart(unsigned Opcode,
6218 unsigned PartIdx,
6219 unsigned NumParts,
6220 ArrayRef<Register> SrcParts,
6221 const ShiftParams &Params,
6222 LLT TargetTy, LLT ShiftAmtTy) {
6223 auto WordShiftConst = getIConstantVRegVal(Params.WordShift, MRI);
6224 auto BitShiftConst = getIConstantVRegVal(Params.BitShift, MRI);
6225 assert(WordShiftConst && BitShiftConst && "Expected constants");
6226
6227 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6228 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6229 const bool NeedsInterWordShift = ShiftBits != 0;
6230
6231 switch (Opcode) {
6232 case TargetOpcode::G_SHL: {
6233 // Data moves from lower indices to higher indices
6234 // If this part would come from a source beyond our range, it's zero
6235 if (PartIdx < ShiftWords)
6236 return Params.Zero;
6237
6238 unsigned SrcIdx = PartIdx - ShiftWords;
6239 if (!NeedsInterWordShift)
6240 return SrcParts[SrcIdx];
6241
6242 // Combine shifted main part with carry from previous part
6243 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6244 if (SrcIdx > 0) {
6245 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6246 Params.InvBitShift);
6247 return MIRBuilder.buildOr(TargetTy, Hi, Lo).getReg(0);
6248 }
6249 return Hi.getReg(0);
6250 }
6251
6252 case TargetOpcode::G_LSHR: {
6253 unsigned SrcIdx = PartIdx + ShiftWords;
6254 if (SrcIdx >= NumParts)
6255 return Params.Zero;
6256 if (!NeedsInterWordShift)
6257 return SrcParts[SrcIdx];
6258
6259 // Combine shifted main part with carry from next part
6260 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6261 if (SrcIdx + 1 < NumParts) {
6262 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6263 Params.InvBitShift);
6264 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6265 }
6266 return Lo.getReg(0);
6267 }
6268
6269 case TargetOpcode::G_ASHR: {
6270 // Like LSHR but preserves sign bit
6271 unsigned SrcIdx = PartIdx + ShiftWords;
6272 if (SrcIdx >= NumParts)
6273 return Params.SignBit;
6274 if (!NeedsInterWordShift)
6275 return SrcParts[SrcIdx];
6276
6277 // Only the original MSB part uses arithmetic shift to preserve sign. All
6278 // other parts use logical shift since they're just moving data bits.
6279 auto Lo =
6280 (SrcIdx == NumParts - 1)
6281 ? MIRBuilder.buildAShr(TargetTy, SrcParts[SrcIdx], Params.BitShift)
6282 : MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6283 Register HiSrc =
6284 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.SignBit;
6285 auto Hi = MIRBuilder.buildShl(TargetTy, HiSrc, Params.InvBitShift);
6286 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6287 }
6288
6289 default:
6290 llvm_unreachable("not a shift");
6291 }
6292}
6293
6294 Register LegalizerHelper::buildVariableShiftPart(unsigned Opcode,
6295 Register MainOperand,
6296 Register ShiftAmt,
6297 LLT TargetTy,
6298 Register CarryOperand) {
6299 // This helper generates a single output part for variable shifts by combining
6300 // the main operand (shifted by BitShift) with carry bits from an adjacent
6301 // part.
6302
6303 // For G_ASHR, individual parts don't have their own sign bit, only the
6304 // complete value does. So we use LSHR for the main operand shift in ASHR
6305 // context.
6306 unsigned MainOpcode =
6307 (Opcode == TargetOpcode::G_ASHR) ? TargetOpcode::G_LSHR : Opcode;
6308
6309 // Perform the primary shift on the main operand
6310 Register MainShifted =
6311 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6312 .getReg(0);
6313
6314 // No carry operand available
6315 if (!CarryOperand.isValid())
6316 return MainShifted;
6317
6318 // If BitShift is 0 (word-aligned shift), no inter-word bit movement occurs,
6319 // so carry bits aren't needed.
6320 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6321 auto ZeroConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6322 LLT BoolTy = LLT::scalar(1);
6323 auto IsZeroBitShift =
6324 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, ShiftAmt, ZeroConst);
6325
6326 // Extract bits from the adjacent part that will "carry over" into this part.
6327 // The carry direction is opposite to the main shift direction, so we can
6328 // align the two shifted values before combining them with OR.
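 // e.g. for a G_SHL part with 32-bit words and bit shift b, the result is
 // (Main << b) | (Carry >> (32 - b)), with the carry term forced to zero when
 // b == 0.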
6329
6330 // Determine the carry shift opcode (opposite direction)
6331 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6332 : TargetOpcode::G_SHL;
6333
6334 // Calculate inverse shift amount: BitWidth - ShiftAmt
6335 auto TargetBitsConst =
6336 MIRBuilder.buildConstant(ShiftAmtTy, TargetTy.getScalarSizeInBits());
6337 auto InvShiftAmt = MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6338
6339 // Shift the carry operand
6340 Register CarryBits =
6341 MIRBuilder
6342 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6343 .getReg(0);
6344
6345 // If BitShift is 0, don't include carry bits (InvShiftAmt would equal
6346 // TargetBits which would be poison for the individual carry shift operation).
6347 auto ZeroReg = MIRBuilder.buildConstant(TargetTy, 0);
6348 Register SafeCarryBits =
6349 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6350 .getReg(0);
6351
6352 // Combine the main shifted part with the carry bits
6353 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6354}
6355
6356 LegalizerHelper::LegalizeResult
6357 LegalizerHelper::narrowScalarShiftByConstantMultiway(MachineInstr &MI,
6358 const APInt &Amt,
6359 LLT TargetTy,
6360 LLT ShiftAmtTy) {
6361 // Any wide shift can be decomposed into WordShift + BitShift components.
6362 // When shift amount is known constant, directly compute the decomposition
6363 // values and generate constant registers.
6364 Register DstReg = MI.getOperand(0).getReg();
6365 Register SrcReg = MI.getOperand(1).getReg();
6366 LLT DstTy = MRI.getType(DstReg);
6367
6368 const unsigned DstBits = DstTy.getScalarSizeInBits();
6369 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6370 const unsigned NumParts = DstBits / TargetBits;
6371
6372 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6373
6374 // When the shift amount is known at compile time, we just calculate which
6375 // source parts contribute to each output part.
6376
6377 SmallVector<Register, 8> SrcParts;
6378 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6379
6380 if (Amt.isZero()) {
6381 // No shift needed, just copy
6382 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6383 MI.eraseFromParent();
6384 return Legalized;
6385 }
6386
6387 ShiftParams Params;
6388 const unsigned ShiftWords = Amt.getZExtValue() / TargetBits;
6389 const unsigned ShiftBits = Amt.getZExtValue() % TargetBits;
6390
6391 // Generate constants and values needed by all shift types
6392 Params.WordShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftWords).getReg(0);
6393 Params.BitShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftBits).getReg(0);
6394 Params.InvBitShift =
6395 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6396 Params.Zero = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6397
6398 // For ASHR, we need the sign-extended value to fill shifted-out positions
6399 if (MI.getOpcode() == TargetOpcode::G_ASHR)
6400 Params.SignBit =
6401 MIRBuilder
6402 .buildAShr(TargetTy, SrcParts[SrcParts.size() - 1],
6403 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6404 .getReg(0);
6405
6406 SmallVector<Register, 8> DstParts(NumParts);
6407 for (unsigned I = 0; I < NumParts; ++I)
6408 DstParts[I] = buildConstantShiftPart(MI.getOpcode(), I, NumParts, SrcParts,
6409 Params, TargetTy, ShiftAmtTy);
6410
6411 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6412 MI.eraseFromParent();
6413 return Legalized;
6414}
6415
6416 LegalizerHelper::LegalizeResult
6417 LegalizerHelper::narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy) {
6418 Register DstReg = MI.getOperand(0).getReg();
6419 Register SrcReg = MI.getOperand(1).getReg();
6420 Register AmtReg = MI.getOperand(2).getReg();
6421 LLT DstTy = MRI.getType(DstReg);
6422 LLT ShiftAmtTy = MRI.getType(AmtReg);
6423
6424 const unsigned DstBits = DstTy.getScalarSizeInBits();
6425 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6426 const unsigned NumParts = DstBits / TargetBits;
6427
6428 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6429 assert(isPowerOf2_32(TargetBits) && "Target bit width must be power of 2");
6430
6431 // If the shift amount is known at compile time, we can use direct indexing
6432 // instead of generating select chains in the general case.
6433 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI))
6434 return narrowScalarShiftByConstantMultiway(MI, VRegAndVal->Value, TargetTy,
6435 ShiftAmtTy);
6436
6437 // For runtime-variable shift amounts, we must generate a more complex
6438 // sequence that handles all possible shift values using select chains.
6439
6440 // Split the input into target-sized pieces
6441 SmallVector<Register, 8> SrcParts;
6442 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6443
6444 // Shifting by zero should be a no-op.
6445 auto ZeroAmtConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6446 LLT BoolTy = LLT::scalar(1);
6447 auto IsZeroShift =
6448 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, AmtReg, ZeroAmtConst);
6449
6450 // Any wide shift can be decomposed into two components:
6451 // 1. WordShift: number of complete target-sized words to shift
6452 // 2. BitShift: number of bits to shift within each word
6453 //
6454 // Example: 128-bit >> 50 with 32-bit target:
6455 // WordShift = 50 / 32 = 1 (shift right by 1 complete word)
6456 // BitShift = 50 % 32 = 18 (shift each word right by 18 bits)
6457 unsigned TargetBitsLog2 = Log2_32(TargetBits);
6458 auto TargetBitsLog2Const =
6459 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6460 auto TargetBitsMask = MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6461
6462 Register WordShift =
6463 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6464 Register BitShift =
6465 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6466
6467 // Fill values:
6468 // - SHL/LSHR: fill with zeros
6469 // - ASHR: fill with sign-extended MSB
6470 Register ZeroReg = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6471
6472 Register FillValue;
6473 if (MI.getOpcode() == TargetOpcode::G_ASHR) {
6474 auto TargetBitsMinusOneConst =
6475 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6476 FillValue = MIRBuilder
6477 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6478 TargetBitsMinusOneConst)
6479 .getReg(0);
6480 } else {
6481 FillValue = ZeroReg;
6482 }
6483
6484 SmallVector<Register, 8> DstParts(NumParts);
6485
6486 // For each output part, generate a select chain that chooses the correct
6487 // result based on the runtime WordShift value. This handles all possible
6488 // word shift amounts by pre-calculating what each would produce.
6489 for (unsigned I = 0; I < NumParts; ++I) {
6490 // Initialize with appropriate default value for this shift type
6491 Register InBoundsResult = FillValue;
6492
6493 // clang-format off
6494 // Build a branchless select chain by pre-computing results for all possible
6495 // WordShift values (0 to NumParts-1). Each iteration nests a new select:
6496 //
6497 // K=0: select(WordShift==0, result0, FillValue)
6498 // K=1: select(WordShift==1, result1, select(WordShift==0, result0, FillValue))
6499 // K=2: select(WordShift==2, result2, select(WordShift==1, result1, select(...)))
6500 // clang-format on
6501 for (unsigned K = 0; K < NumParts; ++K) {
6502 auto WordShiftKConst = MIRBuilder.buildConstant(ShiftAmtTy, K);
6503 auto IsWordShiftK = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy,
6504 WordShift, WordShiftKConst);
6505
6506 // Calculate source indices for this word shift
6507 //
6508 // For 4-part 128-bit value with K=1 word shift:
6509 // SHL: [3][2][1][0] << K => [2][1][0][Z]
6510 // -> (MainIdx = I-K, CarryIdx = I-K-1)
6511 // LSHR: [3][2][1][0] >> K => [Z][3][2][1]
6512 // -> (MainIdx = I+K, CarryIdx = I+K+1)
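 // E.g. with NumParts = 4, I = 2, K = 1 and G_LSHR: MainSrcIdx = 3 is in
 // range but CarrySrcIdx = 4 is not, so no carry word is passed in
 // (G_ASHR substitutes the sign-fill value for it instead).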
6513 int MainSrcIdx;
6514 int CarrySrcIdx; // Index for the word that provides the carried-in bits.
6515
6516 switch (MI.getOpcode()) {
6517 case TargetOpcode::G_SHL:
6518 MainSrcIdx = (int)I - (int)K;
6519 CarrySrcIdx = MainSrcIdx - 1;
6520 break;
6521 case TargetOpcode::G_LSHR:
6522 case TargetOpcode::G_ASHR:
6523 MainSrcIdx = (int)I + (int)K;
6524 CarrySrcIdx = MainSrcIdx + 1;
6525 break;
6526 default:
6527 llvm_unreachable("Not a shift");
6528 }
6529
6530 // Check bounds and build the result for this word shift
6531 Register ResultForK;
6532 if (MainSrcIdx >= 0 && MainSrcIdx < (int)NumParts) {
6533 Register MainOp = SrcParts[MainSrcIdx];
6534 Register CarryOp;
6535
6536 // Determine carry operand with bounds checking
6537 if (CarrySrcIdx >= 0 && CarrySrcIdx < (int)NumParts)
6538 CarryOp = SrcParts[CarrySrcIdx];
6539 else if (MI.getOpcode() == TargetOpcode::G_ASHR &&
6540 CarrySrcIdx >= (int)NumParts)
6541 CarryOp = FillValue; // Use sign extension
6542
6543 ResultForK = buildVariableShiftPart(MI.getOpcode(), MainOp, BitShift,
6544 TargetTy, CarryOp);
6545 } else {
6546 // Out of bounds - use the fill value for this K.
6547 ResultForK = FillValue;
6548 }
6549
6550 // Select this result if WordShift equals K.
6551 InBoundsResult =
6552 MIRBuilder
6553 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6554 .getReg(0);
6555 }
6556
6557 // Handle zero-shift special case: if shift is 0, use original input
6558 DstParts[I] =
6559 MIRBuilder
6560 .buildSelect(TargetTy, IsZeroShift, SrcParts[I], InBoundsResult)
6561 .getReg(0);
6562 }
6563
6564 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6565 MI.eraseFromParent();
6566 return Legalized;
6567}
6568
6569 LegalizerHelper::LegalizeResult
6570 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
6571 LLT MoreTy) {
6572 assert(TypeIdx == 0 && "Expecting only Idx 0");
6573
6574 Observer.changingInstr(MI);
6575 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
6576 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
6577 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
6578 moreElementsVectorSrc(MI, MoreTy, I);
6579 }
6580
6581 MachineBasicBlock &MBB = *MI.getParent();
6582 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
6583 moreElementsVectorDst(MI, MoreTy, 0);
6584 Observer.changedInstr(MI);
6585 return Legalized;
6586}
6587
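// Return the identity element for the given vector-reduction opcode, i.e. a
// scalar value that does not change the reduction result; used below to pad
// widened reduction inputs.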
6588MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
6589 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
6590 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
6591
6592 switch (Opcode) {
6593 default:
6595 "getNeutralElementForVecReduce called with invalid opcode!");
6596 case TargetOpcode::G_VECREDUCE_ADD:
6597 case TargetOpcode::G_VECREDUCE_OR:
6598 case TargetOpcode::G_VECREDUCE_XOR:
6599 case TargetOpcode::G_VECREDUCE_UMAX:
6600 return MIRBuilder.buildConstant(Ty, 0);
6601 case TargetOpcode::G_VECREDUCE_MUL:
6602 return MIRBuilder.buildConstant(Ty, 1);
6603 case TargetOpcode::G_VECREDUCE_AND:
6604 case TargetOpcode::G_VECREDUCE_UMIN:
6605 return MIRBuilder.buildConstant(
6606 Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
6607 case TargetOpcode::G_VECREDUCE_SMAX:
6608 return MIRBuilder.buildConstant(
6609 Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
6610 case TargetOpcode::G_VECREDUCE_SMIN:
6611 return MIRBuilder.buildConstant(
6612 Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
6613 case TargetOpcode::G_VECREDUCE_FADD:
6614 return MIRBuilder.buildFConstant(Ty, -0.0);
6615 case TargetOpcode::G_VECREDUCE_FMUL:
6616 return MIRBuilder.buildFConstant(Ty, 1.0);
6617 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6618 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6619 assert(false && "getNeutralElementForVecReduce unimplemented for "
6620 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6621 }
6622 llvm_unreachable("switch expected to return!");
6623}
6624
6625 LegalizerHelper::LegalizeResult
6626 LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
6627 LLT MoreTy) {
6628 unsigned Opc = MI.getOpcode();
6629 switch (Opc) {
6630 case TargetOpcode::G_IMPLICIT_DEF:
6631 case TargetOpcode::G_LOAD: {
6632 if (TypeIdx != 0)
6633 return UnableToLegalize;
6634 Observer.changingInstr(MI);
6635 moreElementsVectorDst(MI, MoreTy, 0);
6636 Observer.changedInstr(MI);
6637 return Legalized;
6638 }
6639 case TargetOpcode::G_STORE:
6640 if (TypeIdx != 0)
6641 return UnableToLegalize;
6642 Observer.changingInstr(MI);
6643 moreElementsVectorSrc(MI, MoreTy, 0);
6644 Observer.changedInstr(MI);
6645 return Legalized;
6646 case TargetOpcode::G_AND:
6647 case TargetOpcode::G_OR:
6648 case TargetOpcode::G_XOR:
6649 case TargetOpcode::G_ADD:
6650 case TargetOpcode::G_SUB:
6651 case TargetOpcode::G_MUL:
6652 case TargetOpcode::G_FADD:
6653 case TargetOpcode::G_FSUB:
6654 case TargetOpcode::G_FMUL:
6655 case TargetOpcode::G_FDIV:
6656 case TargetOpcode::G_FCOPYSIGN:
6657 case TargetOpcode::G_UADDSAT:
6658 case TargetOpcode::G_USUBSAT:
6659 case TargetOpcode::G_SADDSAT:
6660 case TargetOpcode::G_SSUBSAT:
6661 case TargetOpcode::G_SMIN:
6662 case TargetOpcode::G_SMAX:
6663 case TargetOpcode::G_UMIN:
6664 case TargetOpcode::G_UMAX:
6665 case TargetOpcode::G_FMINNUM:
6666 case TargetOpcode::G_FMAXNUM:
6667 case TargetOpcode::G_FMINNUM_IEEE:
6668 case TargetOpcode::G_FMAXNUM_IEEE:
6669 case TargetOpcode::G_FMINIMUM:
6670 case TargetOpcode::G_FMAXIMUM:
6671 case TargetOpcode::G_FMINIMUMNUM:
6672 case TargetOpcode::G_FMAXIMUMNUM:
6673 case TargetOpcode::G_STRICT_FADD:
6674 case TargetOpcode::G_STRICT_FSUB:
6675 case TargetOpcode::G_STRICT_FMUL:
6676 case TargetOpcode::G_SHL:
6677 case TargetOpcode::G_ASHR:
6678 case TargetOpcode::G_LSHR: {
6679 Observer.changingInstr(MI);
6680 moreElementsVectorSrc(MI, MoreTy, 1);
6681 moreElementsVectorSrc(MI, MoreTy, 2);
6682 moreElementsVectorDst(MI, MoreTy, 0);
6683 Observer.changedInstr(MI);
6684 return Legalized;
6685 }
6686 case TargetOpcode::G_FMA:
6687 case TargetOpcode::G_STRICT_FMA:
6688 case TargetOpcode::G_FSHR:
6689 case TargetOpcode::G_FSHL: {
6690 Observer.changingInstr(MI);
6691 moreElementsVectorSrc(MI, MoreTy, 1);
6692 moreElementsVectorSrc(MI, MoreTy, 2);
6693 moreElementsVectorSrc(MI, MoreTy, 3);
6694 moreElementsVectorDst(MI, MoreTy, 0);
6695 Observer.changedInstr(MI);
6696 return Legalized;
6697 }
6698 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6699 case TargetOpcode::G_EXTRACT:
6700 if (TypeIdx != 1)
6701 return UnableToLegalize;
6702 Observer.changingInstr(MI);
6703 moreElementsVectorSrc(MI, MoreTy, 1);
6704 Observer.changedInstr(MI);
6705 return Legalized;
6706 case TargetOpcode::G_INSERT:
6707 case TargetOpcode::G_INSERT_VECTOR_ELT:
6708 case TargetOpcode::G_FREEZE:
6709 case TargetOpcode::G_FNEG:
6710 case TargetOpcode::G_FABS:
6711 case TargetOpcode::G_FSQRT:
6712 case TargetOpcode::G_FCEIL:
6713 case TargetOpcode::G_FFLOOR:
6714 case TargetOpcode::G_FNEARBYINT:
6715 case TargetOpcode::G_FRINT:
6716 case TargetOpcode::G_INTRINSIC_ROUND:
6717 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6718 case TargetOpcode::G_INTRINSIC_TRUNC:
6719 case TargetOpcode::G_BITREVERSE:
6720 case TargetOpcode::G_BSWAP:
6721 case TargetOpcode::G_FCANONICALIZE:
6722 case TargetOpcode::G_SEXT_INREG:
6723 case TargetOpcode::G_ABS:
6724 case TargetOpcode::G_CTLZ:
6725 case TargetOpcode::G_CTPOP:
6726 if (TypeIdx != 0)
6727 return UnableToLegalize;
6728 Observer.changingInstr(MI);
6729 moreElementsVectorSrc(MI, MoreTy, 1);
6730 moreElementsVectorDst(MI, MoreTy, 0);
6731 Observer.changedInstr(MI);
6732 return Legalized;
6733 case TargetOpcode::G_SELECT: {
6734 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
6735 if (TypeIdx == 1) {
6736 if (!CondTy.isScalar() ||
6737 DstTy.getElementCount() != MoreTy.getElementCount())
6738 return UnableToLegalize;
6739
6740 // This is turning a scalar select of vectors into a vector
6741 // select. Broadcast the select condition.
6742 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6743 Observer.changingInstr(MI);
6744 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6745 Observer.changedInstr(MI);
6746 return Legalized;
6747 }
6748
6749 if (CondTy.isVector())
6750 return UnableToLegalize;
6751
6752 Observer.changingInstr(MI);
6753 moreElementsVectorSrc(MI, MoreTy, 2);
6754 moreElementsVectorSrc(MI, MoreTy, 3);
6755 moreElementsVectorDst(MI, MoreTy, 0);
6756 Observer.changedInstr(MI);
6757 return Legalized;
6758 }
6759 case TargetOpcode::G_UNMERGE_VALUES:
6760 return UnableToLegalize;
6761 case TargetOpcode::G_PHI:
6762 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
6763 case TargetOpcode::G_SHUFFLE_VECTOR:
6764 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
6765 case TargetOpcode::G_BUILD_VECTOR: {
6766 SmallVector<SrcOp, 8> Elts;
6767 for (auto Op : MI.uses()) {
6768 Elts.push_back(Op.getReg());
6769 }
6770
6771 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
6772 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
6773 }
6774
6775 MIRBuilder.buildDeleteTrailingVectorElements(
6776 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
6777 MI.eraseFromParent();
6778 return Legalized;
6779 }
6780 case TargetOpcode::G_SEXT:
6781 case TargetOpcode::G_ZEXT:
6782 case TargetOpcode::G_ANYEXT:
6783 case TargetOpcode::G_TRUNC:
6784 case TargetOpcode::G_FPTRUNC:
6785 case TargetOpcode::G_FPEXT:
6786 case TargetOpcode::G_FPTOSI:
6787 case TargetOpcode::G_FPTOUI:
6788 case TargetOpcode::G_FPTOSI_SAT:
6789 case TargetOpcode::G_FPTOUI_SAT:
6790 case TargetOpcode::G_SITOFP:
6791 case TargetOpcode::G_UITOFP: {
6792 Observer.changingInstr(MI);
6793 LLT SrcExtTy;
6794 LLT DstExtTy;
6795 if (TypeIdx == 0) {
6796 DstExtTy = MoreTy;
6797 SrcExtTy = LLT::fixed_vector(
6798 MoreTy.getNumElements(),
6799 MRI.getType(MI.getOperand(1).getReg()).getElementType());
6800 } else {
6801 DstExtTy = LLT::fixed_vector(
6802 MoreTy.getNumElements(),
6803 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6804 SrcExtTy = MoreTy;
6805 }
6806 moreElementsVectorSrc(MI, SrcExtTy, 1);
6807 moreElementsVectorDst(MI, DstExtTy, 0);
6808 Observer.changedInstr(MI);
6809 return Legalized;
6810 }
6811 case TargetOpcode::G_ICMP:
6812 case TargetOpcode::G_FCMP: {
6813 if (TypeIdx != 1)
6814 return UnableToLegalize;
6815
6816 Observer.changingInstr(MI);
6817 moreElementsVectorSrc(MI, MoreTy, 2);
6818 moreElementsVectorSrc(MI, MoreTy, 3);
6819 LLT CondTy = LLT::fixed_vector(
6820 MoreTy.getNumElements(),
6821 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6822 moreElementsVectorDst(MI, CondTy, 0);
6823 Observer.changedInstr(MI);
6824 return Legalized;
6825 }
6826 case TargetOpcode::G_BITCAST: {
6827 if (TypeIdx != 0)
6828 return UnableToLegalize;
6829
6830 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
6831 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6832
6833 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
6834 if (coefficient % DstTy.getNumElements() != 0)
6835 return UnableToLegalize;
6836
6837 coefficient = coefficient / DstTy.getNumElements();
6838
6839 LLT NewTy = SrcTy.changeElementCount(
6840 ElementCount::get(coefficient, MoreTy.isScalable()));
6841 Observer.changingInstr(MI);
6842 moreElementsVectorSrc(MI, NewTy, 1);
6843 moreElementsVectorDst(MI, MoreTy, 0);
6844 Observer.changedInstr(MI);
6845 return Legalized;
6846 }
6847 case TargetOpcode::G_VECREDUCE_FADD:
6848 case TargetOpcode::G_VECREDUCE_FMUL:
6849 case TargetOpcode::G_VECREDUCE_ADD:
6850 case TargetOpcode::G_VECREDUCE_MUL:
6851 case TargetOpcode::G_VECREDUCE_AND:
6852 case TargetOpcode::G_VECREDUCE_OR:
6853 case TargetOpcode::G_VECREDUCE_XOR:
6854 case TargetOpcode::G_VECREDUCE_SMAX:
6855 case TargetOpcode::G_VECREDUCE_SMIN:
6856 case TargetOpcode::G_VECREDUCE_UMAX:
6857 case TargetOpcode::G_VECREDUCE_UMIN: {
6858 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
6859 MachineOperand &MO = MI.getOperand(1);
6860 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6861 auto NeutralElement = getNeutralElementForVecReduce(
6862 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
6863
6864 LLT IdxTy(TLI.getVectorIdxLLT(MIRBuilder.getDataLayout()));
6865 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
6866 i != e; i++) {
6867 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
6868 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6869 NeutralElement, Idx);
6870 }
6871
6872 Observer.changingInstr(MI);
6873 MO.setReg(NewVec.getReg(0));
6874 Observer.changedInstr(MI);
6875 return Legalized;
6876 }
6877
6878 default:
6879 return UnableToLegalize;
6880 }
6881}
6882
6883 LegalizerHelper::LegalizeResult
6884 LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
6885 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6886 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6887 unsigned MaskNumElts = Mask.size();
6888 unsigned SrcNumElts = SrcTy.getNumElements();
6889 LLT DestEltTy = DstTy.getElementType();
6890
6891 if (MaskNumElts == SrcNumElts)
6892 return Legalized;
6893
6894 if (MaskNumElts < SrcNumElts) {
6895 // Extend mask to match new destination vector size with
6896 // undef values.
6897 SmallVector<int, 16> NewMask(SrcNumElts, -1);
6898 llvm::copy(Mask, NewMask.begin());
6899
6900 moreElementsVectorDst(MI, SrcTy, 0);
6901 MIRBuilder.setInstrAndDebugLoc(MI);
6902 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6903 MI.getOperand(1).getReg(),
6904 MI.getOperand(2).getReg(), NewMask);
6905 MI.eraseFromParent();
6906
6907 return Legalized;
6908 }
6909
6910 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
6911 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6912 LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
6913
6914 // Create new source vectors by concatenating the initial
6915 // source vectors with undefined vectors of the same size.
6916 auto Undef = MIRBuilder.buildUndef(SrcTy);
6917 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
6918 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
6919 MOps1[0] = MI.getOperand(1).getReg();
6920 MOps2[0] = MI.getOperand(2).getReg();
6921
6922 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
6923 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
6924
6925 // Readjust mask for new input vector length.
6926 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
6927 for (unsigned I = 0; I != MaskNumElts; ++I) {
6928 int Idx = Mask[I];
6929 if (Idx >= static_cast<int>(SrcNumElts))
6930 Idx += PaddedMaskNumElts - SrcNumElts;
6931 MappedOps[I] = Idx;
6932 }
6933
6934 // If we got more elements than required, extract subvector.
6935 if (MaskNumElts != PaddedMaskNumElts) {
6936 auto Shuffle =
6937 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
6938
6939 SmallVector<Register, 16> Elts(MaskNumElts);
6940 for (unsigned I = 0; I < MaskNumElts; ++I) {
6941 Elts[I] =
6942 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
6943 .getReg(0);
6944 }
6945 MIRBuilder.buildBuildVector(DstReg, Elts);
6946 } else {
6947 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
6948 }
6949
6950 MI.eraseFromParent();
6951 return Legalized;
6952}
6953
6954 LegalizerHelper::LegalizeResult
6955 LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
6956 unsigned int TypeIdx, LLT MoreTy) {
6957 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
6958 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6959 unsigned NumElts = DstTy.getNumElements();
6960 unsigned WidenNumElts = MoreTy.getNumElements();
6961
6962 if (DstTy.isVector() && Src1Ty.isVector() &&
6963 DstTy.getNumElements() != Src1Ty.getNumElements()) {
6964 return equalizeVectorShuffleLengths(MI);
6965 }
6966
6967 if (TypeIdx != 0)
6968 return UnableToLegalize;
6969
6970 // Expect a canonicalized shuffle.
6971 if (DstTy != Src1Ty || DstTy != Src2Ty)
6972 return UnableToLegalize;
6973
6974 moreElementsVectorSrc(MI, MoreTy, 1);
6975 moreElementsVectorSrc(MI, MoreTy, 2);
6976
6977 // Adjust mask based on new input vector length.
6978 SmallVector<int, 16> NewMask(WidenNumElts, -1);
6979 for (unsigned I = 0; I != NumElts; ++I) {
6980 int Idx = Mask[I];
6981 if (Idx < static_cast<int>(NumElts))
6982 NewMask[I] = Idx;
6983 else
6984 NewMask[I] = Idx - NumElts + WidenNumElts;
6985 }
6986 moreElementsVectorDst(MI, MoreTy, 0);
6987 MIRBuilder.setInstrAndDebugLoc(MI);
6988 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6989 MI.getOperand(1).getReg(),
6990 MI.getOperand(2).getReg(), NewMask);
6991 MI.eraseFromParent();
6992 return Legalized;
6993}
6994
6995void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
6996 ArrayRef<Register> Src1Regs,
6997 ArrayRef<Register> Src2Regs,
6998 LLT NarrowTy) {
6999 MachineIRBuilder &B = MIRBuilder;
7000 unsigned SrcParts = Src1Regs.size();
7001 unsigned DstParts = DstRegs.size();
7002
7003 unsigned DstIdx = 0; // Low bits of the result.
7004 Register FactorSum =
7005 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
7006 DstRegs[DstIdx] = FactorSum;
7007
7008 Register CarrySumPrevDstIdx;
7009 SmallVector<Register, 8> Factors;
7010
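 // Schoolbook long multiplication: each result word DstRegs[DstIdx] sums the
 // low halves of the products Src1[DstIdx - i] * Src2[i], the high halves of
 // the products that formed the previous word, and the carries accumulated
 // while forming that word.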
7011 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
7012 // Collect low parts of muls for DstIdx.
7013 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
7014 i <= std::min(DstIdx, SrcParts - 1); ++i) {
7015 MachineInstrBuilder Mul =
7016 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
7017 Factors.push_back(Mul.getReg(0));
7018 }
7019 // Collect high parts of muls from previous DstIdx.
7020 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
7021 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
7022 MachineInstrBuilder Umulh =
7023 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
7024 Factors.push_back(Umulh.getReg(0));
7025 }
7026 // Add CarrySum from additions calculated for previous DstIdx.
7027 if (DstIdx != 1) {
7028 Factors.push_back(CarrySumPrevDstIdx);
7029 }
7030
7031 Register CarrySum;
7032 // Add all factors and accumulate all carries into CarrySum.
7033 if (DstIdx != DstParts - 1) {
7034 MachineInstrBuilder Uaddo =
7035 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
7036 FactorSum = Uaddo.getReg(0);
7037 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
7038 for (unsigned i = 2; i < Factors.size(); ++i) {
7039 MachineInstrBuilder Uaddo =
7040 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
7041 FactorSum = Uaddo.getReg(0);
7042 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
7043 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
7044 }
7045 } else {
7046 // Since the value for the next index is not calculated, neither is CarrySum.
7047 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
7048 for (unsigned i = 2; i < Factors.size(); ++i)
7049 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
7050 }
7051
7052 CarrySumPrevDstIdx = CarrySum;
7053 DstRegs[DstIdx] = FactorSum;
7054 Factors.clear();
7055 }
7056}
7057
7058 LegalizerHelper::LegalizeResult
7059 LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
7060 LLT NarrowTy) {
7061 if (TypeIdx != 0)
7062 return UnableToLegalize;
7063
7064 Register DstReg = MI.getOperand(0).getReg();
7065 LLT DstType = MRI.getType(DstReg);
7066 // FIXME: add support for vector types
7067 if (DstType.isVector())
7068 return UnableToLegalize;
7069
7070 unsigned Opcode = MI.getOpcode();
7071 unsigned OpO, OpE, OpF;
7072 switch (Opcode) {
7073 case TargetOpcode::G_SADDO:
7074 case TargetOpcode::G_SADDE:
7075 case TargetOpcode::G_UADDO:
7076 case TargetOpcode::G_UADDE:
7077 case TargetOpcode::G_ADD:
7078 OpO = TargetOpcode::G_UADDO;
7079 OpE = TargetOpcode::G_UADDE;
7080 OpF = TargetOpcode::G_UADDE;
7081 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7082 OpF = TargetOpcode::G_SADDE;
7083 break;
7084 case TargetOpcode::G_SSUBO:
7085 case TargetOpcode::G_SSUBE:
7086 case TargetOpcode::G_USUBO:
7087 case TargetOpcode::G_USUBE:
7088 case TargetOpcode::G_SUB:
7089 OpO = TargetOpcode::G_USUBO;
7090 OpE = TargetOpcode::G_USUBE;
7091 OpF = TargetOpcode::G_USUBE;
7092 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7093 OpF = TargetOpcode::G_SSUBE;
7094 break;
7095 default:
7096 llvm_unreachable("Unexpected add/sub opcode!");
7097 }
7098
7099 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
7100 unsigned NumDefs = MI.getNumExplicitDefs();
7101 Register Src1 = MI.getOperand(NumDefs).getReg();
7102 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
7103 Register CarryDst, CarryIn;
7104 if (NumDefs == 2)
7105 CarryDst = MI.getOperand(1).getReg();
7106 if (MI.getNumOperands() == NumDefs + 3)
7107 CarryIn = MI.getOperand(NumDefs + 2).getReg();
7108
7109 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7110 LLT LeftoverTy, DummyTy;
7111 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
7112 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7113 MIRBuilder, MRI);
7114 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
7115 MRI);
7116
7117 int NarrowParts = Src1Regs.size();
7118 Src1Regs.append(Src1Left);
7119 Src2Regs.append(Src2Left);
7120 DstRegs.reserve(Src1Regs.size());
7121
7122 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
7123 Register DstReg =
7124 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7125 Register CarryOut;
7126 // Forward the final carry-out to the destination register
7127 if (i == e - 1 && CarryDst)
7128 CarryOut = CarryDst;
7129 else
7130 CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
7131
7132 if (!CarryIn) {
7133 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7134 {Src1Regs[i], Src2Regs[i]});
7135 } else if (i == e - 1) {
7136 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7137 {Src1Regs[i], Src2Regs[i], CarryIn});
7138 } else {
7139 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7140 {Src1Regs[i], Src2Regs[i], CarryIn});
7141 }
7142
7143 DstRegs.push_back(DstReg);
7144 CarryIn = CarryOut;
7145 }
7146 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
7147 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7148 ArrayRef(DstRegs).drop_front(NarrowParts));
7149
7150 MI.eraseFromParent();
7151 return Legalized;
7152}
7153
7154 LegalizerHelper::LegalizeResult
7155 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
7156 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
7157
7158 LLT Ty = MRI.getType(DstReg);
7159 if (Ty.isVector())
7160 return UnableToLegalize;
7161
7162 unsigned Size = Ty.getSizeInBits();
7163 unsigned NarrowSize = NarrowTy.getSizeInBits();
7164 if (Size % NarrowSize != 0)
7165 return UnableToLegalize;
7166
7167 unsigned NumParts = Size / NarrowSize;
7168 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
7169 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7170
7171 SmallVector<Register, 2> Src1Parts, Src2Parts;
7172 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
7173 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
7174 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
7175 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7176
7177 // Take only high half of registers if this is high mul.
7178 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
7179 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7180 MI.eraseFromParent();
7181 return Legalized;
7182}
7183
7184 LegalizerHelper::LegalizeResult
7185 LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
7186 LLT NarrowTy) {
7187 if (TypeIdx != 0)
7188 return UnableToLegalize;
7189
7190 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
7191
7192 Register Src = MI.getOperand(1).getReg();
7193 LLT SrcTy = MRI.getType(Src);
7194
7195 // If all finite floats fit into the narrowed integer type, we can just swap
7196 // out the result type. This is practically only useful for conversions from
7197 // half to at least 16-bits, so just handle the one case.
7198 if (SrcTy.getScalarType() != LLT::scalar(16) ||
7199 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
7200 return UnableToLegalize;
7201
7202 Observer.changingInstr(MI);
7203 narrowScalarDst(MI, NarrowTy, 0,
7204 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7205 Observer.changedInstr(MI);
7206 return Legalized;
7207}
7208
7209 LegalizerHelper::LegalizeResult
7210 LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
7211 LLT NarrowTy) {
7212 if (TypeIdx != 1)
7213 return UnableToLegalize;
7214
7215 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7216
7217 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7218 // FIXME: add support for when SizeOp1 isn't an exact multiple of
7219 // NarrowSize.
7220 if (SizeOp1 % NarrowSize != 0)
7221 return UnableToLegalize;
7222 int NumParts = SizeOp1 / NarrowSize;
7223
7224 SmallVector<Register, 2> SrcRegs, DstRegs;
7225 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7226 MIRBuilder, MRI);
7227
7228 Register OpReg = MI.getOperand(0).getReg();
7229 uint64_t OpStart = MI.getOperand(2).getImm();
7230 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7231 for (int i = 0; i < NumParts; ++i) {
7232 unsigned SrcStart = i * NarrowSize;
7233
7234 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7235 // No part of the extract uses this subregister, ignore it.
7236 continue;
7237 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7238 // The entire subregister is extracted, forward the value.
7239 DstRegs.push_back(SrcRegs[i]);
7240 continue;
7241 }
7242
7243 // OpSegStart is where this destination segment would start in OpReg if it
7244 // extended infinitely in both directions.
7245 int64_t ExtractOffset;
7246 uint64_t SegSize;
7247 if (OpStart < SrcStart) {
7248 ExtractOffset = 0;
7249 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7250 } else {
7251 ExtractOffset = OpStart - SrcStart;
7252 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7253 }
7254
7255 Register SegReg = SrcRegs[i];
7256 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7257 // A genuine extract is needed.
7258 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7259 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7260 }
7261
7262 DstRegs.push_back(SegReg);
7263 }
7264
7265 Register DstReg = MI.getOperand(0).getReg();
7266 if (MRI.getType(DstReg).isVector())
7267 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7268 else if (DstRegs.size() > 1)
7269 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7270 else
7271 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
7272 MI.eraseFromParent();
7273 return Legalized;
7274}
7275
7276 LegalizerHelper::LegalizeResult
7277 LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
7278 LLT NarrowTy) {
7279 // FIXME: Don't know how to handle secondary types yet.
7280 if (TypeIdx != 0)
7281 return UnableToLegalize;
7282
7283 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
7284 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7285 LLT LeftoverTy;
7286 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7287 LeftoverRegs, MIRBuilder, MRI);
7288
7289 SrcRegs.append(LeftoverRegs);
7290
7291 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7292 Register OpReg = MI.getOperand(2).getReg();
7293 uint64_t OpStart = MI.getOperand(3).getImm();
7294 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7295 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
7296 unsigned DstStart = I * NarrowSize;
7297
7298 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7299 // The entire subregister is defined by this insert, forward the new
7300 // value.
7301 DstRegs.push_back(OpReg);
7302 continue;
7303 }
7304
7305 Register SrcReg = SrcRegs[I];
7306 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
7307 // The leftover reg is smaller than NarrowTy, so we need to extend it.
7308 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7309 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
7310 }
7311
7312 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7313 // No part of the insert affects this subregister, forward the original.
7314 DstRegs.push_back(SrcReg);
7315 continue;
7316 }
7317
7318 // OpSegStart is where this destination segment would start in OpReg if it
7319 // extended infinitely in both directions.
7320 int64_t ExtractOffset, InsertOffset;
7321 uint64_t SegSize;
7322 if (OpStart < DstStart) {
7323 InsertOffset = 0;
7324 ExtractOffset = DstStart - OpStart;
7325 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7326 } else {
7327 InsertOffset = OpStart - DstStart;
7328 ExtractOffset = 0;
7329 SegSize =
7330 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7331 }
7332
7333 Register SegReg = OpReg;
7334 if (ExtractOffset != 0 || SegSize != OpSize) {
7335 // A genuine extract is needed.
7336 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7337 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7338 }
7339
7340 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7341 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7342 DstRegs.push_back(DstReg);
7343 }
7344
7345 uint64_t WideSize = DstRegs.size() * NarrowSize;
7346 Register DstReg = MI.getOperand(0).getReg();
7347 if (WideSize > RegTy.getSizeInBits()) {
7348 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
7349 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7350 MIRBuilder.buildTrunc(DstReg, MergeReg);
7351 } else
7352 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7353
7354 MI.eraseFromParent();
7355 return Legalized;
7356}
7357
7358 LegalizerHelper::LegalizeResult
7359 LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
7360 LLT NarrowTy) {
7361 Register DstReg = MI.getOperand(0).getReg();
7362 LLT DstTy = MRI.getType(DstReg);
7363
7364 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
7365
7366 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7367 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
7368 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7369 LLT LeftoverTy;
7370 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7371 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
7372 return UnableToLegalize;
7373
7374 LLT Unused;
7375 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7376 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7377 llvm_unreachable("inconsistent extractParts result");
7378
7379 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7380 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
7381 {Src0Regs[I], Src1Regs[I]});
7382 DstRegs.push_back(Inst.getReg(0));
7383 }
7384
7385 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7386 auto Inst = MIRBuilder.buildInstr(
7387 MI.getOpcode(),
7388 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7389 DstLeftoverRegs.push_back(Inst.getReg(0));
7390 }
7391
7392 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7393 LeftoverTy, DstLeftoverRegs);
7394
7395 MI.eraseFromParent();
7396 return Legalized;
7397}
7398
7399 LegalizerHelper::LegalizeResult
7400 LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
7401 LLT NarrowTy) {
7402 if (TypeIdx != 0)
7403 return UnableToLegalize;
7404
7405 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7406
7407 LLT DstTy = MRI.getType(DstReg);
7408 if (DstTy.isVector())
7409 return UnableToLegalize;
7410
7411 SmallVector<Register, 8> Parts;
7412 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7413 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
7414 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7415
7416 MI.eraseFromParent();
7417 return Legalized;
7418}
7419
7420 LegalizerHelper::LegalizeResult
7421 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
7422 LLT NarrowTy) {
7423 if (TypeIdx != 0)
7424 return UnableToLegalize;
7425
7426 Register CondReg = MI.getOperand(1).getReg();
7427 LLT CondTy = MRI.getType(CondReg);
7428 if (CondTy.isVector()) // TODO: Handle vselect
7429 return UnableToLegalize;
7430
7431 Register DstReg = MI.getOperand(0).getReg();
7432 LLT DstTy = MRI.getType(DstReg);
7433
7434 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7435 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7436 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
7437 LLT LeftoverTy;
7438 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7439 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7440 return UnableToLegalize;
7441
7442 LLT Unused;
7443 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7444 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
7445 llvm_unreachable("inconsistent extractParts result");
7446
7447 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7448 auto Select = MIRBuilder.buildSelect(NarrowTy,
7449 CondReg, Src1Regs[I], Src2Regs[I]);
7450 DstRegs.push_back(Select.getReg(0));
7451 }
7452
7453 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7454 auto Select = MIRBuilder.buildSelect(
7455 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
7456 DstLeftoverRegs.push_back(Select.getReg(0));
7457 }
7458
7459 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7460 LeftoverTy, DstLeftoverRegs);
7461
7462 MI.eraseFromParent();
7463 return Legalized;
7464}
7465
7466 LegalizerHelper::LegalizeResult
7467 LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
7468 LLT NarrowTy) {
7469 if (TypeIdx != 1)
7470 return UnableToLegalize;
7471
7472 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7473 unsigned NarrowSize = NarrowTy.getSizeInBits();
7474
7475 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7476 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
7477
7478 MachineIRBuilder &B = MIRBuilder;
7479 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7480 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
7481 auto C_0 = B.buildConstant(NarrowTy, 0);
7482 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7483 UnmergeSrc.getReg(1), C_0);
7484 auto LoCTLZ = IsUndef ?
7485 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
7486 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7487 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7488 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7489 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
7490 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7491
7492 MI.eraseFromParent();
7493 return Legalized;
7494 }
7495
7496 return UnableToLegalize;
7497}
7498
7499 LegalizerHelper::LegalizeResult
7500 LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
7501 LLT NarrowTy) {
7502 if (TypeIdx != 1)
7503 return UnableToLegalize;
7504
7505 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7506 unsigned NarrowSize = NarrowTy.getSizeInBits();
7507
7508 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7509 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
7510
7511 MachineIRBuilder &B = MIRBuilder;
7512 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7513 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
7514 auto C_0 = B.buildConstant(NarrowTy, 0);
7515 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7516 UnmergeSrc.getReg(0), C_0);
7517 auto HiCTTZ = IsUndef ?
7518 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
7519 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7520 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7521 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7522 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
7523 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7524
7525 MI.eraseFromParent();
7526 return Legalized;
7527 }
7528
7529 return UnableToLegalize;
7530}
7531
7532 LegalizerHelper::LegalizeResult
7533 LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
7534 LLT NarrowTy) {
7535 if (TypeIdx != 1)
7536 return UnableToLegalize;
7537
7538 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7539 unsigned NarrowSize = NarrowTy.getSizeInBits();
7540
7541 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7542 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
7543
7544 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7545 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7546 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7547
7548 MI.eraseFromParent();
7549 return Legalized;
7550 }
7551
7552 return UnableToLegalize;
7553}
7554
7555 LegalizerHelper::LegalizeResult
7556 LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
7557 LLT NarrowTy) {
7558 if (TypeIdx != 1)
7559 return UnableToLegalize;
7560
7561 MachineIRBuilder &B = MIRBuilder;
7562 Register ExpReg = MI.getOperand(2).getReg();
7563 LLT ExpTy = MRI.getType(ExpReg);
7564
7565 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
7566
7567 // Clamp the exponent to the range of the target type.
7568 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
7569 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
7570 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
7571 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
7572
7573 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
7574 Observer.changingInstr(MI);
7575 MI.getOperand(2).setReg(Trunc.getReg(0));
7576 Observer.changedInstr(MI);
7577 return Legalized;
7578}
7579
7580 LegalizerHelper::LegalizeResult
7581 LegalizerHelper::lowerBitCount(MachineInstr &MI) {
7582 unsigned Opc = MI.getOpcode();
7583 const auto &TII = MIRBuilder.getTII();
7584 auto isSupported = [this](const LegalityQuery &Q) {
7585 auto QAction = LI.getAction(Q).Action;
7586 return QAction == Legal || QAction == Libcall || QAction == Custom;
7587 };
7588 switch (Opc) {
7589 default:
7590 return UnableToLegalize;
7591 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7592 // This trivially expands to CTLZ.
7593 Observer.changingInstr(MI);
7594 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
7595 Observer.changedInstr(MI);
7596 return Legalized;
7597 }
7598 case TargetOpcode::G_CTLZ: {
7599 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7600 unsigned Len = SrcTy.getSizeInBits();
7601
7602 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7603 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
7604 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7605 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
7606 auto ICmp = MIRBuilder.buildICmp(
7607 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
7608 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7609 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7610 MI.eraseFromParent();
7611 return Legalized;
7612 }
7613 // for now, we do this:
7614 // NewLen = NextPowerOf2(Len);
7615 // x = x | (x >> 1);
7616 // x = x | (x >> 2);
7617 // ...
7618 // x = x | (x >>16);
7619 // x = x | (x >>32); // for 64-bit input
7620 // Up to NewLen/2
7621 // return Len - popcount(x);
7622 //
7623 // Ref: "Hacker's Delight" by Henry Warren
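 // E.g. for 32-bit x = 0x00ffffff the ORs leave x unchanged (all bits below
 // the leading one are already set), popcount(x) = 24, and the result is
 // 32 - 24 = 8 = ctlz(x).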
7624 Register Op = SrcReg;
7625 unsigned NewLen = PowerOf2Ceil(Len);
7626 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7627 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7628 auto MIBOp = MIRBuilder.buildOr(
7629 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
7630 Op = MIBOp.getReg(0);
7631 }
7632 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
7633 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
7634 MIBPop);
7635 MI.eraseFromParent();
7636 return Legalized;
7637 }
7638 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7639 // This trivially expands to CTTZ.
7640 Observer.changingInstr(MI);
7641 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
7642 Observer.changedInstr(MI);
7643 return Legalized;
7644 }
7645 case TargetOpcode::G_CTTZ: {
7646 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7647
7648 unsigned Len = SrcTy.getSizeInBits();
7649 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7650 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
7651 // zero.
7652 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7653 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
7654 auto ICmp = MIRBuilder.buildICmp(
7655 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
7656 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7657 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7658 MI.eraseFromParent();
7659 return Legalized;
7660 }
7661 // for now, we use: { return popcount(~x & (x - 1)); }
7662 // unless the target has ctlz but not ctpop, in which case we use:
7663 // { return 32 - nlz(~x & (x-1)); }
7664 // Ref: "Hacker's Delight" by Henry Warren
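 // E.g. x = 0b101000: x - 1 = 0b100111, ~x & (x - 1) = 0b000111, and
 // popcount of that is 3 = cttz(x).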
7665 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
7666 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7667 auto MIBTmp = MIRBuilder.buildAnd(
7668 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7669 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7670 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7671 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
7672 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
7673 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
7674 MI.eraseFromParent();
7675 return Legalized;
7676 }
7677 Observer.changingInstr(MI);
7678 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
7679 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7680 Observer.changedInstr(MI);
7681 return Legalized;
7682 }
7683 case TargetOpcode::G_CTPOP: {
7684 Register SrcReg = MI.getOperand(1).getReg();
7685 LLT Ty = MRI.getType(SrcReg);
7686 unsigned Size = Ty.getSizeInBits();
7687 MachineIRBuilder &B = MIRBuilder;
7688
7689 // Count set bits in blocks of 2 bits. Default approach would be
7690 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
7691 // We use following formula instead:
7692 // B2Count = val - { (val >> 1) & 0x55555555 }
7693 // since it gives the same result in blocks of 2 with one instruction fewer.
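 // E.g. for the 2-bit block 0b11: 3 - ((3 >> 1) & 0x1) = 2 set bits.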
7694 auto C_1 = B.buildConstant(Ty, 1);
7695 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
7696 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
7697 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
7698 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7699 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
7700
7701 // To get the count in blocks of 4, add the values from adjacent blocks of 2.
7702 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
7703 auto C_2 = B.buildConstant(Ty, 2);
7704 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
7705 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
7706 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
7707 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7708 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7709 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7710
7711 // For count in blocks of 8 bits we don't have to mask high 4 bits before
7712 // addition since count value sits in range {0,...,8} and 4 bits are enough
7713 // to hold such binary values. After addition high 4 bits still hold count
7714 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
7715 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
7716 auto C_4 = B.buildConstant(Ty, 4);
7717 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
7718 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
7719 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
7720 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
7721 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7722
7723 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
7724 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
7725 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
7726 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
7727
7728 // Shift count result from 8 high bits to low bits.
7729 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
7730
7731 auto IsMulSupported = [this](const LLT Ty) {
7732 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7733 return Action == Legal || Action == WidenScalar || Action == Custom;
7734 };
7735 if (IsMulSupported(Ty)) {
7736 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
7737 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7738 } else {
7739 auto ResTmp = B8Count;
7740 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7741 auto ShiftC = B.buildConstant(Ty, Shift);
7742 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
7743 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
7744 }
7745 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7746 }
7747 MI.eraseFromParent();
7748 return Legalized;
7749 }
7750 }
7751}
7752
7753// Check that (every element of) Reg is undef or not an exact multiple of BW.
7754 static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
7755 Register Reg, unsigned BW) {
7756 return matchUnaryPredicate(
7757 MRI, Reg,
7758 [=](const Constant *C) {
7759 // Null constant here means an undef.
7760 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
7761 return !CI || CI->getValue().urem(BW) != 0;
7762 },
7763 /*AllowUndefs*/ true);
7764}
7765
7766 LegalizerHelper::LegalizeResult
7767 LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
7768 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7769 LLT Ty = MRI.getType(Dst);
7770 LLT ShTy = MRI.getType(Z);
7771
7772 unsigned BW = Ty.getScalarSizeInBits();
7773
7774 if (!isPowerOf2_32(BW))
7775 return UnableToLegalize;
7776
7777 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7778 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7779
7780 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7781 // fshl X, Y, Z -> fshr X, Y, -Z
7782 // fshr X, Y, Z -> fshl X, Y, -Z
7783 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
7784 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7785 } else {
7786 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7787 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7788 auto One = MIRBuilder.buildConstant(ShTy, 1);
7789 if (IsFSHL) {
7790 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7791 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
7792 } else {
7793 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7794 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
7795 }
7796
7797 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
7798 }
7799
7800 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
7801 MI.eraseFromParent();
7802 return Legalized;
7803}
7804
7805 LegalizerHelper::LegalizeResult
7806 LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
7807 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7808 LLT Ty = MRI.getType(Dst);
7809 LLT ShTy = MRI.getType(Z);
7810
7811 const unsigned BW = Ty.getScalarSizeInBits();
7812 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7813
7814 Register ShX, ShY;
7815 Register ShAmt, InvShAmt;
7816
7817 // FIXME: Emit optimized urem by constant instead of letting it expand later.
7818 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7819 // fshl: X << C | Y >> (BW - C)
7820 // fshr: X << (BW - C) | Y >> C
7821 // where C = Z % BW is not zero
7822 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7823 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7824 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
7825 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
7826 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
7827 } else {
7828 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7829 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
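 // The extra ">> 1" / "<< 1" keeps the variable shift amount within
 // [0, BW - 1] even when Z % BW == 0, avoiding an undefined shift by BW.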
7830 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
7831 if (isPowerOf2_32(BW)) {
7832 // Z % BW -> Z & (BW - 1)
7833 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
7834 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7835 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
7836 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
7837 } else {
7838 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7839 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7840 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
7841 }
7842
7843 auto One = MIRBuilder.buildConstant(ShTy, 1);
7844 if (IsFSHL) {
7845 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
7846 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
7847 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
7848 } else {
7849 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
7850 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
7851 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
7852 }
7853 }
7854
7855 MIRBuilder.buildOr(Dst, ShX, ShY, MachineInstr::Disjoint);
7856 MI.eraseFromParent();
7857 return Legalized;
7858}
7859
7860 LegalizerHelper::LegalizeResult
7861 LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
7862 // These operations approximately do the following (while avoiding undefined
7863 // shifts by BW):
7864 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
7865 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
7866 Register Dst = MI.getOperand(0).getReg();
7867 LLT Ty = MRI.getType(Dst);
7868 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
7869
7870 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7871 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7872
7873 // TODO: Use smarter heuristic that accounts for vector legalization.
7874 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
7875 return lowerFunnelShiftAsShifts(MI);
7876
7877 // This only works for powers of 2, fallback to shifts if it fails.
7878 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
7879 if (Result == UnableToLegalize)
7880 return lowerFunnelShiftAsShifts(MI);
7881 return Result;
7882}
7883
7884 LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
7885 auto [Dst, Src] = MI.getFirst2Regs();
7886 LLT DstTy = MRI.getType(Dst);
7887 LLT SrcTy = MRI.getType(Src);
7888
7889 uint32_t DstTySize = DstTy.getSizeInBits();
7890 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
7891 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
7892
7893 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
7894 !isPowerOf2_32(SrcTyScalarSize))
7895 return UnableToLegalize;
7896
7897 // The step between extends is too large; split it by creating an
7898 // intermediate extend instruction.
7899 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
7900 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
7901 // If the destination type is illegal, split it into multiple statements
7902 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
7903 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
7904 // Unmerge the vector
7905 LLT EltTy = MidTy.changeElementCount(
7906 MidTy.getElementCount().divideCoefficientBy(2));
7907 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
7908
7909 // ZExt the vectors
7910 LLT ZExtResTy = DstTy.changeElementCount(
7911 DstTy.getElementCount().divideCoefficientBy(2));
7912 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7913 {UnmergeSrc.getReg(0)});
7914 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7915 {UnmergeSrc.getReg(1)});
7916
7917 // Merge the ending vectors
7918 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
7919
7920 MI.eraseFromParent();
7921 return Legalized;
7922 }
7923 return UnableToLegalize;
7924}
7925
7926 LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
7927 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
7928 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
7929 // Similar to how operand splitting is done in SelectionDAG, we can handle
7930 // %res(<8 x s8>) = G_TRUNC %in(<8 x s32>) by generating:
7931 // %inlo(<4 x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
7932 // %lo16(<4 x s16>) = G_TRUNC %inlo
7933 // %hi16(<4 x s16>) = G_TRUNC %inhi
7934 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
7935 // %res(<8 x s8>) = G_TRUNC %in16
7936
7937 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
7938
7939 Register DstReg = MI.getOperand(0).getReg();
7940 Register SrcReg = MI.getOperand(1).getReg();
7941 LLT DstTy = MRI.getType(DstReg);
7942 LLT SrcTy = MRI.getType(SrcReg);
7943
7944 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
7945 isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
7946 isPowerOf2_32(SrcTy.getNumElements()) &&
7947 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
7948 // Split input type.
7949 LLT SplitSrcTy = SrcTy.changeElementCount(
7950 SrcTy.getElementCount().divideCoefficientBy(2));
7951
7952 // First, split the source into two smaller vectors.
7953 SmallVector<Register, 2> SplitSrcs;
7954 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
7955
7956 // Truncate the splits into intermediate narrower elements.
7957 LLT InterTy;
7958 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7959 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
7960 else
7961 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
7962 for (Register &Src : SplitSrcs)
7963 Src = MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
7964
7965 // Combine the new truncates into one vector
7966 auto Merge = MIRBuilder.buildMergeLikeInstr(
7967 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
7968
7969 // Truncate the new vector to the final result type
7970 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7971 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
7972 else
7973 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
7974
7975 MI.eraseFromParent();
7976
7977 return Legalized;
7978 }
7979 return UnableToLegalize;
7980}
7981
7982 LegalizerHelper::LegalizeResult
7983 LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
7984 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7985 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
7986 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7987 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7988 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
7989 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
7990 MI.eraseFromParent();
7991 return Legalized;
7992}
7993
7994 LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
7995 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7996
7997 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
7998 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7999
8000 MIRBuilder.setInstrAndDebugLoc(MI);
8001
8002 // If a rotate in the other direction is supported, use it.
8003 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8004 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
8005 isPowerOf2_32(EltSizeInBits))
8006 return lowerRotateWithReverseRotate(MI);
8007
8008 // If a funnel shift is supported, use it.
8009 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8010 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8011 bool IsFShLegal = false;
8012 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
8013 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
8014 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
8015 Register R3) {
8016 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
8017 MI.eraseFromParent();
8018 return Legalized;
8019 };
8020 // If a funnel shift in the other direction is supported, use it.
8021 if (IsFShLegal) {
8022 return buildFunnelShift(FShOpc, Dst, Src, Amt);
8023 } else if (isPowerOf2_32(EltSizeInBits)) {
8024 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
8025 return buildFunnelShift(RevFsh, Dst, Src, Amt);
8026 }
8027 }
8028
8029 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
8030 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
8031 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
8032 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
8033 Register ShVal;
8034 Register RevShiftVal;
8035 if (isPowerOf2_32(EltSizeInBits)) {
8036 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8037 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
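 // E.g. rotl x, 35 on a 32-bit x becomes (x << 3) | (x >> 29).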
8038 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
8039 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
8040 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8041 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
8042 RevShiftVal =
8043 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
8044 } else {
8045 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8046 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8047 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
8048 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
8049 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8050 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
8051 auto One = MIRBuilder.buildConstant(AmtTy, 1);
8052 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
8053 RevShiftVal =
8054 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
8055 }
8056 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
8057 MI.eraseFromParent();
8058 return Legalized;
8059}
8060
8061// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
8062// representation.
8063 LegalizerHelper::LegalizeResult
8064 LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
8065 auto [Dst, Src] = MI.getFirst2Regs();
8066 const LLT S64 = LLT::scalar(64);
8067 const LLT S32 = LLT::scalar(32);
8068 const LLT S1 = LLT::scalar(1);
8069
8070 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8071
8072 // unsigned cul2f(ulong u) {
8073 // uint lz = clz(u);
8074 // uint e = (u != 0) ? 127U + 63U - lz : 0;
8075 // u = (u << lz) & 0x7fffffffffffffffUL;
8076 // ulong t = u & 0xffffffffffUL;
8077 // uint v = (e << 23) | (uint)(u >> 40);
8078 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
8079 // return as_float(v + r);
8080 // }
8081
8082 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
8083 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
8084
8085 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
8086
8087 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
8088 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
8089
8090 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
8091 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
8092
8093 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
8094 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
8095
8096 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
8097
8098 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
8099 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
8100
8101 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
8102 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
8103 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
8104
8105 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
8106 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
8107 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
8108 auto One = MIRBuilder.buildConstant(S32, 1);
8109
8110 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
8111 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
8112 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
8113 MIRBuilder.buildAdd(Dst, V, R);
8114
8115 MI.eraseFromParent();
8116 return Legalized;
8117}
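// Worked example (illustrative values, not part of the source): u = 1 in the
// cul2f() expansion above: lz = 63, e = 127 + 63 - 63 = 127, the shifted and
// masked u is 0, so t = 0, v = (127 << 23) | 0 = 0x3f800000 and r = 0;
// as_float(0x3f800000) is 1.0f, as expected.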
8118
8119// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
8120// operations and G_SITOFP
8121LegalizerHelper::LegalizeResult
8122LegalizerHelper::lowerU64ToF32WithSITOFP(MachineInstr &MI) {
8123 auto [Dst, Src] = MI.getFirst2Regs();
8124 const LLT S64 = LLT::scalar(64);
8125 const LLT S32 = LLT::scalar(32);
8126 const LLT S1 = LLT::scalar(1);
8127
8128 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8129
8130 // For i64 values up to INT64_MAX we simply reuse SITOFP.
8131 // Otherwise, divide the i64 by 2, round the result by ORing in the lowest
8132 // bit saved before the division, convert to float with SITOFP, and double
8133 // the result.
8134 auto One = MIRBuilder.buildConstant(S64, 1);
8135 auto Zero = MIRBuilder.buildConstant(S64, 0);
8136 // Result if Src <= INT64_MAX
8137 auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
8138 // Result if Src > INT64_MAX
8139 auto Halved = MIRBuilder.buildLShr(S64, Src, One);
8140 auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
8141 auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
8142 auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
8143 auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
8144 // Check if the original value is larger than INT_MAX by comparing with
8145 // zero to pick one of the two conversions.
8146 auto IsLarge =
8147 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
8148 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8149
8150 MI.eraseFromParent();
8151 return Legalized;
8152}
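// Worked example (illustrative values, not part of the source): Src = 2^63.
// IsLarge is true, Halved = 2^62, LowerBit = 0, RoundedHalved = 2^62,
// HalvedFP = 2^62 (exact), and LargeResult = 2^62 + 2^62 = 2^63, the correctly
// rounded f32 value.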
8153
8154// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
8155// IEEE double representation.
8156LegalizerHelper::LegalizeResult
8157LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) {
8158 auto [Dst, Src] = MI.getFirst2Regs();
8159 const LLT S64 = LLT::scalar(64);
8160 const LLT S32 = LLT::scalar(32);
8161
8162 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
8163
8164 // We create double value from 32 bit parts with 32 exponent difference.
8165 // Note that + and - are float operations that adjust the implicit leading
8166 // one, the bases 2^52 and 2^84 are for illustrative purposes.
8167 //
8168 // X = 2^52 * 1.0...LowBits
8169 // Y = 2^84 * 1.0...HighBits
8170 // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
8171 // = - 2^52 * 1.0...HighBits
8172 // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
8173 auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
8174 auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
8175 auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
8176 auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
8177 auto HalfWidth = MIRBuilder.buildConstant(S64, 32);
8178
8179 auto LowBits = MIRBuilder.buildTrunc(S32, Src);
8180 LowBits = MIRBuilder.buildZExt(S64, LowBits);
8181 auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
8182 auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
8183 auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
8184 auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
8185 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
8186
8187 MI.eraseFromParent();
8188 return Legalized;
8189}
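// Worked example (illustrative values, not part of the source):
// Src = 0x0000000100000002, i.e. HighBits = 1 and LowBits = 2.
//   LowBitsFP  has the bit pattern of 2^52 + 2,
//   HighBitsFP has the bit pattern of 2^84 + 2^32,
//   Scratch = (2^84 + 2^32) - (2^84 + 2^52) = 2^32 - 2^52,
//   Result  = (2^32 - 2^52) + (2^52 + 2) = 2^32 + 2 = 4294967298.0.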
8190
8191/// i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16. We cannot
8192/// convert fpround f64->f16 without double-rounding, so we manually perform the
8193/// lowering here where we know it is valid.
8194static LegalizerHelper::LegalizeResult
8195loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src,
8196 LLT SrcTy, MachineIRBuilder &MIRBuilder) {
8197 auto M1 = MI.getOpcode() == TargetOpcode::G_UITOFP
8198 ? MIRBuilder.buildUITOFP(SrcTy, Src)
8199 : MIRBuilder.buildSITOFP(SrcTy, Src);
8200 LLT S32Ty = SrcTy.changeElementSize(32);
8201 auto M2 = MIRBuilder.buildFPTrunc(S32Ty, M1);
8202 MIRBuilder.buildFPTrunc(Dst, M2);
8203 MI.eraseFromParent();
8204 return LegalizerHelper::Legalized;
8205}
8206
8207LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
8208 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8209
8210 if (SrcTy == LLT::scalar(1)) {
8211 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
8212 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8213 MIRBuilder.buildSelect(Dst, Src, True, False);
8214 MI.eraseFromParent();
8215 return Legalized;
8216 }
8217
8218 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8219 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8220
8221 if (SrcTy != LLT::scalar(64))
8222 return UnableToLegalize;
8223
8224 if (DstTy == LLT::scalar(32))
8225 // TODO: SelectionDAG has several alternative expansions to port which may
8226 // be more reasonable depending on the available instructions. We also need
8227 // a more advanced mechanism to choose an optimal version depending on
8228 // target features such as sitofp or CTLZ availability.
8229 return lowerU64ToF32WithSITOFP(MI);
8230
8231 if (DstTy == LLT::scalar(64))
8232 return lowerU64ToF64BitFloatOps(MI);
8233
8234 return UnableToLegalize;
8235}
8236
8237LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
8238 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8239
8240 const LLT S64 = LLT::scalar(64);
8241 const LLT S32 = LLT::scalar(32);
8242 const LLT S1 = LLT::scalar(1);
8243
8244 if (SrcTy == S1) {
8245 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
8246 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8247 MIRBuilder.buildSelect(Dst, Src, True, False);
8248 MI.eraseFromParent();
8249 return Legalized;
8250 }
8251
8252 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8253 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8254
8255 if (SrcTy != S64)
8256 return UnableToLegalize;
8257
8258 if (DstTy == S32) {
8259 // signed cl2f(long l) {
8260 // long s = l >> 63;
8261 // float r = cul2f((l + s) ^ s);
8262 // return s ? -r : r;
8263 // }
8264 Register L = Src;
8265 auto SignBit = MIRBuilder.buildConstant(S64, 63);
8266 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
8267
8268 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
8269 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
8270 auto R = MIRBuilder.buildUITOFP(S32, Xor);
8271
8272 auto RNeg = MIRBuilder.buildFNeg(S32, R);
8273 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
8274 MIRBuilder.buildConstant(S64, 0));
8275 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8276 MI.eraseFromParent();
8277 return Legalized;
8278 }
8279
8280 return UnableToLegalize;
8281}
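// Worked example (illustrative values, not part of the source): l = -8 in the
// cl2f() expansion above: s = -1, (l + s) ^ s = (-9) ^ (-1) = 8, r = 8.0f, and
// since s != 0 the select yields -r = -8.0f.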
8282
8283LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
8284 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8285 const LLT S64 = LLT::scalar(64);
8286 const LLT S32 = LLT::scalar(32);
8287
8288 if (SrcTy != S64 && SrcTy != S32)
8289 return UnableToLegalize;
8290 if (DstTy != S32 && DstTy != S64)
8291 return UnableToLegalize;
8292
8293 // FPTOSI gives same result as FPTOUI for positive signed integers.
8294 // FPTOUI needs to deal with fp values that convert to unsigned integers
8295 // greater or equal to 2^31 for i32 or 2^63 for i64 results. For brevity 2^Exp.
8296
8297 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
8298 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
8299 : APFloat::IEEEdouble(),
8300 APInt::getZero(SrcTy.getSizeInBits()));
8301 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
8302
8303 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
8304
8305 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
8306 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
8307 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
8308 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
8309 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
8310 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
8311 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
8312
8313 const LLT S1 = LLT::scalar(1);
8314
8315 MachineInstrBuilder FCMP =
8316 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
8317 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8318
8319 MI.eraseFromParent();
8320 return Legalized;
8321}
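// Worked example (illustrative values, not part of the source): f64 -> i64
// with Src = 2^63 + 2048. Threshold = 2^63, so FCMP_ULT is false and the
// second select operand is used: FSub = 2048.0, ResLowBits = 2048, and
// Res = 2048 ^ 0x8000000000000000 = 2^63 + 2048, the expected result.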
8322
8323LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
8324 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8325 const LLT S64 = LLT::scalar(64);
8326 const LLT S32 = LLT::scalar(32);
8327
8328 // FIXME: Only f32 to i64 conversions are supported.
8329 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
8330 return UnableToLegalize;
8331
8332 // Expand f32 -> i64 conversion
8333 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8334 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8335
8336 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8337
8338 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
8339 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
8340
8341 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
8342 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
8343
8344 auto SignMask = MIRBuilder.buildConstant(SrcTy,
8345 APInt::getSignMask(SrcEltBits));
8346 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
8347 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
8348 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
8349 Sign = MIRBuilder.buildSExt(DstTy, Sign);
8350
8351 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
8352 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
8353 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
8354
8355 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
8356 R = MIRBuilder.buildZExt(DstTy, R);
8357
8358 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
8359 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
8360 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
8361 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
8362
8363 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
8364 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
8365
8366 const LLT S1 = LLT::scalar(1);
8367 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
8368 S1, Exponent, ExponentLoBit);
8369
8370 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
8371
8372 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
8373 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
8374
8375 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
8376
8377 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
8378 S1, Exponent, ZeroSrcTy);
8379
8380 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
8381 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
8382
8383 MI.eraseFromParent();
8384 return Legalized;
8385}
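// Worked example (illustrative values, not part of the source): Src = -4.0f
// (bits 0xC0800000). ExponentBits = 129, Exponent = 2, Sign = -1, and
// R = 0x800000. Since Exponent <= 23 the Srl path is taken: 0x800000 >> 21 = 4;
// (4 ^ -1) - (-1) = -4, and Exponent >= 0, so the final result is -4.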
8386
8387LegalizerHelper::LegalizeResult
8388LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
8389 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8390
8391 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8392 unsigned SatWidth = DstTy.getScalarSizeInBits();
8393
8394 // Determine minimum and maximum integer values and their corresponding
8395 // floating-point values.
8396 APInt MinInt, MaxInt;
8397 if (IsSigned) {
8398 MinInt = APInt::getSignedMinValue(SatWidth);
8399 MaxInt = APInt::getSignedMaxValue(SatWidth);
8400 } else {
8401 MinInt = APInt::getMinValue(SatWidth);
8402 MaxInt = APInt::getMaxValue(SatWidth);
8403 }
8404
8405 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8406 APFloat MinFloat(Semantics);
8407 APFloat MaxFloat(Semantics);
8408
8409 APFloat::opStatus MinStatus =
8410 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
8411 APFloat::opStatus MaxStatus =
8412 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
8413 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
8414 !(MaxStatus & APFloat::opStatus::opInexact);
8415
8416 // If the integer bounds are exactly representable as floats, emit a
8417 // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
8418 // and selects.
8419 if (AreExactFloatBounds) {
8420 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
8421 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
8422 auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
8423 SrcTy.changeElementSize(1), Src, MaxC);
8424 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8425 // Clamp by MaxFloat from above. NaN cannot occur.
8426 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8427 auto MinP =
8428 MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max,
8429 MinC, MachineInstr::FmNoNans);
8430 auto Min =
8431 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
8432 // Convert clamped value to integer. In the unsigned case we're done,
8433 // because we mapped NaN to MinFloat, which will cast to zero.
8434 if (!IsSigned) {
8435 MIRBuilder.buildFPTOUI(Dst, Min);
8436 MI.eraseFromParent();
8437 return Legalized;
8438 }
8439
8440 // Otherwise, select 0 if Src is NaN.
8441 auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
8442 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8443 DstTy.changeElementSize(1), Src, Src);
8444 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
8445 FpToInt);
8446 MI.eraseFromParent();
8447 return Legalized;
8448 }
8449
8450 // Result of direct conversion. The assumption here is that the operation is
8451 // non-trapping and it's fine to apply it to an out-of-range value if we
8452 // select it away later.
8453 auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
8454 : MIRBuilder.buildFPTOUI(DstTy, Src);
8455
8456 // If Src ULT MinFloat, select MinInt. In particular, this also selects
8457 // MinInt if Src is NaN.
8458 auto ULT =
8459 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
8460 MIRBuilder.buildFConstant(SrcTy, MinFloat));
8461 auto Max = MIRBuilder.buildSelect(
8462 DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8463 // If Src OGT MaxFloat, select MaxInt.
8464 auto OGT =
8465 MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
8466 MIRBuilder.buildFConstant(SrcTy, MaxFloat));
8467
8468 // In the unsigned case we are done, because we mapped NaN to MinInt, which
8469 // is already zero.
8470 if (!IsSigned) {
8471 MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
8472 Max);
8473 MI.eraseFromParent();
8474 return Legalized;
8475 }
8476
8477 // Otherwise, select 0 if Src is NaN.
8478 auto Min = MIRBuilder.buildSelect(
8479 DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8480 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8481 DstTy.changeElementSize(1), Src, Src);
8482 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
8483 MI.eraseFromParent();
8484 return Legalized;
8485}
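// Worked example (illustrative values, not part of the source): G_FPTOSI_SAT
// from f32 to s8. MinInt = -128 and MaxInt = 127 are exactly representable as
// floats, so the min+max+fptoi path is used: Src is clamped to [-128.0, 127.0]
// (NaN maps to -128.0), converted with G_FPTOSI, and the trailing
// FCMP_UNO/select maps NaN inputs to 0.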
8486
8487// f64 -> f16 conversion using round-to-nearest-even rounding mode.
8488LegalizerHelper::LegalizeResult
8489LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
8490 const LLT S1 = LLT::scalar(1);
8491 const LLT S32 = LLT::scalar(32);
8492
8493 auto [Dst, Src] = MI.getFirst2Regs();
8494 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
8495 MRI.getType(Src).getScalarType() == LLT::scalar(64));
8496
8497 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
8498 return UnableToLegalize;
8499
8500 if (MI.getFlag(MachineInstr::FmAfn)) {
8501 unsigned Flags = MI.getFlags();
8502 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
8503 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
8504 MI.eraseFromParent();
8505 return Legalized;
8506 }
8507
8508 const unsigned ExpMask = 0x7ff;
8509 const unsigned ExpBiasf64 = 1023;
8510 const unsigned ExpBiasf16 = 15;
8511
8512 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
8513 Register U = Unmerge.getReg(0);
8514 Register UH = Unmerge.getReg(1);
8515
8516 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
8517 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
8518
8519 // Subtract the fp64 exponent bias (1023) to get the real exponent and
8520 // add the f16 bias (15) to get the biased exponent for the f16 format.
8521 E = MIRBuilder.buildAdd(
8522 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
8523
8524 auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
8525 M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
8526
8527 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
8528 MIRBuilder.buildConstant(S32, 0x1ff));
8529 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
8530
8531 auto Zero = MIRBuilder.buildConstant(S32, 0);
8532 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
8533 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
8534 M = MIRBuilder.buildOr(S32, M, Lo40Set);
8535
8536 // (M != 0 ? 0x0200 : 0) | 0x7c00;
8537 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
8538 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
8539 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
8540
8541 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
8542 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
8543
8544 // N = M | (E << 12);
8545 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
8546 auto N = MIRBuilder.buildOr(S32, M, EShl12);
8547
8548 // B = clamp(1-E, 0, 13);
8549 auto One = MIRBuilder.buildConstant(S32, 1);
8550 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
8551 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
8552 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
8553
8554 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
8555 MIRBuilder.buildConstant(S32, 0x1000));
8556
8557 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
8558 auto D0 = MIRBuilder.buildShl(S32, D, B);
8559
8560 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
8561 D0, SigSetHigh);
8562 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
8563 D = MIRBuilder.buildOr(S32, D, D1);
8564
8565 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
8566 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
8567
8568 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
8569 V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
8570
8571 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
8572 MIRBuilder.buildConstant(S32, 3));
8573 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
8574
8575 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
8576 MIRBuilder.buildConstant(S32, 5));
8577 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
8578
8579 V1 = MIRBuilder.buildOr(S32, V0, V1);
8580 V = MIRBuilder.buildAdd(S32, V, V1);
8581
8582 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
8583 E, MIRBuilder.buildConstant(S32, 30));
8584 V = MIRBuilder.buildSelect(S32, CmpEGt30,
8585 MIRBuilder.buildConstant(S32, 0x7c00), V);
8586
8587 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
8588 E, MIRBuilder.buildConstant(S32, 1039));
8589 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
8590
8591 // Extract the sign bit.
8592 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
8593 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
8594
8595 // Insert the sign bit
8596 V = MIRBuilder.buildOr(S32, Sign, V);
8597
8598 MIRBuilder.buildTrunc(Dst, V);
8599 MI.eraseFromParent();
8600 return Legalized;
8601}
8602
8603LegalizerHelper::LegalizeResult
8604LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
8605 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
8606 const LLT S64 = LLT::scalar(64);
8607 const LLT S16 = LLT::scalar(16);
8608
8609 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
8610 return lowerFPTRUNC_F64_TO_F16(MI);
8611
8612 return UnableToLegalize;
8613}
8614
8615LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
8616 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8617 LLT Ty = MRI.getType(Dst);
8618
8619 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
8620 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
8621 MI.eraseFromParent();
8622 return Legalized;
8623}
8624
8625static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
8626 switch (Opc) {
8627 case TargetOpcode::G_SMIN:
8628 return CmpInst::ICMP_SLT;
8629 case TargetOpcode::G_SMAX:
8630 return CmpInst::ICMP_SGT;
8631 case TargetOpcode::G_UMIN:
8632 return CmpInst::ICMP_ULT;
8633 case TargetOpcode::G_UMAX:
8634 return CmpInst::ICMP_UGT;
8635 default:
8636 llvm_unreachable("not in integer min/max");
8637 }
8638}
8639
8640LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
8641 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8642
8643 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
8644 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
8645
8646 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8647 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8648
8649 MI.eraseFromParent();
8650 return Legalized;
8651}
8652
8653LegalizerHelper::LegalizeResult
8654LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
8655 GSUCmp *Cmp = cast<GSUCmp>(&MI);
8656
8657 Register Dst = Cmp->getReg(0);
8658 LLT DstTy = MRI.getType(Dst);
8659 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8660 LLT CmpTy = DstTy.changeElementSize(1);
8661
8662 CmpInst::Predicate LTPredicate = Cmp->isSigned()
8663 ? CmpInst::Predicate::ICMP_SLT
8664 : CmpInst::Predicate::ICMP_ULT;
8665 CmpInst::Predicate GTPredicate = Cmp->isSigned()
8666 ? CmpInst::Predicate::ICMP_SGT
8667 : CmpInst::Predicate::ICMP_UGT;
8668
8669 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
8670 auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8671 Cmp->getRHSReg());
8672 auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8673 Cmp->getRHSReg());
8674
8675 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
8676 auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
8677 if (TLI.preferSelectsOverBooleanArithmetic(
8678 getApproximateEVTForLLT(SrcTy, Ctx)) ||
8679 BC == TargetLowering::UndefinedBooleanContent) {
8680 auto One = MIRBuilder.buildConstant(DstTy, 1);
8681 auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8682
8683 auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
8684 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8685 } else {
8686 if (BC == TargetLowering::ZeroOrNegativeOneBooleanContent)
8687 std::swap(IsGT, IsLT);
8688 // Extend boolean results to DstTy, which is at least i2, before subtracting
8689 // them.
8690 unsigned BoolExtOp =
8691 MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
8692 IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8693 IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8694 MIRBuilder.buildSub(Dst, IsGT, IsLT);
8695 }
8696
8697 MI.eraseFromParent();
8698 return Legalized;
8699}
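// Worked example (illustrative values, not part of the source): G_SCMP with
// LHS = 5 and RHS = 7 gives IsGT = false and IsLT = true, so either path
// produces -1; equal operands produce 0 and LHS > RHS produces +1.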
8700
8701LegalizerHelper::LegalizeResult
8702LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
8703 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
8704 const int Src0Size = Src0Ty.getScalarSizeInBits();
8705 const int Src1Size = Src1Ty.getScalarSizeInBits();
8706
8707 auto SignBitMask = MIRBuilder.buildConstant(
8708 Src0Ty, APInt::getSignMask(Src0Size));
8709
8710 auto NotSignBitMask = MIRBuilder.buildConstant(
8711 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
8712
8713 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
8714 Register And1;
8715 if (Src0Ty == Src1Ty) {
8716 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
8717 } else if (Src0Size > Src1Size) {
8718 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
8719 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
8720 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
8721 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
8722 } else {
8723 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
8724 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
8725 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
8726 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
8727 }
8728
8729 // Be careful about setting nsz/nnan/ninf on every instruction, since the
8730 // constants are a nan and -0.0, but the final result should preserve
8731 // everything.
8732 unsigned Flags = MI.getFlags();
8733
8734 // We masked the sign bit and the not-sign bit, so these are disjoint.
8735 Flags |= MachineInstr::Disjoint;
8736
8737 MIRBuilder.buildOr(Dst, And0, And1, Flags);
8738
8739 MI.eraseFromParent();
8740 return Legalized;
8741}
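// Worked example (illustrative values, not part of the source): f32
// G_FCOPYSIGN with Src0 = 1.5 (0x3FC00000) and Src1 = -2.0 (0xC0000000):
//   And0 = 0x3FC00000 & 0x7FFFFFFF = 0x3FC00000
//   And1 = 0xC0000000 & 0x80000000 = 0x80000000
//   Or   = 0xBFC00000, i.e. -1.5.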
8742
8743LegalizerHelper::LegalizeResult
8744LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
8745 // FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have
8746 // identical handling. fminimumnum/fmaximumnum also need a path that does not
8747 // depend on fminnum/fmaxnum.
8748
8749 unsigned NewOp;
8750 switch (MI.getOpcode()) {
8751 case TargetOpcode::G_FMINNUM:
8752 NewOp = TargetOpcode::G_FMINNUM_IEEE;
8753 break;
8754 case TargetOpcode::G_FMINIMUMNUM:
8755 NewOp = TargetOpcode::G_FMINNUM;
8756 break;
8757 case TargetOpcode::G_FMAXNUM:
8758 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
8759 break;
8760 case TargetOpcode::G_FMAXIMUMNUM:
8761 NewOp = TargetOpcode::G_FMAXNUM;
8762 break;
8763 default:
8764 llvm_unreachable("unexpected min/max opcode");
8765 }
8766
8767 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8768 LLT Ty = MRI.getType(Dst);
8769
8770 if (!MI.getFlag(MachineInstr::FmNoNans)) {
8771 // Insert canonicalizes if it's possible we need to quiet to get correct
8772 // sNaN behavior.
8773
8774 // Note this must be done here, and not as an optimization combine in the
8775 // absence of a dedicated quiet-snan instruction as we're using an
8776 // omni-purpose G_FCANONICALIZE.
8777 if (!isKnownNeverSNaN(Src0, MRI))
8778 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
8779
8780 if (!isKnownNeverSNaN(Src1, MRI))
8781 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
8782 }
8783
8784 // If there are no nans, it's safe to simply replace this with the non-IEEE
8785 // version.
8786 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
8787 MI.eraseFromParent();
8788 return Legalized;
8789}
8790
8791LegalizerHelper::LegalizeResult
8792LegalizerHelper::lowerFMinimumMaximum(MachineInstr &MI) {
8793 unsigned Opc = MI.getOpcode();
8794 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8795 LLT Ty = MRI.getType(Dst);
8796 LLT CmpTy = Ty.changeElementSize(1);
8797
8798 bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM);
8799 unsigned OpcIeee =
8800 IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
8801 unsigned OpcNonIeee =
8802 IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
8803 bool MinMaxMustRespectOrderedZero = false;
8804 Register Res;
8805
8806 // IEEE variants don't need canonicalization
8807 if (LI.isLegalOrCustom({OpcIeee, Ty})) {
8808 Res = MIRBuilder.buildInstr(OpcIeee, {Ty}, {Src0, Src1}).getReg(0);
8809 MinMaxMustRespectOrderedZero = true;
8810 } else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
8811 Res = MIRBuilder.buildInstr(OpcNonIeee, {Ty}, {Src0, Src1}).getReg(0);
8812 } else {
8813 auto Compare = MIRBuilder.buildFCmp(
8814 IsMax ? CmpInst::FCMP_OGT : CmpInst::FCMP_OLT, CmpTy, Src0, Src1);
8815 Res = MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
8816 }
8817
8818 // Propagate a NaN if either operand may be NaN.
8819 if (!MI.getFlag(MachineInstr::FmNoNans) &&
8820 (!isKnownNeverNaN(Src0, MRI) || !isKnownNeverNaN(Src1, MRI))) {
8821 auto IsOrdered = MIRBuilder.buildFCmp(CmpInst::FCMP_ORD, CmpTy, Src0, Src1);
8822
8823 LLT ElementTy = Ty.isScalar() ? Ty : Ty.getElementType();
8824 APFloat NaNValue = APFloat::getNaN(getFltSemanticForLLT(ElementTy));
8825 Register NaN = MIRBuilder.buildFConstant(ElementTy, NaNValue).getReg(0);
8826 if (Ty.isVector())
8827 NaN = MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);
8828
8829 Res = MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
8830 }
8831
8832 // fminimum/fmaximum requires -0.0 less than +0.0
8833 if (!MinMaxMustRespectOrderedZero && !MI.getFlag(MachineInstr::FmNsz)) {
8834 GISelValueTracking VT(MIRBuilder.getMF());
8835 KnownFPClass Src0Info = VT.computeKnownFPClass(Src0, fcZero);
8836 KnownFPClass Src1Info = VT.computeKnownFPClass(Src1, fcZero);
8837
8838 if (!Src0Info.isKnownNeverZero() && !Src1Info.isKnownNeverZero()) {
8839 const unsigned Flags = MI.getFlags();
8840 Register Zero = MIRBuilder.buildFConstant(Ty, 0.0).getReg(0);
8841 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CmpTy, Res, Zero);
8842
8843 unsigned TestClass = IsMax ? fcPosZero : fcNegZero;
8844
8845 auto LHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
8846 auto LHSSelect =
8847 MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);
8848
8849 auto RHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
8850 auto RHSSelect =
8851 MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);
8852
8853 Res = MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
8854 }
8855 }
8856
8857 MIRBuilder.buildCopy(Dst, Res);
8858 MI.eraseFromParent();
8859 return Legalized;
8860}
8861
8862LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
8863 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
8864 Register DstReg = MI.getOperand(0).getReg();
8865 LLT Ty = MRI.getType(DstReg);
8866 unsigned Flags = MI.getFlags();
8867
8868 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
8869 Flags);
8870 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
8871 MI.eraseFromParent();
8872 return Legalized;
8873}
8874
8875LegalizerHelper::LegalizeResult
8876LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
8877 auto [DstReg, X] = MI.getFirst2Regs();
8878 const unsigned Flags = MI.getFlags();
8879 const LLT Ty = MRI.getType(DstReg);
8880 const LLT CondTy = Ty.changeElementSize(1);
8881
8882 // round(x) =>
8883 // t = trunc(x);
8884 // d = fabs(x - t);
8885 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
8886 // return t + o;
8887
8888 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
8889
8890 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
8891 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
8892
8893 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
8894 auto Cmp =
8895 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
8896
8897 // Could emit G_UITOFP instead
8898 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
8899 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8900 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
8901 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
8902
8903 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
8904
8905 MI.eraseFromParent();
8906 return Legalized;
8907}
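// Worked example (illustrative values, not part of the source): x = -2.5 in
// the round() expansion above: t = -2.0, d = |x - t| = 0.5, d >= 0.5, so the
// offset is copysign(1.0, -2.5) = -1.0 and the result is -3.0
// (round half away from zero).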
8908
8909LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
8910 auto [DstReg, SrcReg] = MI.getFirst2Regs();
8911 unsigned Flags = MI.getFlags();
8912 LLT Ty = MRI.getType(DstReg);
8913 const LLT CondTy = Ty.changeElementSize(1);
8914
8915 // result = trunc(src);
8916 // if (src < 0.0 && src != result)
8917 // result += -1.0.
8918
8919 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
8920 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8921
8922 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
8923 SrcReg, Zero, Flags);
8924 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
8925 SrcReg, Trunc, Flags);
8926 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
8927 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
8928
8929 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
8930 MI.eraseFromParent();
8931 return Legalized;
8932}
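// Worked example (illustrative values, not part of the source): src = -1.25.
// Trunc = -1.0; src < 0.0 and src != Trunc, so And is an s1 value of 1, which
// G_SITOFP interprets as -1, giving AddVal = -1.0 and a result of
// -2.0 = floor(-1.25).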
8933
8934LegalizerHelper::LegalizeResult
8935LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
8936 const unsigned NumOps = MI.getNumOperands();
8937 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
8938 unsigned PartSize = Src0Ty.getSizeInBits();
8939
8940 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
8941 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
8942
8943 for (unsigned I = 2; I != NumOps; ++I) {
8944 const unsigned Offset = (I - 1) * PartSize;
8945
8946 Register SrcReg = MI.getOperand(I).getReg();
8947 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
8948
8949 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
8950 MRI.createGenericVirtualRegister(WideTy);
8951
8952 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
8953 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
8954 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
8955 ResultReg = NextResult;
8956 }
8957
8958 if (DstTy.isPointer()) {
8959 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
8960 DstTy.getAddressSpace())) {
8961 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
8962 return UnableToLegalize;
8963 }
8964
8965 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
8966 }
8967
8968 MI.eraseFromParent();
8969 return Legalized;
8970}
8971
8972LegalizerHelper::LegalizeResult
8973LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
8974 const unsigned NumDst = MI.getNumOperands() - 1;
8975 Register SrcReg = MI.getOperand(NumDst).getReg();
8976 Register Dst0Reg = MI.getOperand(0).getReg();
8977 LLT DstTy = MRI.getType(Dst0Reg);
8978 if (DstTy.isPointer())
8979 return UnableToLegalize; // TODO
8980
8981 SrcReg = coerceToScalar(SrcReg);
8982 if (!SrcReg)
8983 return UnableToLegalize;
8984
8985 // Expand scalarizing unmerge as bitcast to integer and shift.
8986 LLT IntTy = MRI.getType(SrcReg);
8987
8988 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
8989
8990 const unsigned DstSize = DstTy.getSizeInBits();
8991 unsigned Offset = DstSize;
8992 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
8993 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
8994 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
8995 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
8996 }
8997
8998 MI.eraseFromParent();
8999 return Legalized;
9000}
9001
9002/// Lower a vector extract or insert by writing the vector to a stack temporary
9003/// and reloading the element or vector.
9004///
9005/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
9006/// =>
9007/// %stack_temp = G_FRAME_INDEX
9008/// G_STORE %vec, %stack_temp
9009/// %idx = clamp(%idx, %vec.getNumElements())
9010/// %element_ptr = G_PTR_ADD %stack_temp, %idx
9011/// %dst = G_LOAD %element_ptr
9012LegalizerHelper::LegalizeResult
9013LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
9014 Register DstReg = MI.getOperand(0).getReg();
9015 Register SrcVec = MI.getOperand(1).getReg();
9016 Register InsertVal;
9017 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
9018 InsertVal = MI.getOperand(2).getReg();
9019
9020 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
9021
9022 LLT VecTy = MRI.getType(SrcVec);
9023 LLT EltTy = VecTy.getElementType();
9024 unsigned NumElts = VecTy.getNumElements();
9025
9026 int64_t IdxVal;
9027 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
9028 SmallVector<Register, 8> SrcRegs;
9029 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
9030
9031 if (InsertVal) {
9032 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
9033 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
9034 } else {
9035 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
9036 }
9037
9038 MI.eraseFromParent();
9039 return Legalized;
9040 }
9041
9042 if (!EltTy.isByteSized()) { // Not implemented.
9043 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
9044 return UnableToLegalize;
9045 }
9046
9047 unsigned EltBytes = EltTy.getSizeInBytes();
9048 Align VecAlign = getStackTemporaryAlignment(VecTy);
9049 Align EltAlign;
9050
9051 MachinePointerInfo PtrInfo;
9052 auto StackTemp = createStackTemporary(
9053 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
9054 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
9055
9056 // Get the pointer to the element, and be sure not to hit undefined behavior
9057 // if the index is out of bounds.
9058 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
9059
9060 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
9061 int64_t Offset = IdxVal * EltBytes;
9062 PtrInfo = PtrInfo.getWithOffset(Offset);
9063 EltAlign = commonAlignment(VecAlign, Offset);
9064 } else {
9065 // We lose information with a variable offset.
9066 EltAlign = getStackTemporaryAlignment(EltTy);
9067 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
9068 }
9069
9070 if (InsertVal) {
9071 // Write the inserted element
9072 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
9073
9074 // Reload the whole vector.
9075 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
9076 } else {
9077 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
9078 }
9079
9080 MI.eraseFromParent();
9081 return Legalized;
9082}
9083
9084LegalizerHelper::LegalizeResult
9085LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
9086 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
9087 MI.getFirst3RegLLTs();
9088 LLT IdxTy = LLT::scalar(32);
9089
9090 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
9091 Register Undef;
9092 SmallVector<Register, 32> BuildVec;
9093 LLT EltTy = DstTy.getScalarType();
9094
9095 DenseMap<unsigned, Register> CachedExtract;
9096
9097 for (int Idx : Mask) {
9098 if (Idx < 0) {
9099 if (!Undef.isValid())
9100 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
9101 BuildVec.push_back(Undef);
9102 continue;
9103 }
9104
9105 assert(!Src0Ty.isScalar() && "Unexpected scalar G_SHUFFLE_VECTOR");
9106
9107 int NumElts = Src0Ty.getNumElements();
9108 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
9109 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
9110 auto [It, Inserted] = CachedExtract.try_emplace(Idx);
9111 if (Inserted) {
9112 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
9113 It->second =
9114 MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK).getReg(0);
9115 }
9116 BuildVec.push_back(It->second);
9117 }
9118
9119 assert(DstTy.isVector() && "Unexpected scalar G_SHUFFLE_VECTOR");
9120 MIRBuilder.buildBuildVector(DstReg, BuildVec);
9121 MI.eraseFromParent();
9122 return Legalized;
9123}
9124
9125LegalizerHelper::LegalizeResult
9126LegalizerHelper::lowerVECTOR_COMPRESS(MachineInstr &MI) {
9127 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
9128 MI.getFirst4RegLLTs();
9129
9130 if (VecTy.isScalableVector())
9131 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
9132
9133 Align VecAlign = getStackTemporaryAlignment(VecTy);
9134 MachinePointerInfo PtrInfo;
9135 Register StackPtr =
9136 createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
9137 PtrInfo)
9138 .getReg(0);
9139 MachinePointerInfo ValPtrInfo =
9140 MachinePointerInfo::getUnknownStack(*MI.getMF());
9141
9142 LLT IdxTy = LLT::scalar(32);
9143 LLT ValTy = VecTy.getElementType();
9144 Align ValAlign = getStackTemporaryAlignment(ValTy);
9145
9146 auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
9147
9148 bool HasPassthru =
9149 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
9150
9151 if (HasPassthru)
9152 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
9153
9154 Register LastWriteVal;
9155 std::optional<APInt> PassthruSplatVal =
9156 isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
9157
9158 if (PassthruSplatVal.has_value()) {
9159 LastWriteVal =
9160 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9161 } else if (HasPassthru) {
9162 auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9163 Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9164 {LLT::scalar(32)}, {Popcount});
9165
9166 Register LastElmtPtr =
9167 getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
9168 LastWriteVal =
9169 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9170 .getReg(0);
9171 }
9172
9173 unsigned NumElmts = VecTy.getNumElements();
9174 for (unsigned I = 0; I < NumElmts; ++I) {
9175 auto Idx = MIRBuilder.buildConstant(IdxTy, I);
9176 auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
9177 Register ElmtPtr =
9178 getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9179 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9180
9181 LLT MaskITy = MaskTy.getElementType();
9182 auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9183 if (MaskITy.getSizeInBits() > 1)
9184 MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
9185
9186 MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
9187 OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
9188
9189 if (HasPassthru && I == NumElmts - 1) {
9190 auto EndOfVector =
9191 MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
9192 auto AllLanesSelected = MIRBuilder.buildICmp(
9193 CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
9194 OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9195 {OutPos, EndOfVector});
9196 ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9197
9198 LastWriteVal =
9199 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9200 .getReg(0);
9201 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9202 }
9203 }
9204
9205 // TODO: Use StackPtr's FrameIndex alignment.
9206 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9207
9208 MI.eraseFromParent();
9209 return Legalized;
9210}
9211
9212Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
9213 Register AllocSize,
9214 Align Alignment,
9215 LLT PtrTy) {
9216 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
9217
9218 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
9219 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
9220
9221 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
9222 // have to generate an extra instruction to negate the alloc and then use
9223 // G_PTR_ADD to add the negative offset.
9224 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
9225 if (Alignment > Align(1)) {
9226 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
9227 AlignMask.negate();
9228 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
9229 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
9230 }
9231
9232 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
9233}
9234
9235LegalizerHelper::LegalizeResult
9236LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
9237 const auto &MF = *MI.getMF();
9238 const auto &TFI = *MF.getSubtarget().getFrameLowering();
9239 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
9240 return UnableToLegalize;
9241
9242 Register Dst = MI.getOperand(0).getReg();
9243 Register AllocSize = MI.getOperand(1).getReg();
9244 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
9245
9246 LLT PtrTy = MRI.getType(Dst);
9247 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9248 Register SPTmp =
9249 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
9250
9251 MIRBuilder.buildCopy(SPReg, SPTmp);
9252 MIRBuilder.buildCopy(Dst, SPTmp);
9253
9254 MI.eraseFromParent();
9255 return Legalized;
9256}
9257
9258LegalizerHelper::LegalizeResult
9259LegalizerHelper::lowerStackSave(MachineInstr &MI) {
9260 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9261 if (!StackPtr)
9262 return UnableToLegalize;
9263
9264 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
9265 MI.eraseFromParent();
9266 return Legalized;
9267}
9268
9269LegalizerHelper::LegalizeResult
9270LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
9271 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9272 if (!StackPtr)
9273 return UnableToLegalize;
9274
9275 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
9276 MI.eraseFromParent();
9277 return Legalized;
9278}
9279
9280LegalizerHelper::LegalizeResult
9281LegalizerHelper::lowerExtract(MachineInstr &MI) {
9282 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9283 unsigned Offset = MI.getOperand(2).getImm();
9284
9285 // Extract sub-vector or one element
9286 if (SrcTy.isVector()) {
9287 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9288 unsigned DstSize = DstTy.getSizeInBits();
9289
9290 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9291 (Offset + DstSize <= SrcTy.getSizeInBits())) {
9292 // Unmerge and allow access to each Src element for the artifact combiner.
9293 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9294
9295 // Take element(s) we need to extract and copy it (merge them).
9296 SmallVector<Register, 8> SubVectorElts;
9297 for (unsigned Idx = Offset / SrcEltSize;
9298 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
9299 SubVectorElts.push_back(Unmerge.getReg(Idx));
9300 }
9301 if (SubVectorElts.size() == 1)
9302 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9303 else
9304 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9305
9306 MI.eraseFromParent();
9307 return Legalized;
9308 }
9309 }
9310
9311 if (DstTy.isScalar() &&
9312 (SrcTy.isScalar() ||
9313 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9314 LLT SrcIntTy = SrcTy;
9315 if (!SrcTy.isScalar()) {
9316 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
9317 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
9318 }
9319
9320 if (Offset == 0)
9321 MIRBuilder.buildTrunc(DstReg, SrcReg);
9322 else {
9323 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
9324 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9325 MIRBuilder.buildTrunc(DstReg, Shr);
9326 }
9327
9328 MI.eraseFromParent();
9329 return Legalized;
9330 }
9331
9332 return UnableToLegalize;
9333}
9334
9335LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
9336 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
9337 uint64_t Offset = MI.getOperand(3).getImm();
9338
9339 LLT DstTy = MRI.getType(Src);
9340 LLT InsertTy = MRI.getType(InsertSrc);
9341
9342 // Insert sub-vector or one element
9343 if (DstTy.isVector() && !InsertTy.isPointer()) {
9344 LLT EltTy = DstTy.getElementType();
9345 unsigned EltSize = EltTy.getSizeInBits();
9346 unsigned InsertSize = InsertTy.getSizeInBits();
9347
9348 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9349 (Offset + InsertSize <= DstTy.getSizeInBits())) {
9350 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
9351 SmallVector<Register, 8> DstElts;
9352 unsigned Idx = 0;
9353 // Elements from Src before insert start Offset
9354 for (; Idx < Offset / EltSize; ++Idx) {
9355 DstElts.push_back(UnmergeSrc.getReg(Idx));
9356 }
9357
9358 // Replace elements in Src with elements from InsertSrc
9359 if (InsertTy.getSizeInBits() > EltSize) {
9360 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9361 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
9362 ++Idx, ++i) {
9363 DstElts.push_back(UnmergeInsertSrc.getReg(i));
9364 }
9365 } else {
9366 DstElts.push_back(InsertSrc);
9367 ++Idx;
9368 }
9369
9370 // Remaining elements from Src after insert
9371 for (; Idx < DstTy.getNumElements(); ++Idx) {
9372 DstElts.push_back(UnmergeSrc.getReg(Idx));
9373 }
9374
9375 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9376 MI.eraseFromParent();
9377 return Legalized;
9378 }
9379 }
9380
9381 if (InsertTy.isVector() ||
9382 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
9383 return UnableToLegalize;
9384
9385 const DataLayout &DL = MIRBuilder.getDataLayout();
9386 if ((DstTy.isPointer() &&
9387 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
9388 (InsertTy.isPointer() &&
9389 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
9390 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9391 return UnableToLegalize;
9392 }
9393
9394 LLT IntDstTy = DstTy;
9395
9396 if (!DstTy.isScalar()) {
9397 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
9398 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9399 }
9400
9401 if (!InsertTy.isScalar()) {
9402 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
9403 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9404 }
9405
9406 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
9407 if (Offset != 0) {
9408 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
9409 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9410 }
9411
9412 APInt MaskVal = APInt::getBitsSetWithWrap(
9413 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
9414
9415 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
9416 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9417 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9418
9419 MIRBuilder.buildCast(Dst, Or);
9420 MI.eraseFromParent();
9421 return Legalized;
9422}
9423
9424LegalizerHelper::LegalizeResult
9425LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
9426 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9427 MI.getFirst4RegLLTs();
9428 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
9429
9430 LLT Ty = Dst0Ty;
9431 LLT BoolTy = Dst1Ty;
9432
9433 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9434
9435 if (IsAdd)
9436 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
9437 else
9438 MIRBuilder.buildSub(NewDst0, LHS, RHS);
9439
9440 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
9441
9442 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9443
9444 // For an addition, the result should be less than one of the operands (LHS)
9445 // if and only if the other operand (RHS) is negative, otherwise there will
9446 // be overflow.
9447 // For a subtraction, the result should be less than one of the operands
9448 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
9449 // otherwise there will be overflow.
9450 auto ResultLowerThanLHS =
9451 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
9452 auto ConditionRHS = MIRBuilder.buildICmp(
9453 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
9454
9455 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
9456
9457 MIRBuilder.buildCopy(Dst0, NewDst0);
9458 MI.eraseFromParent();
9459
9460 return Legalized;
9461}
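// Worked example (illustrative values, not part of the source): G_SADDO on s8
// with LHS = RHS = 100: NewDst0 wraps to -56, ResultLowerThanLHS (-56 < 100)
// is true, ConditionRHS (100 < 0) is false, and Dst1 = true ^ false reports
// the overflow.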
9462
9464 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9465 const LLT Ty = MRI.getType(Res);
9466
9467 // sum = LHS + RHS + zext(CarryIn)
9468 auto Tmp = MIRBuilder.buildAdd(Ty, LHS, RHS);
9469 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9470 auto Sum = MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
9471 MIRBuilder.buildCopy(Res, Sum);
9472
9473 // OvOut = icmp slt ((sum ^ lhs) & (sum ^ rhs)), 0
9474 auto AX = MIRBuilder.buildXor(Ty, Sum, LHS);
9475 auto BX = MIRBuilder.buildXor(Ty, Sum, RHS);
9476 auto T = MIRBuilder.buildAnd(Ty, AX, BX);
9477
9478 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9479 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9480
9481 MI.eraseFromParent();
9482 return Legalized;
9483}
9484
9486 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9487 const LLT Ty = MRI.getType(Res);
9488
9489 // Diff = LHS - (RHS + zext(CarryIn))
9490 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9491 auto RHSPlusCI = MIRBuilder.buildAdd(Ty, RHS, CarryZ);
9492 auto Diff = MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
9493 MIRBuilder.buildCopy(Res, Diff);
9494
9495 // ov = msb((LHS ^ RHS) & (LHS ^ Diff))
9496 auto X1 = MIRBuilder.buildXor(Ty, LHS, RHS);
9497 auto X2 = MIRBuilder.buildXor(Ty, LHS, Diff);
9498 auto T = MIRBuilder.buildAnd(Ty, X1, X2);
9499 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9500 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9501
9502 MI.eraseFromParent();
9503 return Legalized;
9504}
9505
9506LegalizerHelper::LegalizeResult
9507LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
9508 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9509 LLT Ty = MRI.getType(Res);
9510 bool IsSigned;
9511 bool IsAdd;
9512 unsigned BaseOp;
9513 switch (MI.getOpcode()) {
9514 default:
9515 llvm_unreachable("unexpected addsat/subsat opcode");
9516 case TargetOpcode::G_UADDSAT:
9517 IsSigned = false;
9518 IsAdd = true;
9519 BaseOp = TargetOpcode::G_ADD;
9520 break;
9521 case TargetOpcode::G_SADDSAT:
9522 IsSigned = true;
9523 IsAdd = true;
9524 BaseOp = TargetOpcode::G_ADD;
9525 break;
9526 case TargetOpcode::G_USUBSAT:
9527 IsSigned = false;
9528 IsAdd = false;
9529 BaseOp = TargetOpcode::G_SUB;
9530 break;
9531 case TargetOpcode::G_SSUBSAT:
9532 IsSigned = true;
9533 IsAdd = false;
9534 BaseOp = TargetOpcode::G_SUB;
9535 break;
9536 }
9537
9538 if (IsSigned) {
9539 // sadd.sat(a, b) ->
9540 // hi = 0x7fffffff - smax(a, 0)
9541 // lo = 0x80000000 - smin(a, 0)
9542 // a + smin(smax(lo, b), hi)
9543 // ssub.sat(a, b) ->
9544 // lo = smax(a, -1) - 0x7fffffff
9545 // hi = smin(a, -1) - 0x80000000
9546 // a - smin(smax(lo, b), hi)
9547 // TODO: AMDGPU can use a "median of 3" instruction here:
9548 // a +/- med3(lo, b, hi)
9549 uint64_t NumBits = Ty.getScalarSizeInBits();
9550 auto MaxVal =
9551 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
9552 auto MinVal =
9553 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9554 MachineInstrBuilder Hi, Lo;
9555 if (IsAdd) {
9556 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9557 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
9558 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
9559 } else {
9560 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
9561 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
9562 MaxVal);
9563 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
9564 MinVal);
9565 }
9566 auto RHSClamped =
9567 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
9568 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
9569 } else {
9570 // uadd.sat(a, b) -> a + umin(~a, b)
9571 // usub.sat(a, b) -> a - umin(a, b)
9572 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
9573 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
9574 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9575 }
9576
9577 MI.eraseFromParent();
9578 return Legalized;
9579}
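// Worked example (illustrative values, not part of the source): G_SADDSAT on
// s8 with a = b = 100: hi = 127 - smax(100, 0) = 27,
// lo = -128 - smin(100, 0) = -128, the clamped b is
// smin(smax(-128, 100), 27) = 27, and the result is 100 + 27 = 127.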
9580
9581LegalizerHelper::LegalizeResult
9582LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
9583 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9584 LLT Ty = MRI.getType(Res);
9585 LLT BoolTy = Ty.changeElementSize(1);
9586 bool IsSigned;
9587 bool IsAdd;
9588 unsigned OverflowOp;
9589 switch (MI.getOpcode()) {
9590 default:
9591 llvm_unreachable("unexpected addsat/subsat opcode");
9592 case TargetOpcode::G_UADDSAT:
9593 IsSigned = false;
9594 IsAdd = true;
9595 OverflowOp = TargetOpcode::G_UADDO;
9596 break;
9597 case TargetOpcode::G_SADDSAT:
9598 IsSigned = true;
9599 IsAdd = true;
9600 OverflowOp = TargetOpcode::G_SADDO;
9601 break;
9602 case TargetOpcode::G_USUBSAT:
9603 IsSigned = false;
9604 IsAdd = false;
9605 OverflowOp = TargetOpcode::G_USUBO;
9606 break;
9607 case TargetOpcode::G_SSUBSAT:
9608 IsSigned = true;
9609 IsAdd = false;
9610 OverflowOp = TargetOpcode::G_SSUBO;
9611 break;
9612 }
9613
9614 auto OverflowRes =
9615 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9616 Register Tmp = OverflowRes.getReg(0);
9617 Register Ov = OverflowRes.getReg(1);
9618 MachineInstrBuilder Clamp;
9619 if (IsSigned) {
9620 // sadd.sat(a, b) ->
9621 // {tmp, ov} = saddo(a, b)
9622 // ov ? (tmp >>s 31) + 0x80000000 : r
9623 // ssub.sat(a, b) ->
9624 // {tmp, ov} = ssubo(a, b)
9625 // ov ? (tmp >>s 31) + 0x80000000 : r
9626 uint64_t NumBits = Ty.getScalarSizeInBits();
9627 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
9628 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9629 auto MinVal =
9630 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9631 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
9632 } else {
9633 // uadd.sat(a, b) ->
9634 // {tmp, ov} = uaddo(a, b)
9635 // ov ? 0xffffffff : tmp
9636 // usub.sat(a, b) ->
9637 // {tmp, ov} = usubo(a, b)
9638 // ov ? 0 : tmp
9639 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9640 }
9641 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
9642
9643 MI.eraseFromParent();
9644 return Legalized;
9645}
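// Worked example (illustrative values, not part of the source): G_UADDSAT on
// s8 with a = 200, b = 100: G_UADDO yields tmp = 44 (300 mod 256) with the
// overflow bit set, Clamp = 0xff, so the select produces 255.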
9646
9647LegalizerHelper::LegalizeResult
9648LegalizerHelper::lowerShlSat(MachineInstr &MI) {
9649 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
9650 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
9651 "Expected shlsat opcode!");
9652 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
9653 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9654 LLT Ty = MRI.getType(Res);
9655 LLT BoolTy = Ty.changeElementSize(1);
9656
9657 unsigned BW = Ty.getScalarSizeInBits();
9658 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
9659 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
9660 : MIRBuilder.buildLShr(Ty, Result, RHS);
9661
9662 MachineInstrBuilder SatVal;
9663 if (IsSigned) {
9664 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
9665 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
9666 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
9667 MIRBuilder.buildConstant(Ty, 0));
9668 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
9669 } else {
9670 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
9671 }
9672 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
9673 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
9674
9675 MI.eraseFromParent();
9676 return Legalized;
9677}
9678
9679LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
9680 auto [Dst, Src] = MI.getFirst2Regs();
9681 const LLT Ty = MRI.getType(Src);
9682 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
9683 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
9684
9685 // Swap most and least significant byte, set remaining bytes in Res to zero.
9686 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
9687 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
9688 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9689 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
9690
9691 // Set i-th high/low byte in Res to i-th low/high byte from Src.
9692 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
9693 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
9694 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
9695 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
9696 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
9697 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
9698 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
9699 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
9700 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
9701 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
9702 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9703 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
9704 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
9705 }
9706 Res.getInstr()->getOperand(0).setReg(Dst);
9707
9708 MI.eraseFromParent();
9709 return Legalized;
9710}
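// Worked example (illustrative values, not part of the source): G_BSWAP on s32
// with Src = 0x11223344: BaseShiftAmt = 24 and the initial Or gives
// 0x44000011; the i = 1 iteration moves byte 1 (0x33) up by 8 and byte 2
// (0x22) down by 8, producing 0x44332211.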
9711
9712//{ (Src & Mask) >> N } | { (Src << N) & Mask }
9713static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
9714 MachineInstrBuilder Src, const APInt &Mask) {
9715 const LLT Ty = Dst.getLLTTy(*B.getMRI());
9716 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
9717 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
9718 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
9719 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
9720 return B.buildOr(Dst, LHS, RHS);
9721}
9722
9723LegalizerHelper::LegalizeResult
9724LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
9725 auto [Dst, Src] = MI.getFirst2Regs();
9726 const LLT SrcTy = MRI.getType(Src);
9727 unsigned Size = SrcTy.getScalarSizeInBits();
9728 unsigned VSize = SrcTy.getSizeInBits();
9729
9730 if (Size >= 8) {
9731 if (SrcTy.isVector() && (VSize % 8 == 0) &&
9732 (LI.isLegal({TargetOpcode::G_BITREVERSE,
9733 {LLT::fixed_vector(VSize / 8, 8),
9734 LLT::fixed_vector(VSize / 8, 8)}}))) {
9735 // If bitreverse is legal for i8 vector of the same size, then cast
9736 // to i8 vector type.
9737 // e.g. v4s32 -> v16s8
9738 LLT VTy = LLT::fixed_vector(VSize / 8, 8);
9739 auto BSWAP = MIRBuilder.buildBSwap(SrcTy, Src);
9740 auto Cast = MIRBuilder.buildBitcast(VTy, BSWAP);
9741 auto RBIT = MIRBuilder.buildBitReverse(VTy, Cast);
9742 MIRBuilder.buildBitcast(Dst, RBIT);
9743 } else {
9744 MachineInstrBuilder BSWAP =
9745 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
9746
9747 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
9748 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
9749 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
9750 MachineInstrBuilder Swap4 = SwapN(4, SrcTy, MIRBuilder, BSWAP,
9751 APInt::getSplat(Size, APInt(8, 0xF0)));
9752
9753 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
9754 // [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
9755 // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
9756 MachineInstrBuilder Swap2 = SwapN(2, SrcTy, MIRBuilder, Swap4,
9757 APInt::getSplat(Size, APInt(8, 0xCC)));
9758
9759 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
9760 // 6|7
9761 // [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
9762 // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
9763 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
9764 }
9765 } else {
9766 // Expand bitreverse for types smaller than 8 bits.
9767 MachineInstrBuilder Tmp;
9768 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
9769 MachineInstrBuilder Tmp2;
9770 if (I < J) {
9771 auto ShAmt = MIRBuilder.buildConstant(SrcTy, J - I);
9772 Tmp2 = MIRBuilder.buildShl(SrcTy, Src, ShAmt);
9773 } else {
9774 auto ShAmt = MIRBuilder.buildConstant(SrcTy, I - J);
9775 Tmp2 = MIRBuilder.buildLShr(SrcTy, Src, ShAmt);
9776 }
9777
9778 auto Mask = MIRBuilder.buildConstant(SrcTy, 1ULL << J);
9779 Tmp2 = MIRBuilder.buildAnd(SrcTy, Tmp2, Mask);
9780 if (I == 0)
9781 Tmp = Tmp2;
9782 else
9783 Tmp = MIRBuilder.buildOr(SrcTy, Tmp, Tmp2);
9784 }
9785 MIRBuilder.buildCopy(Dst, Tmp);
9786 }
9787
9788 MI.eraseFromParent();
9789 return Legalized;
9790}
9791
9792LegalizerHelper::LegalizeResult
9793LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
9794 MachineFunction &MF = MIRBuilder.getMF();
9795
9796 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
9797 int NameOpIdx = IsRead ? 1 : 0;
9798 int ValRegIndex = IsRead ? 0 : 1;
9799
9800 Register ValReg = MI.getOperand(ValRegIndex).getReg();
9801 const LLT Ty = MRI.getType(ValReg);
9802 const MDString *RegStr = cast<MDString>(
9803 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
9804
9805 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
9806 if (!PhysReg) {
9807 const Function &Fn = MF.getFunction();
9808 Fn.getContext().diagnose(DiagnosticInfoGenericWithLoc(
9809 "invalid register \"" + Twine(RegStr->getString().data()) + "\" for " +
9810 (IsRead ? "llvm.read_register" : "llvm.write_register"),
9811 Fn, MI.getDebugLoc()));
9812 if (IsRead)
9813 MIRBuilder.buildUndef(ValReg);
9814
9815 MI.eraseFromParent();
9816 return Legalized;
9817 }
9818
9819 if (IsRead)
9820 MIRBuilder.buildCopy(ValReg, PhysReg);
9821 else
9822 MIRBuilder.buildCopy(PhysReg, ValReg);
9823
9824 MI.eraseFromParent();
9825 return Legalized;
9826}
9827
9828LegalizerHelper::LegalizeResult
9829LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
9830 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
9831 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
9832 Register Result = MI.getOperand(0).getReg();
9833 LLT OrigTy = MRI.getType(Result);
9834 auto SizeInBits = OrigTy.getScalarSizeInBits();
9835 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
9836
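 // Extend both operands to twice the width, multiply, and take the high half
 // of the product by shifting right by the original bit width.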
9837 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
9838 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
9839 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
9840 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
9841
9842 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
9843 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
9844 MIRBuilder.buildTrunc(Result, Shifted);
9845
9846 MI.eraseFromParent();
9847 return Legalized;
9848}
9849
9850LegalizerHelper::LegalizeResult
9851LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
9852 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9853 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
9854
9855 if (Mask == fcNone) {
9856 MIRBuilder.buildConstant(DstReg, 0);
9857 MI.eraseFromParent();
9858 return Legalized;
9859 }
9860 if (Mask == fcAllFlags) {
9861 MIRBuilder.buildConstant(DstReg, 1);
9862 MI.eraseFromParent();
9863 return Legalized;
9864 }
9865
9866 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
9867 // version
9868
9869 unsigned BitSize = SrcTy.getScalarSizeInBits();
9870 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
9871
9872 LLT IntTy = LLT::scalar(BitSize);
9873 if (SrcTy.isVector())
9874 IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
9875 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
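 // LLT does not distinguish FP from integer types, so a plain copy gives us
 // the raw bits; all class tests below are integer compares on those bits.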
9876
9877 // Various masks.
9878 APInt SignBit = APInt::getSignMask(BitSize);
9879 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9880 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9881 APInt ExpMask = Inf;
9882 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9883 APInt QNaNBitMask =
9884 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9885 APInt InversionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
9886
9887 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
9888 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
9889 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
9890 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
9891 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
9892
9893 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
9894 auto Sign =
9895 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
9896
9897 auto Res = MIRBuilder.buildConstant(DstTy, 0);
9898 // Clang doesn't support capture of structured bindings:
9899 LLT DstTyCopy = DstTy;
9900 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
9901 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
9902 };
9903
9904 // Tests that involve more than one class should be processed first.
9905 if ((Mask & fcFinite) == fcFinite) {
9906 // finite(V) ==> abs(V) u< exp_mask
9907 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9908 ExpMaskC));
9909 Mask &= ~fcFinite;
9910 } else if ((Mask & fcFinite) == fcPosFinite) {
9911 // finite(V) && V > 0 ==> V u< exp_mask
9912 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
9913 ExpMaskC));
9914 Mask &= ~fcPosFinite;
9915 } else if ((Mask & fcFinite) == fcNegFinite) {
9916 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
9917 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9918 ExpMaskC);
9919 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
9920 appendToRes(And);
9921 Mask &= ~fcNegFinite;
9922 }
9923
9924 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
9925 // fcZero | fcSubnormal => test all exponent bits are 0
9926 // TODO: Handle sign bit specific cases
9927 // TODO: Handle inverted case
9928 if (PartialCheck == (fcZero | fcSubnormal)) {
9929 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
9930 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9931 ExpBits, ZeroC));
9932 Mask &= ~PartialCheck;
9933 }
9934 }
9935
9936 // Check for individual classes.
9937 if (FPClassTest PartialCheck = Mask & fcZero) {
9938 if (PartialCheck == fcPosZero)
9939 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9940 AsInt, ZeroC));
9941 else if (PartialCheck == fcZero)
9942 appendToRes(
9943 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
9944 else // fcNegZero
9945 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9946 AsInt, SignBitC));
9947 }
9948
9949 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
9950 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
9951 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
9952 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
9953 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
9954 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
9955 auto SubnormalRes =
9956 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
9957 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
9958 if (PartialCheck == fcNegSubnormal)
9959 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
9960 appendToRes(SubnormalRes);
9961 }
9962
9963 if (FPClassTest PartialCheck = Mask & fcInf) {
9964 if (PartialCheck == fcPosInf)
9965 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9966 AsInt, InfC));
9967 else if (PartialCheck == fcInf)
9968 appendToRes(
9969 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
9970 else { // fcNegInf
9971 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9972 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
9973 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9974 AsInt, NegInfC));
9975 }
9976 }
9977
9978 if (FPClassTest PartialCheck = Mask & fcNan) {
9979 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
9980 if (PartialCheck == fcNan) {
9981 // isnan(V) ==> abs(V) u> int(inf)
9982 appendToRes(
9983 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
9984 } else if (PartialCheck == fcQNan) {
9985 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
9986 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
9987 InfWithQnanBitC));
9988 } else { // fcSNan
9989 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
9990 // abs(V) u< (unsigned(Inf) | quiet_bit)
9991 auto IsNan =
9992 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
9993 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
9994 Abs, InfWithQnanBitC);
9995 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
9996 }
9997 }
9998
9999 if (FPClassTest PartialCheck = Mask & fcNormal) {
10000 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
10001 // (max_exp-1))
10002 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
10003 auto ExpMinusOne = MIRBuilder.buildSub(
10004 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
10005 APInt MaxExpMinusOne = ExpMask - ExpLSB;
10006 auto NormalRes =
10007 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
10008 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
10009 if (PartialCheck == fcNegNormal)
10010 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
10011 else if (PartialCheck == fcPosNormal) {
10012 auto PosSign = MIRBuilder.buildXor(
10013 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InversionMask));
10014 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
10015 }
10016 appendToRes(NormalRes);
10017 }
10018
10019 MIRBuilder.buildCopy(DstReg, Res);
10020 MI.eraseFromParent();
10021 return Legalized;
10022}
10023
10024LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
10025 // Implement G_SELECT in terms of XOR, AND, OR.
10026 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
10027 MI.getFirst4RegLLTs();
10028
10029 bool IsEltPtr = DstTy.isPointerOrPointerVector();
10030 if (IsEltPtr) {
10031 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
10032 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
10033 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
10034 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
10035 DstTy = NewTy;
10036 }
10037
10038 if (MaskTy.isScalar()) {
10039 // Turn the scalar condition into a vector condition mask if needed.
10040
10041 Register MaskElt = MaskReg;
10042
10043 // The condition was potentially zero extended before, but we want a sign
10044 // extended boolean.
10045 if (MaskTy != LLT::scalar(1))
10046 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
10047
10048 // Continue the sign extension (or truncate) to match the data type.
10049 MaskElt =
10050 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
10051
10052 if (DstTy.isVector()) {
10053 // Generate a vector splat idiom.
10054 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
10055 MaskReg = ShufSplat.getReg(0);
10056 } else {
10057 MaskReg = MaskElt;
10058 }
10059 MaskTy = DstTy;
10060 } else if (!DstTy.isVector()) {
10061 // Cannot handle the case that mask is a vector and dst is a scalar.
10062 return UnableToLegalize;
10063 }
10064
10065 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
10066 return UnableToLegalize;
10067 }
10068
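 // Res = (Op1 & Mask) | (Op2 & ~Mask); each mask element is expected to be
 // all-ones or all-zeros at this point.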
10069 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
10070 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
10071 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
10072 if (IsEltPtr) {
10073 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
10074 MIRBuilder.buildIntToPtr(DstReg, Or);
10075 } else {
10076 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
10077 }
10078 MI.eraseFromParent();
10079 return Legalized;
10080}
10081
10082LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
10083 // Split DIVREM into individual instructions.
10084 unsigned Opcode = MI.getOpcode();
10085
10086 MIRBuilder.buildInstr(
10087 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
10088 : TargetOpcode::G_UDIV,
10089 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10090 MIRBuilder.buildInstr(
10091 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
10092 : TargetOpcode::G_UREM,
10093 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10094 MI.eraseFromParent();
10095 return Legalized;
10096}
10097
10098LegalizerHelper::LegalizeResult
10099LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
10100 // Expand %res = G_ABS %a into:
10101 // %v1 = G_ASHR %a, scalar_size-1
10102 // %v2 = G_ADD %a, %v1
10103 // %res = G_XOR %v2, %v1
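 // e.g. for %a = -5: %v1 = -1, %v2 = -6, %res = -6 ^ -1 = 5.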
10104 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
10105 Register OpReg = MI.getOperand(1).getReg();
10106 auto ShiftAmt =
10107 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
10108 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
10109 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
10110 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
10111 MI.eraseFromParent();
10112 return Legalized;
10113}
10114
10115LegalizerHelper::LegalizeResult
10116LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
10117 // Expand %res = G_ABS %a into:
10118 // %v1 = G_CONSTANT 0
10119 // %v2 = G_SUB %v1, %a
10120 // %res = G_SMAX %a, %v2
10121 Register SrcReg = MI.getOperand(1).getReg();
10122 LLT Ty = MRI.getType(SrcReg);
10123 auto Zero = MIRBuilder.buildConstant(Ty, 0);
10124 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
10125 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
10126 MI.eraseFromParent();
10127 return Legalized;
10128}
10129
10130LegalizerHelper::LegalizeResult
10131LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
10132 Register SrcReg = MI.getOperand(1).getReg();
10133 Register DestReg = MI.getOperand(0).getReg();
10134 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
10135 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
10136 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
10137 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
10138 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
10139 MI.eraseFromParent();
10140 return Legalized;
10141}
10142
10145 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10146 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10147 "Expected G_ABDS or G_ABDU instruction");
10148
10149 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10150 LLT Ty = MRI.getType(LHS);
10151
10152 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10153 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10154 Register LHSSub = MIRBuilder.buildSub(Ty, LHS, RHS).getReg(0);
10155 Register RHSSub = MIRBuilder.buildSub(Ty, RHS, LHS).getReg(0);
10156 CmpInst::Predicate Pred = (MI.getOpcode() == TargetOpcode::G_ABDS)
10157 ? CmpInst::ICMP_SGT
10158 : CmpInst::ICMP_UGT;
10159 auto ICmp = MIRBuilder.buildICmp(Pred, LLT::scalar(1), LHS, RHS);
10160 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
10161
10162 MI.eraseFromParent();
10163 return Legalized;
10164}
10165
10168 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10169 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10170 "Expected G_ABDS or G_ABDU instruction");
10171
10172 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10173 LLT Ty = MRI.getType(LHS);
10174
10175 // abds(lhs, rhs) -> sub(smax(lhs, rhs), smin(lhs, rhs))
10176 // abdu(lhs, rhs) -> sub(umax(lhs, rhs), umin(lhs, rhs))
10177 Register MaxReg, MinReg;
10178 if (MI.getOpcode() == TargetOpcode::G_ABDS) {
10179 MaxReg = MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10180 MinReg = MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10181 } else {
10182 MaxReg = MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10183 MinReg = MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10184 }
10185 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10186
10187 MI.eraseFromParent();
10188 return Legalized;
10189}
10190
10191LegalizerHelper::LegalizeResult LegalizerHelper::lowerFAbs(MachineInstr &MI) {
10192 Register SrcReg = MI.getOperand(1).getReg();
10193 Register DstReg = MI.getOperand(0).getReg();
10194
10195 LLT Ty = MRI.getType(DstReg);
10196
10197 // Reset sign bit
10198 MIRBuilder.buildAnd(
10199 DstReg, SrcReg,
10200 MIRBuilder.buildConstant(
10201 Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits())));
10202
10203 MI.eraseFromParent();
10204 return Legalized;
10205}
10206
10207LegalizerHelper::LegalizeResult
10208LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
10209 Register SrcReg = MI.getOperand(1).getReg();
10210 LLT SrcTy = MRI.getType(SrcReg);
10211 LLT DstTy = MRI.getType(SrcReg);
10212
10213 // The source could be a scalar if the IR type was <1 x sN>.
10214 if (SrcTy.isScalar()) {
10215 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
10216 return UnableToLegalize; // FIXME: handle extension.
10217 // This can be just a plain copy.
10218 Observer.changingInstr(MI);
10219 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
10220 Observer.changedInstr(MI);
10221 return Legalized;
10222 }
10223 return UnableToLegalize;
10224}
10225
10226LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
10227 MachineFunction &MF = *MI.getMF();
10228 const DataLayout &DL = MIRBuilder.getDataLayout();
10229 LLVMContext &Ctx = MF.getFunction().getContext();
10230 Register ListPtr = MI.getOperand(1).getReg();
10231 LLT PtrTy = MRI.getType(ListPtr);
10232
10233 // ListPtr is a pointer to the head of the list. Get the address
10234 // of the head of the list.
10235 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
10236 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
10237 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
10238 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10239
10240 const Align A(MI.getOperand(2).getImm());
10241 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
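 // Align the current argument address if the requested alignment exceeds the
 // minimum stack argument alignment.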
10242 if (A > TLI.getMinStackArgumentAlignment()) {
10243 Register AlignAmt =
10244 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
10245 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10246 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
10247 VAList = AndDst.getReg(0);
10248 }
10249
10250 // Increment the pointer, VAList, to the next vaarg
10251 // The list should be bumped by the size of the element in the current head
10252 // of the list.
10253 Register Dst = MI.getOperand(0).getReg();
10254 LLT LLTTy = MRI.getType(Dst);
10255 Type *Ty = getTypeForLLT(LLTTy, Ctx);
10256 auto IncAmt =
10257 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
10258 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10259
10260 // Store the incremented VAList to the legalized pointer
10261 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
10262 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
10263 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10264 // Load the actual argument out of the pointer VAList
10265 Align EltAlignment = DL.getABITypeAlign(Ty);
10266 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
10267 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
10268 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10269
10270 MI.eraseFromParent();
10271 return Legalized;
10272}
10273
10274static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
10275 // On Darwin, -Os means optimize for size without hurting performance, so
10276 // only really optimize for size when -Oz (MinSize) is used.
10277 if (MF.getTarget().getTargetTriple().isOSDarwin())
10278 return MF.getFunction().hasMinSize();
10279 return MF.getFunction().hasOptSize();
10280}
10281
10282// Returns a list of types to use for memory op lowering in MemOps. A partial
10283// port of findOptimalMemOpLowering in TargetLowering.
10284static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
10285 unsigned Limit, const MemOp &Op,
10286 unsigned DstAS, unsigned SrcAS,
10287 const AttributeList &FuncAttributes,
10288 const TargetLowering &TLI) {
10289 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
10290 return false;
10291
10292 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
10293
10294 if (Ty == LLT()) {
10295 // Use the largest scalar type whose alignment constraints are satisfied.
10296 // We only need to check DstAlign here as SrcAlign is always greater or
10297 // equal to DstAlign (or zero).
10298 Ty = LLT::scalar(64);
10299 if (Op.isFixedDstAlign())
10300 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
10301 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
10302 Ty = LLT::scalar(Ty.getSizeInBytes());
10303 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
10304 // FIXME: check for the largest legal type we can load/store to.
10305 }
10306
10307 unsigned NumMemOps = 0;
10308 uint64_t Size = Op.size();
10309 while (Size) {
10310 unsigned TySize = Ty.getSizeInBytes();
10311 while (TySize > Size) {
10312 // For now, only use non-vector loads / stores for the left-over pieces.
10313 LLT NewTy = Ty;
10314 // FIXME: check for mem op safety and legality of the types. Not all of
10315 // SDAGisms map cleanly to GISel concepts.
10316 if (NewTy.isVector())
10317 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
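 // Drop to the largest power-of-two width strictly smaller than the current
 // one (e.g. 64 -> 32).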
10318 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
10319 unsigned NewTySize = NewTy.getSizeInBytes();
10320 assert(NewTySize > 0 && "Could not find appropriate type");
10321
10322 // If the new LLT cannot cover all of the remaining bits, then consider
10323 // issuing a (or a pair of) unaligned and overlapping load / store.
10324 unsigned Fast;
10325 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
10326 MVT VT = getMVTForLLT(Ty);
10327 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
10328 TLI.allowsMisalignedMemoryAccesses(
10329 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
10330 MachineMemOperand::MONone, &Fast) &&
10331 Fast)
10332 TySize = Size;
10333 else {
10334 Ty = NewTy;
10335 TySize = NewTySize;
10336 }
10337 }
10338
10339 if (++NumMemOps > Limit)
10340 return false;
10341
10342 MemOps.push_back(Ty);
10343 Size -= TySize;
10344 }
10345
10346 return true;
10347}
10348
10349// Get a vectorized representation of the memset value operand, GISel edition.
10350static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
10351 MachineRegisterInfo &MRI = *MIB.getMRI();
10352 unsigned NumBits = Ty.getScalarSizeInBits();
10353 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10354 if (!Ty.isVector() && ValVRegAndVal) {
10355 APInt Scalar = ValVRegAndVal->Value.trunc(8);
10356 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
10357 return MIB.buildConstant(Ty, SplatVal).getReg(0);
10358 }
10359
10360 // Extend the byte value to the larger type, and then multiply by a magic
10361 // value 0x010101... in order to replicate it across every byte.
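 // e.g. an s32 memset value of 0xAB becomes 0xAB * 0x01010101 == 0xABABABAB.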
10362 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
10363 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10364 return MIB.buildConstant(Ty, 0).getReg(0);
10365 }
10366
10367 LLT ExtType = Ty.getScalarType();
10368 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
10369 if (NumBits > 8) {
10370 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
10371 auto MagicMI = MIB.buildConstant(ExtType, Magic);
10372 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
10373 }
10374
10375 // For vector types create a G_BUILD_VECTOR.
10376 if (Ty.isVector())
10377 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
10378
10379 return Val;
10380}
10381
10382LegalizerHelper::LegalizeResult
10383LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
10384 uint64_t KnownLen, Align Alignment,
10385 bool IsVolatile) {
10386 auto &MF = *MI.getParent()->getParent();
10387 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10388 auto &DL = MF.getDataLayout();
10389 LLVMContext &C = MF.getFunction().getContext();
10390
10391 assert(KnownLen != 0 && "Have a zero length memset length!");
10392
10393 bool DstAlignCanChange = false;
10394 MachineFrameInfo &MFI = MF.getFrameInfo();
10395 bool OptSize = shouldLowerMemFuncForSize(MF);
10396
10397 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10398 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10399 DstAlignCanChange = true;
10400
10401 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
10402 std::vector<LLT> MemOps;
10403
10404 const auto &DstMMO = **MI.memoperands_begin();
10405 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10406
10407 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10408 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
10409
10410 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
10411 MemOp::Set(KnownLen, DstAlignCanChange,
10412 Alignment,
10413 /*IsZeroMemset=*/IsZeroVal,
10414 /*IsVolatile=*/IsVolatile),
10415 DstPtrInfo.getAddrSpace(), ~0u,
10416 MF.getFunction().getAttributes(), TLI))
10417 return UnableToLegalize;
10418
10419 if (DstAlignCanChange) {
10420 // Get an estimate of the type from the LLT.
10421 Type *IRTy = getTypeForLLT(MemOps[0], C);
10422 Align NewAlign = DL.getABITypeAlign(IRTy);
10423 if (NewAlign > Alignment) {
10424 Alignment = NewAlign;
10425 unsigned FI = FIDef->getOperand(1).getIndex();
10426 // Give the stack frame object a larger alignment if needed.
10427 if (MFI.getObjectAlign(FI) < Alignment)
10428 MFI.setObjectAlignment(FI, Alignment);
10429 }
10430 }
10431
10432 MachineIRBuilder MIB(MI);
10433 // Find the largest store and generate the bit pattern for it.
10434 LLT LargestTy = MemOps[0];
10435 for (unsigned i = 1; i < MemOps.size(); i++)
10436 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
10437 LargestTy = MemOps[i];
10438
10439 // The memset stored value is always defined as an s8, so in order to make it
10440 // work with larger store types we need to repeat the bit pattern across the
10441 // wider type.
10442 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
10443
10444 if (!MemSetValue)
10445 return UnableToLegalize;
10446
10447 // Generate the stores. For each store type in the list, we generate the
10448 // matching store of that type to the destination address.
10449 LLT PtrTy = MRI.getType(Dst);
10450 unsigned DstOff = 0;
10451 unsigned Size = KnownLen;
10452 for (unsigned I = 0; I < MemOps.size(); I++) {
10453 LLT Ty = MemOps[I];
10454 unsigned TySize = Ty.getSizeInBytes();
10455 if (TySize > Size) {
10456 // Issuing an unaligned load / store pair that overlaps with the previous
10457 // pair. Adjust the offset accordingly.
10458 assert(I == MemOps.size() - 1 && I != 0);
10459 DstOff -= TySize - Size;
10460 }
10461
10462 // If this store is smaller than the largest store see whether we can get
10463 // the smaller value for free with a truncate.
10464 Register Value = MemSetValue;
10465 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
10466 MVT VT = getMVTForLLT(Ty);
10467 MVT LargestVT = getMVTForLLT(LargestTy);
10468 if (!LargestTy.isVector() && !Ty.isVector() &&
10469 TLI.isTruncateFree(LargestVT, VT))
10470 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10471 else
10472 Value = getMemsetValue(Val, Ty, MIB);
10473 if (!Value)
10474 return UnableToLegalize;
10475 }
10476
10477 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
10478
10479 Register Ptr = Dst;
10480 if (DstOff != 0) {
10481 auto Offset =
10482 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
10483 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst, Offset).getReg(0);
10484 }
10485
10486 MIB.buildStore(Value, Ptr, *StoreMMO);
10487 DstOff += Ty.getSizeInBytes();
10488 Size -= TySize;
10489 }
10490
10491 MI.eraseFromParent();
10492 return Legalized;
10493}
10494
10495LegalizerHelper::LegalizeResult
10496LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
10497 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10498
10499 auto [Dst, Src, Len] = MI.getFirst3Regs();
10500
10501 const auto *MMOIt = MI.memoperands_begin();
10502 const MachineMemOperand *MemOp = *MMOIt;
10503 bool IsVolatile = MemOp->isVolatile();
10504
10505 // See if this is a constant length copy
10506 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10507 // FIXME: support dynamically sized G_MEMCPY_INLINE
10508 assert(LenVRegAndVal &&
10509 "inline memcpy with dynamic size is not yet supported");
10510 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10511 if (KnownLen == 0) {
10512 MI.eraseFromParent();
10513 return Legalized;
10514 }
10515
10516 const auto &DstMMO = **MI.memoperands_begin();
10517 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10518 Align DstAlign = DstMMO.getBaseAlign();
10519 Align SrcAlign = SrcMMO.getBaseAlign();
10520
10521 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
10522 IsVolatile);
10523}
10524
10525LegalizerHelper::LegalizeResult
10526LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
10527 uint64_t KnownLen, Align DstAlign,
10528 Align SrcAlign, bool IsVolatile) {
10529 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10530 return lowerMemcpy(MI, Dst, Src, KnownLen,
10531 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
10532 IsVolatile);
10533}
10534
10535LegalizerHelper::LegalizeResult
10536LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
10537 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
10538 Align SrcAlign, bool IsVolatile) {
10539 auto &MF = *MI.getParent()->getParent();
10540 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10541 auto &DL = MF.getDataLayout();
10542 LLVMContext &C = MF.getFunction().getContext();
10543
10544 assert(KnownLen != 0 && "Have a zero length memcpy length!");
10545
10546 bool DstAlignCanChange = false;
10547 MachineFrameInfo &MFI = MF.getFrameInfo();
10548 Align Alignment = std::min(DstAlign, SrcAlign);
10549
10550 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10551 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10552 DstAlignCanChange = true;
10553
10554 // FIXME: infer better src pointer alignment like SelectionDAG does here.
10555 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
10556 // if the memcpy is in a tail call position.
10557
10558 std::vector<LLT> MemOps;
10559
10560 const auto &DstMMO = **MI.memoperands_begin();
10561 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10562 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10563 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10564
10565 if (!findGISelOptimalMemOpLowering(
10566 MemOps, Limit,
10567 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10568 IsVolatile),
10569 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10570 MF.getFunction().getAttributes(), TLI))
10571 return UnableToLegalize;
10572
10573 if (DstAlignCanChange) {
10574 // Get an estimate of the type from the LLT.
10575 Type *IRTy = getTypeForLLT(MemOps[0], C);
10576 Align NewAlign = DL.getABITypeAlign(IRTy);
10577
10578 // Don't promote to an alignment that would require dynamic stack
10579 // realignment.
10580 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10581 if (!TRI->hasStackRealignment(MF))
10582 if (MaybeAlign StackAlign = DL.getStackAlignment())
10583 NewAlign = std::min(NewAlign, *StackAlign);
10584
10585 if (NewAlign > Alignment) {
10586 Alignment = NewAlign;
10587 unsigned FI = FIDef->getOperand(1).getIndex();
10588 // Give the stack frame object a larger alignment if needed.
10589 if (MFI.getObjectAlign(FI) < Alignment)
10590 MFI.setObjectAlignment(FI, Alignment);
10591 }
10592 }
10593
10594 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
10595
10596 MachineIRBuilder MIB(MI);
10597 // Now we need to emit a load/store pair for each of the types we've
10598 // collected. I.e. for each type, generate a load from the source pointer of
10599 // that type width, and then generate a corresponding store of the loaded
10600 // value to the dest buffer. This can result in a sequence of loads and stores
10601 // of mixed types, depending on what the target specifies as good types to use.
10602 unsigned CurrOffset = 0;
10603 unsigned Size = KnownLen;
10604 for (auto CopyTy : MemOps) {
10605 // Issuing an unaligned load / store pair that overlaps with the previous
10606 // pair. Adjust the offset accordingly.
10607 if (CopyTy.getSizeInBytes() > Size)
10608 CurrOffset -= CopyTy.getSizeInBytes() - Size;
10609
10610 // Construct MMOs for the accesses.
10611 auto *LoadMMO =
10612 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10613 auto *StoreMMO =
10614 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10615
10616 // Create the load.
10617 Register LoadPtr = Src;
10618 Register Offset;
10619 if (CurrOffset != 0) {
10620 LLT SrcTy = MRI.getType(Src);
10621 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
10622 .getReg(0);
10623 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10624 }
10625 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
10626
10627 // Create the store.
10628 Register StorePtr = Dst;
10629 if (CurrOffset != 0) {
10630 LLT DstTy = MRI.getType(Dst);
10631 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10632 }
10633 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
10634 CurrOffset += CopyTy.getSizeInBytes();
10635 Size -= CopyTy.getSizeInBytes();
10636 }
10637
10638 MI.eraseFromParent();
10639 return Legalized;
10640}
10641
10642LegalizerHelper::LegalizeResult
10643LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
10644 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
10645 bool IsVolatile) {
10646 auto &MF = *MI.getParent()->getParent();
10647 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10648 auto &DL = MF.getDataLayout();
10649 LLVMContext &C = MF.getFunction().getContext();
10650
10651 assert(KnownLen != 0 && "Have a zero length memmove length!");
10652
10653 bool DstAlignCanChange = false;
10654 MachineFrameInfo &MFI = MF.getFrameInfo();
10655 bool OptSize = shouldLowerMemFuncForSize(MF);
10656 Align Alignment = std::min(DstAlign, SrcAlign);
10657
10658 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10659 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10660 DstAlignCanChange = true;
10661
10662 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
10663 std::vector<LLT> MemOps;
10664
10665 const auto &DstMMO = **MI.memoperands_begin();
10666 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10667 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10668 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10669
10670 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
10671 // to a bug in its findOptimalMemOpLowering implementation. For now do the
10672 // same thing here.
10673 if (!findGISelOptimalMemOpLowering(
10674 MemOps, Limit,
10675 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10676 /*IsVolatile*/ true),
10677 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10678 MF.getFunction().getAttributes(), TLI))
10679 return UnableToLegalize;
10680
10681 if (DstAlignCanChange) {
10682 // Get an estimate of the type from the LLT.
10683 Type *IRTy = getTypeForLLT(MemOps[0], C);
10684 Align NewAlign = DL.getABITypeAlign(IRTy);
10685
10686 // Don't promote to an alignment that would require dynamic stack
10687 // realignment.
10688 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10689 if (!TRI->hasStackRealignment(MF))
10690 if (MaybeAlign StackAlign = DL.getStackAlignment())
10691 NewAlign = std::min(NewAlign, *StackAlign);
10692
10693 if (NewAlign > Alignment) {
10694 Alignment = NewAlign;
10695 unsigned FI = FIDef->getOperand(1).getIndex();
10696 // Give the stack frame object a larger alignment if needed.
10697 if (MFI.getObjectAlign(FI) < Alignment)
10698 MFI.setObjectAlignment(FI, Alignment);
10699 }
10700 }
10701
10702 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
10703
10704 MachineIRBuilder MIB(MI);
10705 // Memmove requires that we perform the loads first before issuing the stores.
10706 // Apart from that, this loop is pretty much doing the same thing as the
10707 // memcpy codegen function.
10708 unsigned CurrOffset = 0;
10709 SmallVector<Register, 16> LoadVals;
10710 for (auto CopyTy : MemOps) {
10711 // Construct MMO for the load.
10712 auto *LoadMMO =
10713 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10714
10715 // Create the load.
10716 Register LoadPtr = Src;
10717 if (CurrOffset != 0) {
10718 LLT SrcTy = MRI.getType(Src);
10719 auto Offset =
10720 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
10721 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10722 }
10723 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
10724 CurrOffset += CopyTy.getSizeInBytes();
10725 }
10726
10727 CurrOffset = 0;
10728 for (unsigned I = 0; I < MemOps.size(); ++I) {
10729 LLT CopyTy = MemOps[I];
10730 // Now store the values loaded.
10731 auto *StoreMMO =
10732 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10733
10734 Register StorePtr = Dst;
10735 if (CurrOffset != 0) {
10736 LLT DstTy = MRI.getType(Dst);
10737 auto Offset =
10738 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
10739 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10740 }
10741 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
10742 CurrOffset += CopyTy.getSizeInBytes();
10743 }
10744 MI.eraseFromParent();
10745 return Legalized;
10746}
10747
10748LegalizerHelper::LegalizeResult
10749LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
10750 const unsigned Opc = MI.getOpcode();
10751 // This combine is fairly complex so it's not written with a separate
10752 // matcher function.
10753 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
10754 Opc == TargetOpcode::G_MEMSET) &&
10755 "Expected memcpy like instruction");
10756
10757 auto MMOIt = MI.memoperands_begin();
10758 const MachineMemOperand *MemOp = *MMOIt;
10759
10760 Align DstAlign = MemOp->getBaseAlign();
10761 Align SrcAlign;
10762 auto [Dst, Src, Len] = MI.getFirst3Regs();
10763
10764 if (Opc != TargetOpcode::G_MEMSET) {
10765 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
10766 MemOp = *(++MMOIt);
10767 SrcAlign = MemOp->getBaseAlign();
10768 }
10769
10770 // See if this is a constant length copy
10771 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10772 if (!LenVRegAndVal)
10773 return UnableToLegalize;
10774 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10775
10776 if (KnownLen == 0) {
10777 MI.eraseFromParent();
10778 return Legalized;
10779 }
10780
10781 if (MaxLen && KnownLen > MaxLen)
10782 return UnableToLegalize;
10783
10784 bool IsVolatile = MemOp->isVolatile();
10785 if (Opc == TargetOpcode::G_MEMCPY) {
10786 auto &MF = *MI.getParent()->getParent();
10787 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10788 bool OptSize = shouldLowerMemFuncForSize(MF);
10789 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
10790 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
10791 IsVolatile);
10792 }
10793 if (Opc == TargetOpcode::G_MEMMOVE)
10794 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
10795 if (Opc == TargetOpcode::G_MEMSET)
10796 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
10797 return UnableToLegalize;
10798}
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
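The three LegalizeResult values above are what callers of legalizeInstrStep switch over. As a minimal sketch (the wrapper function, its name, and the assumption that the caller owns MF, Observer, and MIRBuilder are illustrative, not the in-tree Legalizer pass):
// Hypothetical driver, sketched for illustration only; it assumes the
// GlobalISel headers for LegalizerHelper and LostDebugLocObserver.
static bool legalizeOneInstr(MachineInstr &MI, MachineFunction &MF,
                             GISelChangeObserver &Observer,
                             MachineIRBuilder &MIRBuilder) {
  LegalizerHelper Helper(MF, Observer, MIRBuilder);
  LostDebugLocObserver LocObserver("legalizer");
  switch (Helper.legalizeInstrStep(MI, LocObserver)) {
  case LegalizerHelper::AlreadyLegal:
    return true;  // MI was already legal; nothing changed.
  case LegalizerHelper::Legalized:
    return true;  // MI was rewritten into legal instructions.
  case LegalizerHelper::UnableToLegalize:
    return false; // No rule applied; the caller reports the failure.
  }
  llvm_unreachable("covered switch");
}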
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI)
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition MCInstrInfo.h:97
A single uniqued string.
Definition Metadata.h:721
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:618
Machine Value Type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation: Zero = G_CONSTANT 0; Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
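The MachineIRBuilder entries above are the building blocks most lowerings reduce to. As a hedged sketch (the register Src and the chosen expansion are illustrative, not taken from this file), an integer negate can be emitted with two of them:
// Illustrative only: emit Res = 0 - Src using the builder calls listed above.
// Src is assumed to be an existing s32 virtual register in scope.
LLT S32 = LLT::scalar(32);
auto Zero = MIRBuilder.buildConstant(S32, 0);
auto Res  = MIRBuilder.buildSub(S32, Zero, Src);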
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:413
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Definition Triple.h:627
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Definition Type.cpp:289
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:285
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
Definition Type.cpp:288
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:284
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:282
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:829
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:2034
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:651
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1655
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:294
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition MathExtras.h:223
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1564
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1621
LLVM_ABI LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1150
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition Utils.cpp:1188
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
LLVM_ABI LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition Utils.cpp:506
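As a sketch of the extractParts helper documented above (WideReg is an assumed s64 virtual register; MIRBuilder and MRI are assumed to be in scope), a wide register can be split and reassembled like this:
// Illustrative only: split WideReg into two s32 pieces, then merge them back.
SmallVector<Register, 2> Parts;
extractParts(WideReg, LLT::scalar(32), /*NumParts=*/2, Parts, MIRBuilder, MRI);
// Parts[0] holds bits [0,32), Parts[1] holds bits [32,64) of WideReg.
auto Merged = MIRBuilder.buildMergeLikeInstr(LLT::scalar(64), Parts);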
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1835
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition MathExtras.h:232
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition Utils.h:352
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition Utils.cpp:1276
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:330
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition Utils.cpp:609
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
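The arithmetic helpers referenced above (alignTo, PowerOf2Ceil, NextPowerOf2, Log2_32, isPowerOf2_64) behave as follows on a few arbitrary example values (the constants are illustrative, not taken from this file):
uint64_t Padded = alignTo(48, Align(16));   // 48 (already a multiple of 16)
uint64_t Ceil   = PowerOf2Ceil(48);         // 64 (smallest power of two >= 48)
uint64_t NextP2 = NextPowerOf2(64);         // 128 (strictly greater than 64)
unsigned Log    = Log2_32(64);              // 6
bool IsPow2     = isPowerOf2_64(48);        // false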
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
bool isKnownNeverZero() const
Return true if it's known this can never be a zero.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)