LegalizerHelper.cpp
1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
35#include "llvm/Support/Debug.h"
39#include <numeric>
40#include <optional>
41
42#define DEBUG_TYPE "legalizer"
43
44using namespace llvm;
45using namespace LegalizeActions;
46using namespace MIPatternMatch;
47
48/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
49///
50/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
51/// with any leftover piece as type \p LeftoverTy
52///
53/// Returns -1 in the first element of the pair if the breakdown is not
54/// satisfiable.
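///
/// Illustrative example (not from the original comment): breaking an s88 value
/// into s64 pieces yields one s64 part plus an s24 leftover, so the function
/// returns {1, 1} and sets \p LeftoverTy to s24.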
55static std::pair<int, int>
56getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
57 assert(!LeftoverTy.isValid() && "this is an out argument");
58
59 unsigned Size = OrigTy.getSizeInBits();
60 unsigned NarrowSize = NarrowTy.getSizeInBits();
61 unsigned NumParts = Size / NarrowSize;
62 unsigned LeftoverSize = Size - NumParts * NarrowSize;
63 assert(Size > NarrowSize);
64
65 if (LeftoverSize == 0)
66 return {NumParts, 0};
67
68 if (NarrowTy.isVector()) {
69 unsigned EltSize = OrigTy.getScalarSizeInBits();
70 if (LeftoverSize % EltSize != 0)
71 return {-1, -1};
72 LeftoverTy =
73 LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
74 OrigTy.getElementType());
75 } else {
76 LeftoverTy = LLT::scalar(LeftoverSize);
77 }
78
79 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
80 return std::make_pair(NumParts, NumLeftover);
81}
82
83static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
84
85 if (!Ty.isScalar())
86 return nullptr;
87
88 switch (Ty.getSizeInBits()) {
89 case 16:
90 return Type::getHalfTy(Ctx);
91 case 32:
92 return Type::getFloatTy(Ctx);
93 case 64:
94 return Type::getDoubleTy(Ctx);
95 case 80:
96 return Type::getX86_FP80Ty(Ctx);
97 case 128:
98 return Type::getFP128Ty(Ctx);
99 default:
100 return nullptr;
101 }
102}
103
104LegalizerHelper::LegalizerHelper(MachineFunction &MF,
105 GISelChangeObserver &Observer,
106 MachineIRBuilder &Builder)
107 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
108 LI(*MF.getSubtarget().getLegalizerInfo()),
109 TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
110
111LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
112 GISelChangeObserver &Observer,
113 MachineIRBuilder &B, GISelKnownBits *KB)
114 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
115 TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
116
117LegalizerHelper::LegalizeResult
118LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
119 LostDebugLocObserver &LocObserver) {
120 LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
121
122 MIRBuilder.setInstrAndDebugLoc(MI);
123
124 if (isa<GIntrinsic>(MI))
125 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
126 auto Step = LI.getAction(MI, MRI);
127 switch (Step.Action) {
128 case Legal:
129 LLVM_DEBUG(dbgs() << ".. Already legal\n");
130 return AlreadyLegal;
131 case Libcall:
132 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
133 return libcall(MI, LocObserver);
134 case NarrowScalar:
135 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
136 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
137 case WidenScalar:
138 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
139 return widenScalar(MI, Step.TypeIdx, Step.NewType);
140 case Bitcast:
141 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
142 return bitcast(MI, Step.TypeIdx, Step.NewType);
143 case Lower:
144 LLVM_DEBUG(dbgs() << ".. Lower\n");
145 return lower(MI, Step.TypeIdx, Step.NewType);
146 case FewerElements:
147 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
148 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
149 case MoreElements:
150 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
151 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
152 case Custom:
153 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
154 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
155 : UnableToLegalize;
156 default:
157 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
158 return UnableToLegalize;
159 }
160}
161
162void LegalizerHelper::insertParts(Register DstReg,
163 LLT ResultTy, LLT PartTy,
164 ArrayRef<Register> PartRegs,
165 LLT LeftoverTy,
166 ArrayRef<Register> LeftoverRegs) {
167 if (!LeftoverTy.isValid()) {
168 assert(LeftoverRegs.empty());
169
170 if (!ResultTy.isVector()) {
171 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
172 return;
173 }
174
175 if (PartTy.isVector())
176 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
177 else
178 MIRBuilder.buildBuildVector(DstReg, PartRegs);
179 return;
180 }
181
182 // Merge sub-vectors with different number of elements and insert into DstReg.
183 if (ResultTy.isVector()) {
184 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
185 SmallVector<Register, 8> AllRegs;
186 for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
187 AllRegs.push_back(Reg);
188 return mergeMixedSubvectors(DstReg, AllRegs);
189 }
190
191 SmallVector<Register> GCDRegs;
192 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
193 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
194 extractGCDType(GCDRegs, GCDTy, PartReg);
195 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
196 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
197}
198
199void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
200 Register Reg) {
201 LLT Ty = MRI.getType(Reg);
202 SmallVector<Register, 8> RegElts;
203 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
204 MIRBuilder, MRI);
205 Elts.append(RegElts);
206}
207
208/// Merge \p PartRegs with different types into \p DstReg.
209void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
210 ArrayRef<Register> PartRegs) {
211 SmallVector<Register, 8> AllElts;
212 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
213 appendVectorElts(AllElts, PartRegs[i]);
214
215 Register Leftover = PartRegs[PartRegs.size() - 1];
216 if (!MRI.getType(Leftover).isVector())
217 AllElts.push_back(Leftover);
218 else
219 appendVectorElts(AllElts, Leftover);
220
221 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
222}
223
224/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
225static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
226 const MachineInstr &MI) {
227 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
228
229 const int StartIdx = Regs.size();
230 const int NumResults = MI.getNumOperands() - 1;
231 Regs.resize(Regs.size() + NumResults);
232 for (int I = 0; I != NumResults; ++I)
233 Regs[StartIdx + I] = MI.getOperand(I).getReg();
234}
235
236void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
237 LLT GCDTy, Register SrcReg) {
238 LLT SrcTy = MRI.getType(SrcReg);
239 if (SrcTy == GCDTy) {
240 // If the source already evenly divides the result type, we don't need to do
241 // anything.
242 Parts.push_back(SrcReg);
243 } else {
244 // Need to split into common type sized pieces.
245 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
246 getUnmergeResults(Parts, *Unmerge);
247 }
248}
249
250LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
251 LLT NarrowTy, Register SrcReg) {
252 LLT SrcTy = MRI.getType(SrcReg);
253 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
254 extractGCDType(Parts, GCDTy, SrcReg);
255 return GCDTy;
256}
257
258LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
259 SmallVectorImpl<Register> &VRegs,
260 unsigned PadStrategy) {
261 LLT LCMTy = getLCMType(DstTy, NarrowTy);
262
263 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
264 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
265 int NumOrigSrc = VRegs.size();
266
267 Register PadReg;
268
269 // Get a value we can use to pad the source value if the sources won't evenly
270 // cover the result type.
271 if (NumOrigSrc < NumParts * NumSubParts) {
272 if (PadStrategy == TargetOpcode::G_ZEXT)
273 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
274 else if (PadStrategy == TargetOpcode::G_ANYEXT)
275 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
276 else {
277 assert(PadStrategy == TargetOpcode::G_SEXT);
278
279 // Shift the sign bit of the low register through the high register.
280 auto ShiftAmt =
281 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
282 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
283 }
284 }
285
286 // Registers for the final merge to be produced.
287 SmallVector<Register, 4> Remerge(NumParts);
288
289 // Registers needed for intermediate merges, which will be merged into a
290 // source for Remerge.
291 SmallVector<Register, 4> SubMerge(NumSubParts);
292
293 // Once we've fully read off the end of the original source bits, we can reuse
294 // the same high bits for remaining padding elements.
295 Register AllPadReg;
296
297 // Build merges to the LCM type to cover the original result type.
298 for (int I = 0; I != NumParts; ++I) {
299 bool AllMergePartsArePadding = true;
300
301 // Build the requested merges to the requested type.
302 for (int J = 0; J != NumSubParts; ++J) {
303 int Idx = I * NumSubParts + J;
304 if (Idx >= NumOrigSrc) {
305 SubMerge[J] = PadReg;
306 continue;
307 }
308
309 SubMerge[J] = VRegs[Idx];
310
311 // There are meaningful bits here we can't reuse later.
312 AllMergePartsArePadding = false;
313 }
314
315 // If we've filled up a complete piece with padding bits, we can directly
316 // emit the natural sized constant if applicable, rather than a merge of
317 // smaller constants.
318 if (AllMergePartsArePadding && !AllPadReg) {
319 if (PadStrategy == TargetOpcode::G_ANYEXT)
320 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
321 else if (PadStrategy == TargetOpcode::G_ZEXT)
322 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
323
324 // If this is a sign extension, we can't materialize a trivial constant
325 // with the right type and have to produce a merge.
326 }
327
328 if (AllPadReg) {
329 // Avoid creating additional instructions if we're just adding additional
330 // copies of padding bits.
331 Remerge[I] = AllPadReg;
332 continue;
333 }
334
335 if (NumSubParts == 1)
336 Remerge[I] = SubMerge[0];
337 else
338 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
339
340 // In the sign extend padding case, re-use the first all-signbit merge.
341 if (AllMergePartsArePadding && !AllPadReg)
342 AllPadReg = Remerge[I];
343 }
344
345 VRegs = std::move(Remerge);
346 return LCMTy;
347}
348
349void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
350 ArrayRef<Register> RemergeRegs) {
351 LLT DstTy = MRI.getType(DstReg);
352
353 // Create the merge to the widened source, and extract the relevant bits into
354 // the result.
355
356 if (DstTy == LCMTy) {
357 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
358 return;
359 }
360
361 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
362 if (DstTy.isScalar() && LCMTy.isScalar()) {
363 MIRBuilder.buildTrunc(DstReg, Remerge);
364 return;
365 }
366
367 if (LCMTy.isVector()) {
368 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
369 SmallVector<Register, 8> UnmergeDefs(NumDefs);
370 UnmergeDefs[0] = DstReg;
371 for (unsigned I = 1; I != NumDefs; ++I)
372 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
373
374 MIRBuilder.buildUnmerge(UnmergeDefs,
375 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
376 return;
377 }
378
379 llvm_unreachable("unhandled case");
380}
381
382static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
383#define RTLIBCASE_INT(LibcallPrefix) \
384 do { \
385 switch (Size) { \
386 case 32: \
387 return RTLIB::LibcallPrefix##32; \
388 case 64: \
389 return RTLIB::LibcallPrefix##64; \
390 case 128: \
391 return RTLIB::LibcallPrefix##128; \
392 default: \
393 llvm_unreachable("unexpected size"); \
394 } \
395 } while (0)
396
397#define RTLIBCASE(LibcallPrefix) \
398 do { \
399 switch (Size) { \
400 case 32: \
401 return RTLIB::LibcallPrefix##32; \
402 case 64: \
403 return RTLIB::LibcallPrefix##64; \
404 case 80: \
405 return RTLIB::LibcallPrefix##80; \
406 case 128: \
407 return RTLIB::LibcallPrefix##128; \
408 default: \
409 llvm_unreachable("unexpected size"); \
410 } \
411 } while (0)
412
413 switch (Opcode) {
414 case TargetOpcode::G_MUL:
415 RTLIBCASE_INT(MUL_I);
416 case TargetOpcode::G_SDIV:
417 RTLIBCASE_INT(SDIV_I);
418 case TargetOpcode::G_UDIV:
419 RTLIBCASE_INT(UDIV_I);
420 case TargetOpcode::G_SREM:
421 RTLIBCASE_INT(SREM_I);
422 case TargetOpcode::G_UREM:
423 RTLIBCASE_INT(UREM_I);
424 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
425 RTLIBCASE_INT(CTLZ_I);
426 case TargetOpcode::G_FADD:
427 RTLIBCASE(ADD_F);
428 case TargetOpcode::G_FSUB:
429 RTLIBCASE(SUB_F);
430 case TargetOpcode::G_FMUL:
431 RTLIBCASE(MUL_F);
432 case TargetOpcode::G_FDIV:
433 RTLIBCASE(DIV_F);
434 case TargetOpcode::G_FEXP:
435 RTLIBCASE(EXP_F);
436 case TargetOpcode::G_FEXP2:
437 RTLIBCASE(EXP2_F);
438 case TargetOpcode::G_FEXP10:
439 RTLIBCASE(EXP10_F);
440 case TargetOpcode::G_FREM:
441 RTLIBCASE(REM_F);
442 case TargetOpcode::G_FPOW:
443 RTLIBCASE(POW_F);
444 case TargetOpcode::G_FPOWI:
445 RTLIBCASE(POWI_F);
446 case TargetOpcode::G_FMA:
447 RTLIBCASE(FMA_F);
448 case TargetOpcode::G_FSIN:
449 RTLIBCASE(SIN_F);
450 case TargetOpcode::G_FCOS:
451 RTLIBCASE(COS_F);
452 case TargetOpcode::G_FTAN:
453 RTLIBCASE(TAN_F);
454 case TargetOpcode::G_FASIN:
455 RTLIBCASE(ASIN_F);
456 case TargetOpcode::G_FACOS:
457 RTLIBCASE(ACOS_F);
458 case TargetOpcode::G_FATAN:
459 RTLIBCASE(ATAN_F);
460 case TargetOpcode::G_FSINH:
461 RTLIBCASE(SINH_F);
462 case TargetOpcode::G_FCOSH:
463 RTLIBCASE(COSH_F);
464 case TargetOpcode::G_FTANH:
465 RTLIBCASE(TANH_F);
466 case TargetOpcode::G_FLOG10:
467 RTLIBCASE(LOG10_F);
468 case TargetOpcode::G_FLOG:
469 RTLIBCASE(LOG_F);
470 case TargetOpcode::G_FLOG2:
471 RTLIBCASE(LOG2_F);
472 case TargetOpcode::G_FLDEXP:
473 RTLIBCASE(LDEXP_F);
474 case TargetOpcode::G_FCEIL:
475 RTLIBCASE(CEIL_F);
476 case TargetOpcode::G_FFLOOR:
477 RTLIBCASE(FLOOR_F);
478 case TargetOpcode::G_FMINNUM:
479 RTLIBCASE(FMIN_F);
480 case TargetOpcode::G_FMAXNUM:
481 RTLIBCASE(FMAX_F);
482 case TargetOpcode::G_FSQRT:
483 RTLIBCASE(SQRT_F);
484 case TargetOpcode::G_FRINT:
485 RTLIBCASE(RINT_F);
486 case TargetOpcode::G_FNEARBYINT:
487 RTLIBCASE(NEARBYINT_F);
488 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
489 RTLIBCASE(ROUNDEVEN_F);
490 case TargetOpcode::G_INTRINSIC_LRINT:
491 RTLIBCASE(LRINT_F);
492 case TargetOpcode::G_INTRINSIC_LLRINT:
493 RTLIBCASE(LLRINT_F);
494 }
495 llvm_unreachable("Unknown libcall function");
496}
497
498/// True if an instruction is in tail position in its caller. Intended for
499/// legalizing libcalls as tail calls when possible.
500static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
501 MachineInstr &MI,
502 const TargetInstrInfo &TII,
503 MachineRegisterInfo &MRI) {
504 MachineBasicBlock &MBB = *MI.getParent();
505 const Function &F = MBB.getParent()->getFunction();
506
507 // Conservatively require the attributes of the call to match those of
508 // the return. Ignore NoAlias and NonNull because they don't affect the
509 // call sequence.
510 AttributeList CallerAttrs = F.getAttributes();
511 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
512 .removeAttribute(Attribute::NoAlias)
513 .removeAttribute(Attribute::NonNull)
514 .hasAttributes())
515 return false;
516
517 // It's not safe to eliminate the sign / zero extension of the return value.
518 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
519 CallerAttrs.hasRetAttr(Attribute::SExt))
520 return false;
521
522 // Only tail call if the following instruction is a standard return or if we
523 // have a `thisreturn` callee, and a sequence like:
524 //
525 // G_MEMCPY %0, %1, %2
526 // $x0 = COPY %0
527 // RET_ReallyLR implicit $x0
528 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
529 if (Next != MBB.instr_end() && Next->isCopy()) {
530 if (MI.getOpcode() == TargetOpcode::G_BZERO)
531 return false;
532
533 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
534 // memcpy/etc routines return the same parameter. For others it will be the
535 // returned value.
536 Register VReg = MI.getOperand(0).getReg();
537 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
538 return false;
539
540 Register PReg = Next->getOperand(0).getReg();
541 if (!PReg.isPhysical())
542 return false;
543
544 auto Ret = next_nodbg(Next, MBB.instr_end());
545 if (Ret == MBB.instr_end() || !Ret->isReturn())
546 return false;
547
548 if (Ret->getNumImplicitOperands() != 1)
549 return false;
550
551 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
552 return false;
553
554 // Skip over the COPY that we just validated.
555 Next = Ret;
556 }
557
558 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
559 return false;
560
561 return true;
562}
563
564LegalizerHelper::LegalizeResult
565llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
566 const CallLowering::ArgInfo &Result,
567 ArrayRef<CallLowering::ArgInfo> Args,
568 const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
569 MachineInstr *MI) {
570 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
571
572 CallLowering::CallLoweringInfo Info;
573 Info.CallConv = CC;
574 Info.Callee = MachineOperand::CreateES(Name);
575 Info.OrigRet = Result;
576 if (MI)
577 Info.IsTailCall =
578 (Result.Ty->isVoidTy() ||
579 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
580 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
581 *MIRBuilder.getMRI());
582
583 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
584 if (!CLI.lowerCall(MIRBuilder, Info))
585 return LegalizerHelper::UnableToLegalize;
586
587 if (MI && Info.LoweredTailCall) {
588 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
589
590 // Check debug locations before removing the return.
591 LocObserver.checkpoint(true);
592
593 // We must have a return following the call (or debug insts) to get past
594 // isLibCallInTailPosition.
595 do {
596 MachineInstr *Next = MI->getNextNode();
597 assert(Next &&
598 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
599 "Expected instr following MI to be return or debug inst?");
600 // We lowered a tail call, so the call is now the return from the block.
601 // Delete the old return.
602 Next->eraseFromParent();
603 } while (MI->getNextNode());
604
605 // We expect to lose the debug location from the return.
606 LocObserver.checkpoint(false);
607 }
608 return LegalizerHelper::Legalized;
609}
610
611LegalizerHelper::LegalizeResult
612llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
613 const CallLowering::ArgInfo &Result,
614 ArrayRef<CallLowering::ArgInfo> Args,
615 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
616 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
617 const char *Name = TLI.getLibcallName(Libcall);
618 if (!Name)
619 return LegalizerHelper::UnableToLegalize;
620 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
621 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
622}
623
624// Useful for libcalls where all operands have the same type.
625static LegalizerHelper::LegalizeResult
626simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
627 Type *OpType, LostDebugLocObserver &LocObserver) {
628 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
629
630 // FIXME: What does the original arg index mean here?
631 SmallVector<CallLowering::ArgInfo, 3> Args;
632 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
633 Args.push_back({MO.getReg(), OpType, 0});
634 return createLibcall(MIRBuilder, Libcall,
635 {MI.getOperand(0).getReg(), OpType, 0}, Args,
636 LocObserver, &MI);
637}
638
639LegalizerHelper::LegalizeResult
640llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
641 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
642 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
643
644 SmallVector<CallLowering::ArgInfo, 3> Args;
645 // Add all the args, except for the last which is an imm denoting 'tail'.
646 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
647 Register Reg = MI.getOperand(i).getReg();
648
649 // Need to derive an IR type for call lowering.
650 LLT OpLLT = MRI.getType(Reg);
651 Type *OpTy = nullptr;
652 if (OpLLT.isPointer())
653 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
654 else
655 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
656 Args.push_back({Reg, OpTy, 0});
657 }
658
659 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
660 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
661 RTLIB::Libcall RTLibcall;
662 unsigned Opc = MI.getOpcode();
663 switch (Opc) {
664 case TargetOpcode::G_BZERO:
665 RTLibcall = RTLIB::BZERO;
666 break;
667 case TargetOpcode::G_MEMCPY:
668 RTLibcall = RTLIB::MEMCPY;
669 Args[0].Flags[0].setReturned();
670 break;
671 case TargetOpcode::G_MEMMOVE:
672 RTLibcall = RTLIB::MEMMOVE;
673 Args[0].Flags[0].setReturned();
674 break;
675 case TargetOpcode::G_MEMSET:
676 RTLibcall = RTLIB::MEMSET;
677 Args[0].Flags[0].setReturned();
678 break;
679 default:
680 llvm_unreachable("unsupported opcode");
681 }
682 const char *Name = TLI.getLibcallName(RTLibcall);
683
684 // Unsupported libcall on the target.
685 if (!Name) {
686 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
687 << MIRBuilder.getTII().getName(Opc) << "\n");
688 return LegalizerHelper::UnableToLegalize;
689 }
690
691 CallLowering::CallLoweringInfo Info;
692 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
693 Info.Callee = MachineOperand::CreateES(Name);
694 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
695 Info.IsTailCall =
696 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
697 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
698
699 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
700 if (!CLI.lowerCall(MIRBuilder, Info))
701 return LegalizerHelper::UnableToLegalize;
702
703 if (Info.LoweredTailCall) {
704 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
705
706 // Check debug locations before removing the return.
707 LocObserver.checkpoint(true);
708
709 // We must have a return following the call (or debug insts) to get past
710 // isLibCallInTailPosition.
711 do {
712 MachineInstr *Next = MI.getNextNode();
713 assert(Next &&
714 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
715 "Expected instr following MI to be return or debug inst?");
716 // We lowered a tail call, so the call is now the return from the block.
717 // Delete the old return.
718 Next->eraseFromParent();
719 } while (MI.getNextNode());
720
721 // We expect to lose the debug location from the return.
722 LocObserver.checkpoint(false);
723 }
724
725 return LegalizerHelper::Legalized;
726}
727
728static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
729 unsigned Opc = MI.getOpcode();
730 auto &AtomicMI = cast<GMemOperation>(MI);
731 auto &MMO = AtomicMI.getMMO();
732 auto Ordering = MMO.getMergedOrdering();
733 LLT MemType = MMO.getMemoryType();
734 uint64_t MemSize = MemType.getSizeInBytes();
735 if (MemType.isVector())
736 return RTLIB::UNKNOWN_LIBCALL;
737
738#define LCALLS(A, B) \
739 { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
740#define LCALL5(A) \
741 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
742 switch (Opc) {
743 case TargetOpcode::G_ATOMIC_CMPXCHG:
744 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
745 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
746 return getOutlineAtomicHelper(LC, Ordering, MemSize);
747 }
748 case TargetOpcode::G_ATOMICRMW_XCHG: {
749 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
750 return getOutlineAtomicHelper(LC, Ordering, MemSize);
751 }
752 case TargetOpcode::G_ATOMICRMW_ADD:
753 case TargetOpcode::G_ATOMICRMW_SUB: {
754 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
755 return getOutlineAtomicHelper(LC, Ordering, MemSize);
756 }
757 case TargetOpcode::G_ATOMICRMW_AND: {
758 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
759 return getOutlineAtomicHelper(LC, Ordering, MemSize);
760 }
761 case TargetOpcode::G_ATOMICRMW_OR: {
762 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
763 return getOutlineAtomicHelper(LC, Ordering, MemSize);
764 }
765 case TargetOpcode::G_ATOMICRMW_XOR: {
766 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
767 return getOutlineAtomicHelper(LC, Ordering, MemSize);
768 }
769 default:
770 return RTLIB::UNKNOWN_LIBCALL;
771 }
772#undef LCALLS
773#undef LCALL5
774}
775
776static LegalizerHelper::LegalizeResult
777createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
778 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
779
780 Type *RetTy;
781 SmallVector<Register> RetRegs;
782 SmallVector<CallLowering::ArgInfo, 3> Args;
783 unsigned Opc = MI.getOpcode();
784 switch (Opc) {
785 case TargetOpcode::G_ATOMIC_CMPXCHG:
786 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
787 Register Success;
788 LLT SuccessLLT;
789 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
790 MI.getFirst4RegLLTs();
791 RetRegs.push_back(Ret);
792 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
793 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
794 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
795 NewLLT) = MI.getFirst5RegLLTs();
796 RetRegs.push_back(Success);
797 RetTy = StructType::get(
798 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
799 }
800 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
801 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
802 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
803 break;
804 }
805 case TargetOpcode::G_ATOMICRMW_XCHG:
806 case TargetOpcode::G_ATOMICRMW_ADD:
807 case TargetOpcode::G_ATOMICRMW_SUB:
808 case TargetOpcode::G_ATOMICRMW_AND:
809 case TargetOpcode::G_ATOMICRMW_OR:
810 case TargetOpcode::G_ATOMICRMW_XOR: {
811 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
812 RetRegs.push_back(Ret);
813 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
814 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
815 Val =
816 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
817 .getReg(0);
818 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
819 Val =
820 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
821 .getReg(0);
822 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
823 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
824 break;
825 }
826 default:
827 llvm_unreachable("unsupported opcode");
828 }
829
830 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
831 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
832 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
833 const char *Name = TLI.getLibcallName(RTLibcall);
834
835 // Unsupported libcall on the target.
836 if (!Name) {
837 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
838 << MIRBuilder.getTII().getName(Opc) << "\n");
839 return LegalizerHelper::UnableToLegalize;
840 }
841
842 CallLowering::CallLoweringInfo Info;
843 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
844 Info.Callee = MachineOperand::CreateES(Name);
845 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
846
847 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
848 if (!CLI.lowerCall(MIRBuilder, Info))
849 return LegalizerHelper::UnableToLegalize;
850
851 return LegalizerHelper::Legalized;
852}
853
854static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
855 Type *FromType) {
856 auto ToMVT = MVT::getVT(ToType);
857 auto FromMVT = MVT::getVT(FromType);
858
859 switch (Opcode) {
860 case TargetOpcode::G_FPEXT:
861 return RTLIB::getFPEXT(FromMVT, ToMVT);
862 case TargetOpcode::G_FPTRUNC:
863 return RTLIB::getFPROUND(FromMVT, ToMVT);
864 case TargetOpcode::G_FPTOSI:
865 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
866 case TargetOpcode::G_FPTOUI:
867 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
868 case TargetOpcode::G_SITOFP:
869 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
870 case TargetOpcode::G_UITOFP:
871 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
872 }
873 llvm_unreachable("Unsupported libcall function");
874}
875
876static LegalizerHelper::LegalizeResult
877conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
878 Type *FromType, LostDebugLocObserver &LocObserver) {
879 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
880 return createLibcall(
881 MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType, 0},
882 {{MI.getOperand(1).getReg(), FromType, 0}}, LocObserver, &MI);
883}
884
885static RTLIB::Libcall
886getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
887 RTLIB::Libcall RTLibcall;
888 switch (MI.getOpcode()) {
889 case TargetOpcode::G_GET_FPENV:
890 RTLibcall = RTLIB::FEGETENV;
891 break;
892 case TargetOpcode::G_SET_FPENV:
893 case TargetOpcode::G_RESET_FPENV:
894 RTLibcall = RTLIB::FESETENV;
895 break;
896 case TargetOpcode::G_GET_FPMODE:
897 RTLibcall = RTLIB::FEGETMODE;
898 break;
899 case TargetOpcode::G_SET_FPMODE:
900 case TargetOpcode::G_RESET_FPMODE:
901 RTLibcall = RTLIB::FESETMODE;
902 break;
903 default:
904 llvm_unreachable("Unexpected opcode");
905 }
906 return RTLibcall;
907}
908
909// Some library functions that read FP state (fegetmode, fegetenv) write the
910// state into a region in memory. IR intrinsics that do the same operations
911 // (get_fpmode, get_fpenv) return the state as an integer value. To implement these
912 // intrinsics via the library functions, we need to use a temporary variable,
913// for example:
914//
915// %0:_(s32) = G_GET_FPMODE
916//
917// is transformed to:
918//
919// %1:_(p0) = G_FRAME_INDEX %stack.0
920// BL &fegetmode
921// %0:_(s32) = G_LOAD % 1
922//
923LegalizerHelper::LegalizeResult
924LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
925 MachineInstr &MI,
926 LostDebugLocObserver &LocObserver) {
927 const DataLayout &DL = MIRBuilder.getDataLayout();
928 auto &MF = MIRBuilder.getMF();
929 auto &MRI = *MIRBuilder.getMRI();
930 auto &Ctx = MF.getFunction().getContext();
931
932 // Create a temporary where the library function will put the read state.
933 Register Dst = MI.getOperand(0).getReg();
934 LLT StateTy = MRI.getType(Dst);
935 TypeSize StateSize = StateTy.getSizeInBytes();
936 Align TempAlign = getStackTemporaryAlignment(StateTy);
937 MachinePointerInfo TempPtrInfo;
938 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
939
940 // Create a call to library function, with the temporary as an argument.
941 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
942 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
943 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
944 auto Res =
945 createLibcall(MIRBuilder, RTLibcall,
946 CallLowering::ArgInfo({}, Type::getVoidTy(Ctx), 0),
947 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
948 LocObserver, nullptr);
949 if (Res != LegalizerHelper::Legalized)
950 return Res;
951
952 // Create a load from the temporary.
953 MachineMemOperand *MMO = MF.getMachineMemOperand(
954 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
955 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
956
957 return LegalizerHelper::Legalized;
958}
959
960 // Similar to `createGetStateLibcall`, this function calls a library function
961 // using transient stack space. In this case the library function reads the
962 // content of the memory region.
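//
// For example (an illustrative sketch, not verbatim from the original file):
//
// G_SET_FPMODE %0:_(s32)
//
// is transformed to:
//
// %1:_(p0) = G_FRAME_INDEX %stack.0
// G_STORE %0(s32), %1(p0)
// BL &fesetmode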
963LegalizerHelper::LegalizeResult
964LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
965 MachineInstr &MI,
966 LostDebugLocObserver &LocObserver) {
967 const DataLayout &DL = MIRBuilder.getDataLayout();
968 auto &MF = MIRBuilder.getMF();
969 auto &MRI = *MIRBuilder.getMRI();
970 auto &Ctx = MF.getFunction().getContext();
971
972 // Create a temporary where the library function will get the new state.
973 Register Src = MI.getOperand(0).getReg();
974 LLT StateTy = MRI.getType(Src);
975 TypeSize StateSize = StateTy.getSizeInBytes();
976 Align TempAlign = getStackTemporaryAlignment(StateTy);
977 MachinePointerInfo TempPtrInfo;
978 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
979
980 // Put the new state into the temporary.
981 MachineMemOperand *MMO = MF.getMachineMemOperand(
982 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
983 MIRBuilder.buildStore(Src, Temp, *MMO);
984
985 // Create a call to library function, with the temporary as an argument.
986 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
987 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
988 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
989 return createLibcall(MIRBuilder, RTLibcall,
990 CallLowering::ArgInfo({}, Type::getVoidTy(Ctx), 0),
991 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
992 LocObserver, nullptr);
993}
994
995 // This function is used to legalize operations that set the default
996 // environment state. In the C library a call like `fesetmode(FE_DFL_MODE)` is
997 // used for that. On most targets supported by glibc, FE_DFL_MODE is defined as
998 // `((const femode_t *) -1)`. That assumption is used here. If it does not hold
999 // for some target, the target must provide custom lowering.
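//
// For example (an illustrative sketch): G_RESET_FPMODE is lowered to a call to
// `fesetmode`, with the pointer argument built as G_INTTOPTR of an all-ones
// constant, matching `(const femode_t *) -1`.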
1000LegalizerHelper::LegalizeResult
1001LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
1002 MachineInstr &MI,
1003 LostDebugLocObserver &LocObserver) {
1004 const DataLayout &DL = MIRBuilder.getDataLayout();
1005 auto &MF = MIRBuilder.getMF();
1006 auto &Ctx = MF.getFunction().getContext();
1007
1008 // Create an argument for the library function.
1009 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1010 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1011 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1012 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1013 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1014 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1015 MIRBuilder.buildIntToPtr(Dest, DefValue);
1016
1017 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1018 return createLibcall(MIRBuilder, RTLibcall,
1019 CallLowering::ArgInfo({}, Type::getVoidTy(Ctx), 0),
1020 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1021 LocObserver, &MI);
1022}
1023
1024LegalizerHelper::LegalizeResult
1025LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1026 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1027
1028 switch (MI.getOpcode()) {
1029 default:
1030 return UnableToLegalize;
1031 case TargetOpcode::G_MUL:
1032 case TargetOpcode::G_SDIV:
1033 case TargetOpcode::G_UDIV:
1034 case TargetOpcode::G_SREM:
1035 case TargetOpcode::G_UREM:
1036 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1037 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1038 unsigned Size = LLTy.getSizeInBits();
1039 Type *HLTy = IntegerType::get(Ctx, Size);
1040 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1041 if (Status != Legalized)
1042 return Status;
1043 break;
1044 }
1045 case TargetOpcode::G_FADD:
1046 case TargetOpcode::G_FSUB:
1047 case TargetOpcode::G_FMUL:
1048 case TargetOpcode::G_FDIV:
1049 case TargetOpcode::G_FMA:
1050 case TargetOpcode::G_FPOW:
1051 case TargetOpcode::G_FREM:
1052 case TargetOpcode::G_FCOS:
1053 case TargetOpcode::G_FSIN:
1054 case TargetOpcode::G_FTAN:
1055 case TargetOpcode::G_FACOS:
1056 case TargetOpcode::G_FASIN:
1057 case TargetOpcode::G_FATAN:
1058 case TargetOpcode::G_FCOSH:
1059 case TargetOpcode::G_FSINH:
1060 case TargetOpcode::G_FTANH:
1061 case TargetOpcode::G_FLOG10:
1062 case TargetOpcode::G_FLOG:
1063 case TargetOpcode::G_FLOG2:
1064 case TargetOpcode::G_FLDEXP:
1065 case TargetOpcode::G_FEXP:
1066 case TargetOpcode::G_FEXP2:
1067 case TargetOpcode::G_FEXP10:
1068 case TargetOpcode::G_FCEIL:
1069 case TargetOpcode::G_FFLOOR:
1070 case TargetOpcode::G_FMINNUM:
1071 case TargetOpcode::G_FMAXNUM:
1072 case TargetOpcode::G_FSQRT:
1073 case TargetOpcode::G_FRINT:
1074 case TargetOpcode::G_FNEARBYINT:
1075 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1076 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1077 unsigned Size = LLTy.getSizeInBits();
1078 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1079 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1080 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1081 return UnableToLegalize;
1082 }
1083 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1084 if (Status != Legalized)
1085 return Status;
1086 break;
1087 }
1088 case TargetOpcode::G_INTRINSIC_LRINT:
1089 case TargetOpcode::G_INTRINSIC_LLRINT: {
1090 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1091 unsigned Size = LLTy.getSizeInBits();
1092 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1093 Type *ITy = IntegerType::get(
1094 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1095 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1096 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1097 return UnableToLegalize;
1098 }
1099 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1100 LegalizeResult Status =
1101 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1102 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1103 if (Status != Legalized)
1104 return Status;
1105 MI.eraseFromParent();
1106 return Legalized;
1107 }
1108 case TargetOpcode::G_FPOWI: {
1109 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1110 unsigned Size = LLTy.getSizeInBits();
1111 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1112 Type *ITy = IntegerType::get(
1113 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1114 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1115 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1116 return UnableToLegalize;
1117 }
1118 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1119 std::initializer_list<CallLowering::ArgInfo> Args = {
1120 {MI.getOperand(1).getReg(), HLTy, 0},
1121 {MI.getOperand(2).getReg(), ITy, 1}};
1122 LegalizeResult Status =
1123 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1124 Args, LocObserver, &MI);
1125 if (Status != Legalized)
1126 return Status;
1127 break;
1128 }
1129 case TargetOpcode::G_FPEXT:
1130 case TargetOpcode::G_FPTRUNC: {
1131 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1132 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1133 if (!FromTy || !ToTy)
1134 return UnableToLegalize;
1135 LegalizeResult Status =
1136 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver);
1137 if (Status != Legalized)
1138 return Status;
1139 break;
1140 }
1141 case TargetOpcode::G_FPTOSI:
1142 case TargetOpcode::G_FPTOUI: {
1143 // FIXME: Support other types
1144 Type *FromTy =
1145 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1146 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1147 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1148 return UnableToLegalize;
1149 LegalizeResult Status = conversionLibcall(
1150 MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver);
1151 if (Status != Legalized)
1152 return Status;
1153 break;
1154 }
1155 case TargetOpcode::G_SITOFP:
1156 case TargetOpcode::G_UITOFP: {
1157 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1158 Type *ToTy =
1159 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1160 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1161 return UnableToLegalize;
1162 LegalizeResult Status = conversionLibcall(
1163 MI, MIRBuilder, ToTy, Type::getIntNTy(Ctx, FromSize), LocObserver);
1164 if (Status != Legalized)
1165 return Status;
1166 break;
1167 }
1168 case TargetOpcode::G_ATOMICRMW_XCHG:
1169 case TargetOpcode::G_ATOMICRMW_ADD:
1170 case TargetOpcode::G_ATOMICRMW_SUB:
1171 case TargetOpcode::G_ATOMICRMW_AND:
1172 case TargetOpcode::G_ATOMICRMW_OR:
1173 case TargetOpcode::G_ATOMICRMW_XOR:
1174 case TargetOpcode::G_ATOMIC_CMPXCHG:
1175 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1176 auto Status = createAtomicLibcall(MIRBuilder, MI);
1177 if (Status != Legalized)
1178 return Status;
1179 break;
1180 }
1181 case TargetOpcode::G_BZERO:
1182 case TargetOpcode::G_MEMCPY:
1183 case TargetOpcode::G_MEMMOVE:
1184 case TargetOpcode::G_MEMSET: {
1185 LegalizeResult Result =
1186 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1187 if (Result != Legalized)
1188 return Result;
1189 MI.eraseFromParent();
1190 return Result;
1191 }
1192 case TargetOpcode::G_GET_FPENV:
1193 case TargetOpcode::G_GET_FPMODE: {
1194 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1195 if (Result != Legalized)
1196 return Result;
1197 break;
1198 }
1199 case TargetOpcode::G_SET_FPENV:
1200 case TargetOpcode::G_SET_FPMODE: {
1201 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1202 if (Result != Legalized)
1203 return Result;
1204 break;
1205 }
1206 case TargetOpcode::G_RESET_FPENV:
1207 case TargetOpcode::G_RESET_FPMODE: {
1208 LegalizeResult Result =
1209 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1210 if (Result != Legalized)
1211 return Result;
1212 break;
1213 }
1214 }
1215
1216 MI.eraseFromParent();
1217 return Legalized;
1218}
1219
1220LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1221 unsigned TypeIdx,
1222 LLT NarrowTy) {
1223 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1224 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1225
1226 switch (MI.getOpcode()) {
1227 default:
1228 return UnableToLegalize;
1229 case TargetOpcode::G_IMPLICIT_DEF: {
1230 Register DstReg = MI.getOperand(0).getReg();
1231 LLT DstTy = MRI.getType(DstReg);
1232
1233 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1234 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1235 // FIXME: Although this would also be legal for the general case, it causes
1236 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1237 // combines not being hit). This seems to be a problem related to the
1238 // artifact combiner.
1239 if (SizeOp0 % NarrowSize != 0) {
1240 LLT ImplicitTy = NarrowTy;
1241 if (DstTy.isVector())
1242 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
1243
1244 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1245 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1246
1247 MI.eraseFromParent();
1248 return Legalized;
1249 }
1250
1251 int NumParts = SizeOp0 / NarrowSize;
1252
1253 SmallVector<Register, 2> DstRegs;
1254 for (int i = 0; i < NumParts; ++i)
1255 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1256
1257 if (DstTy.isVector())
1258 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1259 else
1260 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1261 MI.eraseFromParent();
1262 return Legalized;
1263 }
1264 case TargetOpcode::G_CONSTANT: {
1265 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1266 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1267 unsigned TotalSize = Ty.getSizeInBits();
1268 unsigned NarrowSize = NarrowTy.getSizeInBits();
1269 int NumParts = TotalSize / NarrowSize;
1270
1271 SmallVector<Register, 4> PartRegs;
1272 for (int I = 0; I != NumParts; ++I) {
1273 unsigned Offset = I * NarrowSize;
1274 auto K = MIRBuilder.buildConstant(NarrowTy,
1275 Val.lshr(Offset).trunc(NarrowSize));
1276 PartRegs.push_back(K.getReg(0));
1277 }
1278
1279 LLT LeftoverTy;
1280 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1281 SmallVector<Register, 1> LeftoverRegs;
1282 if (LeftoverBits != 0) {
1283 LeftoverTy = LLT::scalar(LeftoverBits);
1284 auto K = MIRBuilder.buildConstant(
1285 LeftoverTy,
1286 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1287 LeftoverRegs.push_back(K.getReg(0));
1288 }
1289
1290 insertParts(MI.getOperand(0).getReg(),
1291 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1292
1293 MI.eraseFromParent();
1294 return Legalized;
1295 }
1296 case TargetOpcode::G_SEXT:
1297 case TargetOpcode::G_ZEXT:
1298 case TargetOpcode::G_ANYEXT:
1299 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1300 case TargetOpcode::G_TRUNC: {
1301 if (TypeIdx != 1)
1302 return UnableToLegalize;
1303
1304 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1305 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1306 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1307 return UnableToLegalize;
1308 }
1309
1310 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1311 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1312 MI.eraseFromParent();
1313 return Legalized;
1314 }
1315 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1316 case TargetOpcode::G_FREEZE: {
1317 if (TypeIdx != 0)
1318 return UnableToLegalize;
1319
1320 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1321 // Should widen scalar first
1322 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1323 return UnableToLegalize;
1324
1325 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1326 SmallVector<Register, 8> Parts;
1327 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1328 Parts.push_back(
1329 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1330 .getReg(0));
1331 }
1332
1333 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1334 MI.eraseFromParent();
1335 return Legalized;
1336 }
1337 case TargetOpcode::G_ADD:
1338 case TargetOpcode::G_SUB:
1339 case TargetOpcode::G_SADDO:
1340 case TargetOpcode::G_SSUBO:
1341 case TargetOpcode::G_SADDE:
1342 case TargetOpcode::G_SSUBE:
1343 case TargetOpcode::G_UADDO:
1344 case TargetOpcode::G_USUBO:
1345 case TargetOpcode::G_UADDE:
1346 case TargetOpcode::G_USUBE:
1347 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1348 case TargetOpcode::G_MUL:
1349 case TargetOpcode::G_UMULH:
1350 return narrowScalarMul(MI, NarrowTy);
1351 case TargetOpcode::G_EXTRACT:
1352 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1353 case TargetOpcode::G_INSERT:
1354 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1355 case TargetOpcode::G_LOAD: {
1356 auto &LoadMI = cast<GLoad>(MI);
1357 Register DstReg = LoadMI.getDstReg();
1358 LLT DstTy = MRI.getType(DstReg);
1359 if (DstTy.isVector())
1360 return UnableToLegalize;
1361
1362 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1363 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1364 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1365 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1366 LoadMI.eraseFromParent();
1367 return Legalized;
1368 }
1369
1370 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1371 }
1372 case TargetOpcode::G_ZEXTLOAD:
1373 case TargetOpcode::G_SEXTLOAD: {
1374 auto &LoadMI = cast<GExtLoad>(MI);
1375 Register DstReg = LoadMI.getDstReg();
1376 Register PtrReg = LoadMI.getPointerReg();
1377
1378 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1379 auto &MMO = LoadMI.getMMO();
1380 unsigned MemSize = MMO.getSizeInBits().getValue();
1381
1382 if (MemSize == NarrowSize) {
1383 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1384 } else if (MemSize < NarrowSize) {
1385 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1386 } else if (MemSize > NarrowSize) {
1387 // FIXME: Need to split the load.
1388 return UnableToLegalize;
1389 }
1390
1391 if (isa<GZExtLoad>(LoadMI))
1392 MIRBuilder.buildZExt(DstReg, TmpReg);
1393 else
1394 MIRBuilder.buildSExt(DstReg, TmpReg);
1395
1396 LoadMI.eraseFromParent();
1397 return Legalized;
1398 }
1399 case TargetOpcode::G_STORE: {
1400 auto &StoreMI = cast<GStore>(MI);
1401
1402 Register SrcReg = StoreMI.getValueReg();
1403 LLT SrcTy = MRI.getType(SrcReg);
1404 if (SrcTy.isVector())
1405 return UnableToLegalize;
1406
1407 int NumParts = SizeOp0 / NarrowSize;
1408 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1409 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1410 if (SrcTy.isVector() && LeftoverBits != 0)
1411 return UnableToLegalize;
1412
1413 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1414 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1415 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1416 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1417 StoreMI.eraseFromParent();
1418 return Legalized;
1419 }
1420
1421 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1422 }
1423 case TargetOpcode::G_SELECT:
1424 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1425 case TargetOpcode::G_AND:
1426 case TargetOpcode::G_OR:
1427 case TargetOpcode::G_XOR: {
1428 // Legalize bitwise operation:
1429 // A = BinOp<Ty> B, C
1430 // into:
1431 // B1, ..., BN = G_UNMERGE_VALUES B
1432 // C1, ..., CN = G_UNMERGE_VALUES C
1433 // A1 = BinOp<Ty/N> B1, C1
1434 // ...
1435 // AN = BinOp<Ty/N> BN, CN
1436 // A = G_MERGE_VALUES A1, ..., AN
1437 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1438 }
1439 case TargetOpcode::G_SHL:
1440 case TargetOpcode::G_LSHR:
1441 case TargetOpcode::G_ASHR:
1442 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1443 case TargetOpcode::G_CTLZ:
1444 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1445 case TargetOpcode::G_CTTZ:
1446 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1447 case TargetOpcode::G_CTPOP:
1448 if (TypeIdx == 1)
1449 switch (MI.getOpcode()) {
1450 case TargetOpcode::G_CTLZ:
1451 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1452 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1453 case TargetOpcode::G_CTTZ:
1454 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1455 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1456 case TargetOpcode::G_CTPOP:
1457 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1458 default:
1459 return UnableToLegalize;
1460 }
1461
1462 Observer.changingInstr(MI);
1463 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1464 Observer.changedInstr(MI);
1465 return Legalized;
1466 case TargetOpcode::G_INTTOPTR:
1467 if (TypeIdx != 1)
1468 return UnableToLegalize;
1469
1470 Observer.changingInstr(MI);
1471 narrowScalarSrc(MI, NarrowTy, 1);
1472 Observer.changedInstr(MI);
1473 return Legalized;
1474 case TargetOpcode::G_PTRTOINT:
1475 if (TypeIdx != 0)
1476 return UnableToLegalize;
1477
1478 Observer.changingInstr(MI);
1479 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1480 Observer.changedInstr(MI);
1481 return Legalized;
1482 case TargetOpcode::G_PHI: {
1483 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1484 // NarrowSize.
1485 if (SizeOp0 % NarrowSize != 0)
1486 return UnableToLegalize;
1487
1488 unsigned NumParts = SizeOp0 / NarrowSize;
1489 SmallVector<Register, 2> DstRegs(NumParts);
1490 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1491 Observer.changingInstr(MI);
1492 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1493 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1494 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1495 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1496 SrcRegs[i / 2], MIRBuilder, MRI);
1497 }
1498 MachineBasicBlock &MBB = *MI.getParent();
1500 for (unsigned i = 0; i < NumParts; ++i) {
1501 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1502 MachineInstrBuilder MIB =
1503 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1504 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1505 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1506 }
1507 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1508 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1509 Observer.changedInstr(MI);
1510 MI.eraseFromParent();
1511 return Legalized;
1512 }
1513 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1514 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1515 if (TypeIdx != 2)
1516 return UnableToLegalize;
1517
1518 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1519 Observer.changingInstr(MI);
1520 narrowScalarSrc(MI, NarrowTy, OpIdx);
1521 Observer.changedInstr(MI);
1522 return Legalized;
1523 }
1524 case TargetOpcode::G_ICMP: {
1525 Register LHS = MI.getOperand(2).getReg();
1526 LLT SrcTy = MRI.getType(LHS);
1527 uint64_t SrcSize = SrcTy.getSizeInBits();
1528 CmpInst::Predicate Pred =
1529 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1530
1531 // TODO: Handle the non-equality case for weird sizes.
1532 if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
1533 return UnableToLegalize;
1534
1535 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1536 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1537 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1538 LHSLeftoverRegs, MIRBuilder, MRI))
1539 return UnableToLegalize;
1540
1541 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1542 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1543 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1544 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1545 return UnableToLegalize;
1546
1547 // We now have the LHS and RHS of the compare split into narrow-type
1548 // registers, plus potentially some leftover type.
1549 Register Dst = MI.getOperand(0).getReg();
1550 LLT ResTy = MRI.getType(Dst);
1551 if (ICmpInst::isEquality(Pred)) {
1552 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1553 // them together. For each equal part, the result should be all 0s. For
1554 // each non-equal part, we'll get at least one 1.
1555 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1556 SmallVector<Register, 4> Xors;
1557 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1558 auto LHS = std::get<0>(LHSAndRHS);
1559 auto RHS = std::get<1>(LHSAndRHS);
1560 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1561 Xors.push_back(Xor);
1562 }
1563
1564 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1565 // to the desired narrow type so that we can OR them together later.
1566 SmallVector<Register, 4> WidenedXors;
1567 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1568 auto LHS = std::get<0>(LHSAndRHS);
1569 auto RHS = std::get<1>(LHSAndRHS);
1570 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1571 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1572 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1573 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1574 Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
1575 }
1576
1577 // Now, for each part we broke up, we know if they are equal/not equal
1578 // based off the G_XOR. We can OR these all together and compare against
1579 // 0 to get the result.
1580 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1581 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1582 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1583 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1584 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1585 } else {
1586 // TODO: Handle non-power-of-two types.
1587 assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
1588 assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
1589 Register LHSL = LHSPartRegs[0];
1590 Register LHSH = LHSPartRegs[1];
1591 Register RHSL = RHSPartRegs[0];
1592 Register RHSH = RHSPartRegs[1];
1593 MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1594 MachineInstrBuilder CmpHEQ =
1595 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, ResTy, LHSH, RHSH);
1596 MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
1597 ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1598 MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
1599 }
1600 MI.eraseFromParent();
1601 return Legalized;
1602 }
1603 case TargetOpcode::G_FCMP:
1604 if (TypeIdx != 0)
1605 return UnableToLegalize;
1606
1607 Observer.changingInstr(MI);
1608 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1609 Observer.changedInstr(MI);
1610 return Legalized;
1611
1612 case TargetOpcode::G_SEXT_INREG: {
1613 if (TypeIdx != 0)
1614 return UnableToLegalize;
1615
1616 int64_t SizeInBits = MI.getOperand(2).getImm();
1617
1618 // So long as the new type has more bits than the bits we're extending we
1619 // don't need to break it apart.
1620 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1621 Observer.changingInstr(MI);
1622 // We don't lose any non-extension bits by truncating the src and
1623 // sign-extending the dst.
1624 MachineOperand &MO1 = MI.getOperand(1);
1625 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1626 MO1.setReg(TruncMIB.getReg(0));
1627
1628 MachineOperand &MO2 = MI.getOperand(0);
1629 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1630 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1631 MIRBuilder.buildSExt(MO2, DstExt);
1632 MO2.setReg(DstExt);
1633 Observer.changedInstr(MI);
1634 return Legalized;
1635 }
1636
1637 // Break it apart. Components below the extension point are unmodified. The
1638 // component containing the extension point becomes a narrower SEXT_INREG.
1639 // Components above it are ashr'd from the component containing the
1640 // extension point.
1641 if (SizeOp0 % NarrowSize != 0)
1642 return UnableToLegalize;
1643 int NumParts = SizeOp0 / NarrowSize;
1644
1645 // List the registers where the destination will be scattered.
1646 SmallVector<Register, 2> DstRegs;
1647 // List the registers where the source will be split.
1648 SmallVector<Register, 2> SrcRegs;
1649
1650 // Create all the temporary registers.
1651 for (int i = 0; i < NumParts; ++i) {
1652 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1653
1654 SrcRegs.push_back(SrcReg);
1655 }
1656
1657 // Explode the big arguments into smaller chunks.
1658 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1659
1660 Register AshrCstReg =
1661 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1662 .getReg(0);
1663 Register FullExtensionReg;
1664 Register PartialExtensionReg;
1665
1666 // Do the operation on each small part.
1667 for (int i = 0; i < NumParts; ++i) {
1668 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
1669 DstRegs.push_back(SrcRegs[i]);
1670 PartialExtensionReg = DstRegs.back();
1671 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1672 assert(PartialExtensionReg &&
1673 "Expected to visit partial extension before full");
1674 if (FullExtensionReg) {
1675 DstRegs.push_back(FullExtensionReg);
1676 continue;
1677 }
1678 DstRegs.push_back(
1679 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1680 .getReg(0));
1681 FullExtensionReg = DstRegs.back();
1682 } else {
1683 DstRegs.push_back(
1684 MIRBuilder
1685 .buildInstr(
1686 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1687 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1688 .getReg(0));
1689 PartialExtensionReg = DstRegs.back();
1690 }
1691 }
1692
1693 // Gather the destination registers into the final destination.
1694 Register DstReg = MI.getOperand(0).getReg();
1695 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1696 MI.eraseFromParent();
1697 return Legalized;
1698 }
1699 case TargetOpcode::G_BSWAP:
1700 case TargetOpcode::G_BITREVERSE: {
1701 if (SizeOp0 % NarrowSize != 0)
1702 return UnableToLegalize;
1703
1705 SmallVector<Register, 2> SrcRegs, DstRegs;
1706 unsigned NumParts = SizeOp0 / NarrowSize;
1707 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
1708 MIRBuilder, MRI);
1709
1710 for (unsigned i = 0; i < NumParts; ++i) {
1711 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1712 {SrcRegs[NumParts - 1 - i]});
1713 DstRegs.push_back(DstPart.getReg(0));
1714 }
1715
1716 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1717
1718 Observer.changedInstr(MI);
1719 MI.eraseFromParent();
1720 return Legalized;
1721 }
1722 case TargetOpcode::G_PTR_ADD:
1723 case TargetOpcode::G_PTRMASK: {
1724 if (TypeIdx != 1)
1725 return UnableToLegalize;
1726 Observer.changingInstr(MI);
1727 narrowScalarSrc(MI, NarrowTy, 2);
1728 Observer.changedInstr(MI);
1729 return Legalized;
1730 }
1731 case TargetOpcode::G_FPTOUI:
1732 case TargetOpcode::G_FPTOSI:
1733 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1734 case TargetOpcode::G_FPEXT:
1735 if (TypeIdx != 0)
1736 return UnableToLegalize;
1738 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1740 return Legalized;
1741 case TargetOpcode::G_FLDEXP:
1742 case TargetOpcode::G_STRICT_FLDEXP:
1743 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
1744 case TargetOpcode::G_VSCALE: {
1745 Register Dst = MI.getOperand(0).getReg();
1746 LLT Ty = MRI.getType(Dst);
1747
1748 // Assume VSCALE(1) fits into a legal integer
1749 const APInt One(NarrowTy.getSizeInBits(), 1);
1750 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
1751 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
1752 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
1753 MIRBuilder.buildMul(Dst, ZExt, C);
1754
1755 MI.eraseFromParent();
1756 return Legalized;
1757 }
1758 }
1759}
1760
1761 Register LegalizerHelper::coerceToScalar(Register Val) {
1762   LLT Ty = MRI.getType(Val);
1763 if (Ty.isScalar())
1764 return Val;
1765
1766   const DataLayout &DL = MIRBuilder.getDataLayout();
1767 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1768 if (Ty.isPointer()) {
1769 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1770 return Register();
1771 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1772 }
1773
1774 Register NewVal = Val;
1775
1776 assert(Ty.isVector());
1777 if (Ty.isPointerVector())
1778 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1779 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1780}
1781
1782 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1783                                      unsigned OpIdx, unsigned ExtOpcode) {
1784 MachineOperand &MO = MI.getOperand(OpIdx);
1785 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1786 MO.setReg(ExtB.getReg(0));
1787}
1788
1789 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1790                                       unsigned OpIdx) {
1791 MachineOperand &MO = MI.getOperand(OpIdx);
1792 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1793 MO.setReg(ExtB.getReg(0));
1794}
1795
1796 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1797                                      unsigned OpIdx, unsigned TruncOpcode) {
1798 MachineOperand &MO = MI.getOperand(OpIdx);
1799   Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1800   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1801 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1802 MO.setReg(DstExt);
1803}
1804
1805 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1806                                       unsigned OpIdx, unsigned ExtOpcode) {
1807 MachineOperand &MO = MI.getOperand(OpIdx);
1808   Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1809   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1810 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1811 MO.setReg(DstTrunc);
1812}
1813
1814 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1815                                             unsigned OpIdx) {
1816   MachineOperand &MO = MI.getOperand(OpIdx);
1817   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1818 Register Dst = MO.getReg();
1819 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1820   MO.setReg(DstExt);
1821   MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
1822}
1823
1824 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1825                                             unsigned OpIdx) {
1826   MachineOperand &MO = MI.getOperand(OpIdx);
1827   SmallVector<Register, 8> Regs;
1828   MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
1829 }
1830
1831void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1832 MachineOperand &Op = MI.getOperand(OpIdx);
1833 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1834}
1835
1836void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1837 MachineOperand &MO = MI.getOperand(OpIdx);
1838   Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1839   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1840 MIRBuilder.buildBitcast(MO, CastDst);
1841 MO.setReg(CastDst);
1842}
1843
1844 LegalizerHelper::LegalizeResult
1845 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1846 LLT WideTy) {
1847 if (TypeIdx != 1)
1848 return UnableToLegalize;
1849
1850 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
1851 if (DstTy.isVector())
1852 return UnableToLegalize;
1853
1854 LLT SrcTy = MRI.getType(Src1Reg);
1855 const int DstSize = DstTy.getSizeInBits();
1856 const int SrcSize = SrcTy.getSizeInBits();
1857 const int WideSize = WideTy.getSizeInBits();
1858 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1859
1860 unsigned NumOps = MI.getNumOperands();
1861 unsigned NumSrc = MI.getNumOperands() - 1;
1862 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1863
1864 if (WideSize >= DstSize) {
1865 // Directly pack the bits in the target type.
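    // E.g. (a sketch with illustrative register names) widening
    // %d:_(s16) = G_MERGE_VALUES %a:_(s8), %b:_(s8) to s32 produces:
    //   %a32:_(s32) = G_ZEXT %a
    //   %b32:_(s32) = G_ZEXT %b
    //   %sh:_(s32) = G_SHL %b32, 8
    //   %or:_(s32) = G_OR %a32, %sh
    //   %d:_(s16) = G_TRUNC %or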
1866 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
1867
1868 for (unsigned I = 2; I != NumOps; ++I) {
1869 const unsigned Offset = (I - 1) * PartSize;
1870
1871 Register SrcReg = MI.getOperand(I).getReg();
1872 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1873
1874 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1875
1876 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1877 MRI.createGenericVirtualRegister(WideTy);
1878
1879 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1880 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1881 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1882 ResultReg = NextResult;
1883 }
1884
1885 if (WideSize > DstSize)
1886 MIRBuilder.buildTrunc(DstReg, ResultReg);
1887 else if (DstTy.isPointer())
1888 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1889
1890 MI.eraseFromParent();
1891 return Legalized;
1892 }
1893
1894 // Unmerge the original values to the GCD type, and recombine to the next
1895 // multiple greater than the original type.
1896 //
1897 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1898 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1899 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1900 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1901 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1902 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1903 // %12:_(s12) = G_MERGE_VALUES %10, %11
1904 //
1905 // Padding with undef if necessary:
1906 //
1907 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1908 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1909 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1910 // %7:_(s2) = G_IMPLICIT_DEF
1911 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1912 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1913 // %10:_(s12) = G_MERGE_VALUES %8, %9
1914
1915 const int GCD = std::gcd(SrcSize, WideSize);
1916 LLT GCDTy = LLT::scalar(GCD);
1917
1919 SmallVector<Register, 8> NewMergeRegs;
1920 SmallVector<Register, 8> Unmerges;
1921 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1922
1923 // Decompose the original operands if they don't evenly divide.
1924 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
1925 Register SrcReg = MO.getReg();
1926 if (GCD == SrcSize) {
1927 Unmerges.push_back(SrcReg);
1928 } else {
1929 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1930 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1931 Unmerges.push_back(Unmerge.getReg(J));
1932 }
1933 }
1934
1935 // Pad with undef to the next size that is a multiple of the requested size.
1936 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1937 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1938 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1939 Unmerges.push_back(UndefReg);
1940 }
1941
1942 const int PartsPerGCD = WideSize / GCD;
1943
1944 // Build merges of each piece.
1945 ArrayRef<Register> Slicer(Unmerges);
1946 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1947 auto Merge =
1948 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
1949 NewMergeRegs.push_back(Merge.getReg(0));
1950 }
1951
1952 // A truncate may be necessary if the requested type doesn't evenly divide the
1953 // original result type.
1954 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1955 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
1956 } else {
1957 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
1958 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1959 }
1960
1961 MI.eraseFromParent();
1962 return Legalized;
1963}
1964
1965 LegalizerHelper::LegalizeResult
1966 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1967 LLT WideTy) {
1968 if (TypeIdx != 0)
1969 return UnableToLegalize;
1970
1971 int NumDst = MI.getNumOperands() - 1;
1972 Register SrcReg = MI.getOperand(NumDst).getReg();
1973 LLT SrcTy = MRI.getType(SrcReg);
1974 if (SrcTy.isVector())
1975 return UnableToLegalize;
1976
1977 Register Dst0Reg = MI.getOperand(0).getReg();
1978 LLT DstTy = MRI.getType(Dst0Reg);
1979 if (!DstTy.isScalar())
1980 return UnableToLegalize;
1981
1982 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
1983     if (SrcTy.isPointer()) {
1984       const DataLayout &DL = MIRBuilder.getDataLayout();
1985 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
1986 LLVM_DEBUG(
1987 dbgs() << "Not casting non-integral address space integer\n");
1988 return UnableToLegalize;
1989 }
1990
1991 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
1992 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
1993 }
1994
1995 // Widen SrcTy to WideTy. This does not affect the result, but since the
1996 // user requested this size, it is probably better handled than SrcTy and
1997 // should reduce the total number of legalization artifacts.
1998 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1999 SrcTy = WideTy;
2000 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2001 }
2002
2003     // There's no unmerge type to target. Directly extract the bits from the
2004     // source type.
2005 unsigned DstSize = DstTy.getSizeInBits();
2006
2007 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2008 for (int I = 1; I != NumDst; ++I) {
2009 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2010 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2011 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2012 }
2013
2014 MI.eraseFromParent();
2015 return Legalized;
2016 }
2017
2018 // Extend the source to a wider type.
2019 LLT LCMTy = getLCMType(SrcTy, WideTy);
2020
2021 Register WideSrc = SrcReg;
2022 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2023 // TODO: If this is an integral address space, cast to integer and anyext.
2024 if (SrcTy.isPointer()) {
2025 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2026 return UnableToLegalize;
2027 }
2028
2029 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2030 }
2031
2032 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2033
2034 // Create a sequence of unmerges and merges to the original results. Since we
2035 // may have widened the source, we will need to pad the results with dead defs
2036 // to cover the source register.
2037 // e.g. widen s48 to s64:
2038 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2039 //
2040 // =>
2041 // %4:_(s192) = G_ANYEXT %0:_(s96)
2042 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2043 // ; unpack to GCD type, with extra dead defs
2044 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2045 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2046 // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
2047 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2048 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2049 const LLT GCDTy = getGCDType(WideTy, DstTy);
2050 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2051 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2052
2053 // Directly unmerge to the destination without going through a GCD type
2054 // if possible
2055 if (PartsPerRemerge == 1) {
2056 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2057
2058 for (int I = 0; I != NumUnmerge; ++I) {
2059 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2060
2061 for (int J = 0; J != PartsPerUnmerge; ++J) {
2062 int Idx = I * PartsPerUnmerge + J;
2063 if (Idx < NumDst)
2064 MIB.addDef(MI.getOperand(Idx).getReg());
2065 else {
2066 // Create dead def for excess components.
2067 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2068 }
2069 }
2070
2071 MIB.addUse(Unmerge.getReg(I));
2072 }
2073   } else {
2074     SmallVector<Register, 16> Parts;
2075 for (int J = 0; J != NumUnmerge; ++J)
2076 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2077
2078 SmallVector<Register, 8> RemergeParts;
2079 for (int I = 0; I != NumDst; ++I) {
2080 for (int J = 0; J < PartsPerRemerge; ++J) {
2081 const int Idx = I * PartsPerRemerge + J;
2082 RemergeParts.emplace_back(Parts[Idx]);
2083 }
2084
2085 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2086 RemergeParts.clear();
2087 }
2088 }
2089
2090 MI.eraseFromParent();
2091 return Legalized;
2092}
2093
2094 LegalizerHelper::LegalizeResult
2095 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2096 LLT WideTy) {
2097 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2098 unsigned Offset = MI.getOperand(2).getImm();
2099
2100 if (TypeIdx == 0) {
2101 if (SrcTy.isVector() || DstTy.isVector())
2102 return UnableToLegalize;
2103
2104 SrcOp Src(SrcReg);
2105 if (SrcTy.isPointer()) {
2106 // Extracts from pointers can be handled only if they are really just
2107       // simple integers.
2108       const DataLayout &DL = MIRBuilder.getDataLayout();
2109 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2110 return UnableToLegalize;
2111
2112 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2113 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2114 SrcTy = SrcAsIntTy;
2115 }
2116
2117 if (DstTy.isPointer())
2118 return UnableToLegalize;
2119
2120 if (Offset == 0) {
2121 // Avoid a shift in the degenerate case.
2122 MIRBuilder.buildTrunc(DstReg,
2123 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2124 MI.eraseFromParent();
2125 return Legalized;
2126 }
2127
2128 // Do a shift in the source type.
2129 LLT ShiftTy = SrcTy;
2130 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2131 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2132 ShiftTy = WideTy;
2133 }
2134
2135 auto LShr = MIRBuilder.buildLShr(
2136 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2137 MIRBuilder.buildTrunc(DstReg, LShr);
2138 MI.eraseFromParent();
2139 return Legalized;
2140 }
2141
2142 if (SrcTy.isScalar()) {
2144 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2146 return Legalized;
2147 }
2148
2149 if (!SrcTy.isVector())
2150 return UnableToLegalize;
2151
2152 if (DstTy != SrcTy.getElementType())
2153 return UnableToLegalize;
2154
2155 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2156 return UnableToLegalize;
2157
2159 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2160
2161 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2162 Offset);
2163 widenScalarDst(MI, WideTy.getScalarType(), 0);
2165 return Legalized;
2166}
2167
2168 LegalizerHelper::LegalizeResult
2169 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2170 LLT WideTy) {
2171 if (TypeIdx != 0 || WideTy.isVector())
2172 return UnableToLegalize;
2174 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2175 widenScalarDst(MI, WideTy);
2177 return Legalized;
2178}
2179
2180 LegalizerHelper::LegalizeResult
2181 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2182 LLT WideTy) {
2183 unsigned Opcode;
2184 unsigned ExtOpcode;
2185 std::optional<Register> CarryIn;
2186 switch (MI.getOpcode()) {
2187 default:
2188 llvm_unreachable("Unexpected opcode!");
2189 case TargetOpcode::G_SADDO:
2190 Opcode = TargetOpcode::G_ADD;
2191 ExtOpcode = TargetOpcode::G_SEXT;
2192 break;
2193 case TargetOpcode::G_SSUBO:
2194 Opcode = TargetOpcode::G_SUB;
2195 ExtOpcode = TargetOpcode::G_SEXT;
2196 break;
2197 case TargetOpcode::G_UADDO:
2198 Opcode = TargetOpcode::G_ADD;
2199 ExtOpcode = TargetOpcode::G_ZEXT;
2200 break;
2201 case TargetOpcode::G_USUBO:
2202 Opcode = TargetOpcode::G_SUB;
2203 ExtOpcode = TargetOpcode::G_ZEXT;
2204 break;
2205 case TargetOpcode::G_SADDE:
2206 Opcode = TargetOpcode::G_UADDE;
2207 ExtOpcode = TargetOpcode::G_SEXT;
2208 CarryIn = MI.getOperand(4).getReg();
2209 break;
2210 case TargetOpcode::G_SSUBE:
2211 Opcode = TargetOpcode::G_USUBE;
2212 ExtOpcode = TargetOpcode::G_SEXT;
2213 CarryIn = MI.getOperand(4).getReg();
2214 break;
2215 case TargetOpcode::G_UADDE:
2216 Opcode = TargetOpcode::G_UADDE;
2217 ExtOpcode = TargetOpcode::G_ZEXT;
2218 CarryIn = MI.getOperand(4).getReg();
2219 break;
2220 case TargetOpcode::G_USUBE:
2221 Opcode = TargetOpcode::G_USUBE;
2222 ExtOpcode = TargetOpcode::G_ZEXT;
2223 CarryIn = MI.getOperand(4).getReg();
2224 break;
2225 }
2226
2227 if (TypeIdx == 1) {
2228 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2229
2231 if (CarryIn)
2232 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2233 widenScalarDst(MI, WideTy, 1);
2234
2236 return Legalized;
2237 }
2238
2239 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2240 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2241 // Do the arithmetic in the larger type.
2242 Register NewOp;
2243 if (CarryIn) {
2244 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2245 NewOp = MIRBuilder
2246 .buildInstr(Opcode, {WideTy, CarryOutTy},
2247 {LHSExt, RHSExt, *CarryIn})
2248 .getReg(0);
2249 } else {
2250 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2251 }
2252 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2253 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2254 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2255 // There is no overflow if the ExtOp is the same as NewOp.
2256 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2257 // Now trunc the NewOp to the original result.
2258 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2259 MI.eraseFromParent();
2260 return Legalized;
2261}
2262
2263 LegalizerHelper::LegalizeResult
2264 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2265 LLT WideTy) {
2266 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2267 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2268 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2269 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2270 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2271 // We can convert this to:
2272 // 1. Any extend iN to iM
2273 // 2. SHL by M-N
2274 // 3. [US][ADD|SUB|SHL]SAT
2275 // 4. L/ASHR by M-N
2276 //
2277 // It may be more efficient to lower this to a min and a max operation in
2278 // the higher precision arithmetic if the promoted operation isn't legal,
2279 // but this decision is up to the target's lowering request.
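  // E.g. (a sketch with illustrative register names) promoting
  // %r:_(s8) = G_UADDSAT %a:_(s8), %b:_(s8) to s32 produces:
  //   %a32:_(s32) = G_ANYEXT %a
  //   %b32:_(s32) = G_ANYEXT %b
  //   %k:_(s32) = G_CONSTANT i32 24
  //   %as:_(s32) = G_SHL %a32, %k
  //   %bs:_(s32) = G_SHL %b32, %k
  //   %sat:_(s32) = G_UADDSAT %as, %bs
  //   %sr:_(s32) = G_LSHR %sat, %k
  //   %r:_(s8) = G_TRUNC %sr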
2280 Register DstReg = MI.getOperand(0).getReg();
2281
2282 unsigned NewBits = WideTy.getScalarSizeInBits();
2283 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2284
2285   // For shift operations, the RHS is a shift amount: it must be zero-extended
2286   // to preserve its unsigned value, and must not itself be left-shifted.
2287 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2288 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2289 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2290 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2291 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2292 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2293
2294 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2295 {ShiftL, ShiftR}, MI.getFlags());
2296
2297 // Use a shift that will preserve the number of sign bits when the trunc is
2298 // folded away.
2299 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2300 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2301
2302 MIRBuilder.buildTrunc(DstReg, Result);
2303 MI.eraseFromParent();
2304 return Legalized;
2305}
2306
2307 LegalizerHelper::LegalizeResult
2308 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2309 LLT WideTy) {
2310 if (TypeIdx == 1) {
2312 widenScalarDst(MI, WideTy, 1);
2314 return Legalized;
2315 }
2316
2317 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2318 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2319 LLT SrcTy = MRI.getType(LHS);
2320 LLT OverflowTy = MRI.getType(OriginalOverflow);
2321 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2322
2323 // To determine if the result overflowed in the larger type, we extend the
2324 // input to the larger type, do the multiply (checking if it overflows),
2325 // then also check the high bits of the result to see if overflow happened
2326 // there.
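  // E.g. (a sketch) widening %r:_(s8), %o:_(s1) = G_UMULO %a:_(s8), %b:_(s8)
  // to s16, where the wide multiply can no longer overflow:
  //   %a16:_(s16) = G_ZEXT %a
  //   %b16:_(s16) = G_ZEXT %b
  //   %mul:_(s16) = G_MUL %a16, %b16
  //   %r:_(s8) = G_TRUNC %mul
  //   %low:_(s16) = G_AND %mul, 255   ; zero-extend-in-register of the low 8 bits
  //   %o:_(s1) = G_ICMP intpred(ne), %mul, %low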
2327 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2328 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2329 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2330
2331 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2332 // so we don't need to check the overflow result of larger type Mulo.
2333 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2334
2335 unsigned MulOpc =
2336 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2337
2339 if (WideMulCanOverflow)
2340 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2341 {LeftOperand, RightOperand});
2342 else
2343 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2344
2345 auto Mul = Mulo->getOperand(0);
2346 MIRBuilder.buildTrunc(Result, Mul);
2347
2348 MachineInstrBuilder ExtResult;
2349 // Overflow occurred if it occurred in the larger type, or if the high part
2350 // of the result does not zero/sign-extend the low part. Check this second
2351 // possibility first.
2352 if (IsSigned) {
2353 // For signed, overflow occurred when the high part does not sign-extend
2354 // the low part.
2355 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2356 } else {
2357 // Unsigned overflow occurred when the high part does not zero-extend the
2358 // low part.
2359 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2360 }
2361
2362 if (WideMulCanOverflow) {
2363 auto Overflow =
2364 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2365 // Finally check if the multiplication in the larger type itself overflowed.
2366 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2367 } else {
2368 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2369 }
2370 MI.eraseFromParent();
2371 return Legalized;
2372}
2373
2374 LegalizerHelper::LegalizeResult
2375 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2376   switch (MI.getOpcode()) {
2377 default:
2378 return UnableToLegalize;
2379 case TargetOpcode::G_ATOMICRMW_XCHG:
2380 case TargetOpcode::G_ATOMICRMW_ADD:
2381 case TargetOpcode::G_ATOMICRMW_SUB:
2382 case TargetOpcode::G_ATOMICRMW_AND:
2383 case TargetOpcode::G_ATOMICRMW_OR:
2384 case TargetOpcode::G_ATOMICRMW_XOR:
2385 case TargetOpcode::G_ATOMICRMW_MIN:
2386 case TargetOpcode::G_ATOMICRMW_MAX:
2387 case TargetOpcode::G_ATOMICRMW_UMIN:
2388 case TargetOpcode::G_ATOMICRMW_UMAX:
2389 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2391 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2392 widenScalarDst(MI, WideTy, 0);
2394 return Legalized;
2395 case TargetOpcode::G_ATOMIC_CMPXCHG:
2396 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2398 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2399 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2400 widenScalarDst(MI, WideTy, 0);
2402 return Legalized;
2403 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2404 if (TypeIdx == 0) {
2406 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2407 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2408 widenScalarDst(MI, WideTy, 0);
2410 return Legalized;
2411 }
2412 assert(TypeIdx == 1 &&
2413 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2415 widenScalarDst(MI, WideTy, 1);
2417 return Legalized;
2418 case TargetOpcode::G_EXTRACT:
2419 return widenScalarExtract(MI, TypeIdx, WideTy);
2420 case TargetOpcode::G_INSERT:
2421 return widenScalarInsert(MI, TypeIdx, WideTy);
2422 case TargetOpcode::G_MERGE_VALUES:
2423 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2424 case TargetOpcode::G_UNMERGE_VALUES:
2425 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2426 case TargetOpcode::G_SADDO:
2427 case TargetOpcode::G_SSUBO:
2428 case TargetOpcode::G_UADDO:
2429 case TargetOpcode::G_USUBO:
2430 case TargetOpcode::G_SADDE:
2431 case TargetOpcode::G_SSUBE:
2432 case TargetOpcode::G_UADDE:
2433 case TargetOpcode::G_USUBE:
2434 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2435 case TargetOpcode::G_UMULO:
2436 case TargetOpcode::G_SMULO:
2437 return widenScalarMulo(MI, TypeIdx, WideTy);
2438 case TargetOpcode::G_SADDSAT:
2439 case TargetOpcode::G_SSUBSAT:
2440 case TargetOpcode::G_SSHLSAT:
2441 case TargetOpcode::G_UADDSAT:
2442 case TargetOpcode::G_USUBSAT:
2443 case TargetOpcode::G_USHLSAT:
2444 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2445 case TargetOpcode::G_CTTZ:
2446 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2447 case TargetOpcode::G_CTLZ:
2448 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2449 case TargetOpcode::G_CTPOP: {
2450 if (TypeIdx == 0) {
2452 widenScalarDst(MI, WideTy, 0);
2454 return Legalized;
2455 }
2456
2457 Register SrcReg = MI.getOperand(1).getReg();
2458
2459 // First extend the input.
2460 unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
2461 MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
2462 ? TargetOpcode::G_ANYEXT
2463 : TargetOpcode::G_ZEXT;
2464 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2465 LLT CurTy = MRI.getType(SrcReg);
2466 unsigned NewOpc = MI.getOpcode();
2467 if (NewOpc == TargetOpcode::G_CTTZ) {
2468 // The count is the same in the larger type except if the original
2469 // value was zero. This can be handled by setting the bit just off
2470 // the top of the original type.
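      // E.g. for a s8 source widened to s32, OR in 0x100 so that a zero input
      // still yields the expected count of 8.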
2471       auto TopBit =
2472           APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2473 MIBSrc = MIRBuilder.buildOr(
2474 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2475 // Now we know the operand is non-zero, use the more relaxed opcode.
2476 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2477 }
2478
2479 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2480
2481 if (MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2482 // An optimization where the result is the CTLZ after the left shift by
2483       // (the difference in width between WideTy and CurTy), that is,
2484 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2485 // Result = ctlz MIBSrc
2486 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2487 MIRBuilder.buildConstant(WideTy, SizeDiff));
2488 }
2489
2490 // Perform the operation at the larger size.
2491 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2492 // This is already the correct result for CTPOP and CTTZs
2493 if (MI.getOpcode() == TargetOpcode::G_CTLZ) {
2494       // The correct result is NewOp - (the difference in width between WideTy and CurTy).
2495 MIBNewOp = MIRBuilder.buildSub(
2496 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2497 }
2498
2499 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2500 MI.eraseFromParent();
2501 return Legalized;
2502 }
2503 case TargetOpcode::G_BSWAP: {
2505 Register DstReg = MI.getOperand(0).getReg();
2506
2507 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2508 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2509 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2510 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2511
2512 MI.getOperand(0).setReg(DstExt);
2513
2515
2516 LLT Ty = MRI.getType(DstReg);
2517 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2518 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2519 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2520
2521 MIRBuilder.buildTrunc(DstReg, ShrReg);
2523 return Legalized;
2524 }
2525 case TargetOpcode::G_BITREVERSE: {
2527
2528 Register DstReg = MI.getOperand(0).getReg();
2529 LLT Ty = MRI.getType(DstReg);
2530 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2531
2532 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2533 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2534 MI.getOperand(0).setReg(DstExt);
2536
2537 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2538 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2539 MIRBuilder.buildTrunc(DstReg, Shift);
2541 return Legalized;
2542 }
2543 case TargetOpcode::G_FREEZE:
2544 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2546 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2547 widenScalarDst(MI, WideTy);
2549 return Legalized;
2550
2551 case TargetOpcode::G_ABS:
2553 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2554 widenScalarDst(MI, WideTy);
2556 return Legalized;
2557
2558 case TargetOpcode::G_ADD:
2559 case TargetOpcode::G_AND:
2560 case TargetOpcode::G_MUL:
2561 case TargetOpcode::G_OR:
2562 case TargetOpcode::G_XOR:
2563 case TargetOpcode::G_SUB:
2564 case TargetOpcode::G_SHUFFLE_VECTOR:
2565     // Perform the operation at the larger width (any extension is fine here, high bits
2566 // don't affect the result) and then truncate the result back to the
2567 // original type.
2569 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2570 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2571 widenScalarDst(MI, WideTy);
2573 return Legalized;
2574
2575 case TargetOpcode::G_SBFX:
2576 case TargetOpcode::G_UBFX:
2578
2579 if (TypeIdx == 0) {
2580 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2581 widenScalarDst(MI, WideTy);
2582 } else {
2583 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2584 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2585 }
2586
2588 return Legalized;
2589
2590 case TargetOpcode::G_SHL:
2592
2593 if (TypeIdx == 0) {
2594 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2595 widenScalarDst(MI, WideTy);
2596 } else {
2597 assert(TypeIdx == 1);
2598 // The "number of bits to shift" operand must preserve its value as an
2599 // unsigned integer:
2600 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2601 }
2602
2604 return Legalized;
2605
2606 case TargetOpcode::G_ROTR:
2607 case TargetOpcode::G_ROTL:
2608 if (TypeIdx != 1)
2609 return UnableToLegalize;
2610
2612 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2614 return Legalized;
2615
2616 case TargetOpcode::G_SDIV:
2617 case TargetOpcode::G_SREM:
2618 case TargetOpcode::G_SMIN:
2619 case TargetOpcode::G_SMAX:
2621 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2622 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2623 widenScalarDst(MI, WideTy);
2625 return Legalized;
2626
2627 case TargetOpcode::G_SDIVREM:
2629 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2630 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2631 widenScalarDst(MI, WideTy);
2632 widenScalarDst(MI, WideTy, 1);
2634 return Legalized;
2635
2636 case TargetOpcode::G_ASHR:
2637 case TargetOpcode::G_LSHR:
2639
2640 if (TypeIdx == 0) {
2641 unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
2642 TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2643
2644 widenScalarSrc(MI, WideTy, 1, CvtOp);
2645 widenScalarDst(MI, WideTy);
2646 } else {
2647 assert(TypeIdx == 1);
2648 // The "number of bits to shift" operand must preserve its value as an
2649 // unsigned integer:
2650 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2651 }
2652
2654 return Legalized;
2655 case TargetOpcode::G_UDIV:
2656 case TargetOpcode::G_UREM:
2657 case TargetOpcode::G_UMIN:
2658 case TargetOpcode::G_UMAX:
2660 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2661 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2662 widenScalarDst(MI, WideTy);
2664 return Legalized;
2665
2666 case TargetOpcode::G_UDIVREM:
2668 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2669 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2670 widenScalarDst(MI, WideTy);
2671 widenScalarDst(MI, WideTy, 1);
2673 return Legalized;
2674
2675 case TargetOpcode::G_SELECT:
2677 if (TypeIdx == 0) {
2678 // Perform operation at larger width (any extension is fine here, high
2679 // bits don't affect the result) and then truncate the result back to the
2680 // original type.
2681 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2682 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2683 widenScalarDst(MI, WideTy);
2684 } else {
2685 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
2686 // Explicit extension is required here since high bits affect the result.
2687 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
2688 }
2690 return Legalized;
2691
2692 case TargetOpcode::G_FPTOSI:
2693 case TargetOpcode::G_FPTOUI:
2694 case TargetOpcode::G_INTRINSIC_LRINT:
2695 case TargetOpcode::G_INTRINSIC_LLRINT:
2696 case TargetOpcode::G_IS_FPCLASS:
2698
2699 if (TypeIdx == 0)
2700 widenScalarDst(MI, WideTy);
2701 else
2702 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2703
2705 return Legalized;
2706 case TargetOpcode::G_SITOFP:
2708
2709 if (TypeIdx == 0)
2710 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2711 else
2712 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2713
2715 return Legalized;
2716 case TargetOpcode::G_UITOFP:
2718
2719 if (TypeIdx == 0)
2720 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2721 else
2722 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2723
2725 return Legalized;
2726 case TargetOpcode::G_LOAD:
2727 case TargetOpcode::G_SEXTLOAD:
2728 case TargetOpcode::G_ZEXTLOAD:
2730 widenScalarDst(MI, WideTy);
2732 return Legalized;
2733
2734 case TargetOpcode::G_STORE: {
2735 if (TypeIdx != 0)
2736 return UnableToLegalize;
2737
2738 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2739 if (!Ty.isScalar())
2740 return UnableToLegalize;
2741
2743
2744 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
2745 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2746 widenScalarSrc(MI, WideTy, 0, ExtType);
2747
2749 return Legalized;
2750 }
2751 case TargetOpcode::G_CONSTANT: {
2752 MachineOperand &SrcMO = MI.getOperand(1);
2754 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
2755 MRI.getType(MI.getOperand(0).getReg()));
2756 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2757 ExtOpc == TargetOpcode::G_ANYEXT) &&
2758 "Illegal Extend");
2759 const APInt &SrcVal = SrcMO.getCImm()->getValue();
2760 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2761 ? SrcVal.sext(WideTy.getSizeInBits())
2762 : SrcVal.zext(WideTy.getSizeInBits());
2764 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
2765
2766 widenScalarDst(MI, WideTy);
2768 return Legalized;
2769 }
2770 case TargetOpcode::G_FCONSTANT: {
2771 // To avoid changing the bits of the constant due to extension to a larger
2772 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
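    // E.g. a half-precision 1.0 becomes %c:_(s16) = G_CONSTANT i16 0x3C00
    // (an illustrative sketch), and the s16 def is then widened like any
    // other integer constant.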
2773 MachineOperand &SrcMO = MI.getOperand(1);
2774 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
2776 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
2777 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
2778 MI.eraseFromParent();
2779 return Legalized;
2780 }
2781 case TargetOpcode::G_IMPLICIT_DEF: {
2783 widenScalarDst(MI, WideTy);
2785 return Legalized;
2786 }
2787 case TargetOpcode::G_BRCOND:
2789 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
2791 return Legalized;
2792
2793 case TargetOpcode::G_FCMP:
2795 if (TypeIdx == 0)
2796 widenScalarDst(MI, WideTy);
2797 else {
2798 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2799 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
2800 }
2802 return Legalized;
2803
2804 case TargetOpcode::G_ICMP:
2806 if (TypeIdx == 0)
2807 widenScalarDst(MI, WideTy);
2808 else {
2809 unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
2810 MI.getOperand(1).getPredicate()))
2811 ? TargetOpcode::G_SEXT
2812 : TargetOpcode::G_ZEXT;
2813 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
2814 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
2815 }
2817 return Legalized;
2818
2819 case TargetOpcode::G_PTR_ADD:
2820 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
2822 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2824 return Legalized;
2825
2826 case TargetOpcode::G_PHI: {
2827 assert(TypeIdx == 0 && "Expecting only Idx 0");
2828
2830 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
2831 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2833 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
2834 }
2835
2836 MachineBasicBlock &MBB = *MI.getParent();
2838 widenScalarDst(MI, WideTy);
2840 return Legalized;
2841 }
2842 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2843 if (TypeIdx == 0) {
2844 Register VecReg = MI.getOperand(1).getReg();
2845 LLT VecTy = MRI.getType(VecReg);
2847
2848       widenScalarSrc(
2849 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
2850 TargetOpcode::G_ANYEXT);
2851
2852 widenScalarDst(MI, WideTy, 0);
2854 return Legalized;
2855 }
2856
2857 if (TypeIdx != 2)
2858 return UnableToLegalize;
2860 // TODO: Probably should be zext
2861 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2863 return Legalized;
2864 }
2865 case TargetOpcode::G_INSERT_VECTOR_ELT: {
2866 if (TypeIdx == 0) {
2868 const LLT WideEltTy = WideTy.getElementType();
2869
2870 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2871 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
2872 widenScalarDst(MI, WideTy, 0);
2874 return Legalized;
2875 }
2876
2877 if (TypeIdx == 1) {
2879
2880 Register VecReg = MI.getOperand(1).getReg();
2881 LLT VecTy = MRI.getType(VecReg);
2882 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
2883
2884 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
2885 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2886 widenScalarDst(MI, WideVecTy, 0);
2888 return Legalized;
2889 }
2890
2891 if (TypeIdx == 2) {
2893 // TODO: Probably should be zext
2894 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2896 return Legalized;
2897 }
2898
2899 return UnableToLegalize;
2900 }
2901 case TargetOpcode::G_FADD:
2902 case TargetOpcode::G_FMUL:
2903 case TargetOpcode::G_FSUB:
2904 case TargetOpcode::G_FMA:
2905 case TargetOpcode::G_FMAD:
2906 case TargetOpcode::G_FNEG:
2907 case TargetOpcode::G_FABS:
2908 case TargetOpcode::G_FCANONICALIZE:
2909 case TargetOpcode::G_FMINNUM:
2910 case TargetOpcode::G_FMAXNUM:
2911 case TargetOpcode::G_FMINNUM_IEEE:
2912 case TargetOpcode::G_FMAXNUM_IEEE:
2913 case TargetOpcode::G_FMINIMUM:
2914 case TargetOpcode::G_FMAXIMUM:
2915 case TargetOpcode::G_FDIV:
2916 case TargetOpcode::G_FREM:
2917 case TargetOpcode::G_FCEIL:
2918 case TargetOpcode::G_FFLOOR:
2919 case TargetOpcode::G_FCOS:
2920 case TargetOpcode::G_FSIN:
2921 case TargetOpcode::G_FTAN:
2922 case TargetOpcode::G_FACOS:
2923 case TargetOpcode::G_FASIN:
2924 case TargetOpcode::G_FATAN:
2925 case TargetOpcode::G_FCOSH:
2926 case TargetOpcode::G_FSINH:
2927 case TargetOpcode::G_FTANH:
2928 case TargetOpcode::G_FLOG10:
2929 case TargetOpcode::G_FLOG:
2930 case TargetOpcode::G_FLOG2:
2931 case TargetOpcode::G_FRINT:
2932 case TargetOpcode::G_FNEARBYINT:
2933 case TargetOpcode::G_FSQRT:
2934 case TargetOpcode::G_FEXP:
2935 case TargetOpcode::G_FEXP2:
2936 case TargetOpcode::G_FEXP10:
2937 case TargetOpcode::G_FPOW:
2938 case TargetOpcode::G_INTRINSIC_TRUNC:
2939 case TargetOpcode::G_INTRINSIC_ROUND:
2940 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2941 assert(TypeIdx == 0);
2943
2944 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
2945 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
2946
2947 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2949 return Legalized;
2950 case TargetOpcode::G_FPOWI:
2951 case TargetOpcode::G_FLDEXP:
2952 case TargetOpcode::G_STRICT_FLDEXP: {
2953 if (TypeIdx == 0) {
2954 if (MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP)
2955 return UnableToLegalize;
2956
2958 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2959 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2961 return Legalized;
2962 }
2963
2964 if (TypeIdx == 1) {
2965 // For some reason SelectionDAG tries to promote to a libcall without
2966 // actually changing the integer type for promotion.
2968 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2970 return Legalized;
2971 }
2972
2973 return UnableToLegalize;
2974 }
2975 case TargetOpcode::G_FFREXP: {
2977
2978 if (TypeIdx == 0) {
2979 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2980 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2981 } else {
2982 widenScalarDst(MI, WideTy, 1);
2983 }
2984
2986 return Legalized;
2987 }
2988 case TargetOpcode::G_INTTOPTR:
2989 if (TypeIdx != 1)
2990 return UnableToLegalize;
2991
2993 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2995 return Legalized;
2996 case TargetOpcode::G_PTRTOINT:
2997 if (TypeIdx != 0)
2998 return UnableToLegalize;
2999
3001 widenScalarDst(MI, WideTy, 0);
3003 return Legalized;
3004 case TargetOpcode::G_BUILD_VECTOR: {
3006
3007 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3008 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3009 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3010
3011 // Avoid changing the result vector type if the source element type was
3012 // requested.
3013 if (TypeIdx == 1) {
3014 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3015 } else {
3016 widenScalarDst(MI, WideTy, 0);
3017 }
3018
3020 return Legalized;
3021 }
3022 case TargetOpcode::G_SEXT_INREG:
3023 if (TypeIdx != 0)
3024 return UnableToLegalize;
3025
3027 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3028 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3030 return Legalized;
3031 case TargetOpcode::G_PTRMASK: {
3032 if (TypeIdx != 1)
3033 return UnableToLegalize;
3035 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3037 return Legalized;
3038 }
3039 case TargetOpcode::G_VECREDUCE_FADD:
3040 case TargetOpcode::G_VECREDUCE_FMUL:
3041 case TargetOpcode::G_VECREDUCE_FMIN:
3042 case TargetOpcode::G_VECREDUCE_FMAX:
3043 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3044 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3045 if (TypeIdx != 0)
3046 return UnableToLegalize;
3048 Register VecReg = MI.getOperand(1).getReg();
3049 LLT VecTy = MRI.getType(VecReg);
3050 LLT WideVecTy = VecTy.isVector()
3051 ? LLT::vector(VecTy.getElementCount(), WideTy)
3052 : WideTy;
3053 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3054 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3056 return Legalized;
3057 }
3058 case TargetOpcode::G_VSCALE: {
3059 MachineOperand &SrcMO = MI.getOperand(1);
3061 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3062 // The CImm is always a signed value
3063 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3065 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3066 widenScalarDst(MI, WideTy);
3068 return Legalized;
3069 }
3070 case TargetOpcode::G_SPLAT_VECTOR: {
3071 if (TypeIdx != 1)
3072 return UnableToLegalize;
3073
3075 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3077 return Legalized;
3078 }
3079 }
3080}
3081
3082 static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3083                              MachineIRBuilder &B, Register Src, LLT Ty) {
3084 auto Unmerge = B.buildUnmerge(Ty, Src);
3085 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3086 Pieces.push_back(Unmerge.getReg(I));
3087}
3088
3089static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3090 MachineIRBuilder &MIRBuilder) {
3091 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3092 MachineFunction &MF = MIRBuilder.getMF();
3093 const DataLayout &DL = MIRBuilder.getDataLayout();
3094 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3095 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3096 LLT DstLLT = MRI.getType(DstReg);
3097
3098 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3099
3100 auto Addr = MIRBuilder.buildConstantPool(
3101 AddrPtrTy,
3102 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3103
3104   MachineMemOperand *MMO =
3105       MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3106 MachineMemOperand::MOLoad, DstLLT, Alignment);
3107
3108 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3109}
3110
3111 LegalizerHelper::LegalizeResult
3112 LegalizerHelper::lowerConstant(MachineInstr &MI) {
3113   const MachineOperand &ConstOperand = MI.getOperand(1);
3114 const Constant *ConstantVal = ConstOperand.getCImm();
3115
3116 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3117 MI.eraseFromParent();
3118
3119 return Legalized;
3120}
3121
3122 LegalizerHelper::LegalizeResult
3123 LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3124   const MachineOperand &ConstOperand = MI.getOperand(1);
3125 const Constant *ConstantVal = ConstOperand.getFPImm();
3126
3127 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3128 MI.eraseFromParent();
3129
3130 return Legalized;
3131}
3132
3133 LegalizerHelper::LegalizeResult
3134 LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3135   auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3136 if (SrcTy.isVector()) {
3137     LLT SrcEltTy = SrcTy.getElementType();
3138     SmallVector<Register, 8> SrcRegs;
3139
3140 if (DstTy.isVector()) {
3141 int NumDstElt = DstTy.getNumElements();
3142 int NumSrcElt = SrcTy.getNumElements();
3143
3144 LLT DstEltTy = DstTy.getElementType();
3145 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3146 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3147
3148 // If there's an element size mismatch, insert intermediate casts to match
3149 // the result element type.
3150 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3151 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3152 //
3153 // =>
3154 //
3155 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3156       // %4:_(<2 x s8>) = G_BITCAST %2
3157       // %5:_(<2 x s8>) = G_BITCAST %3
3158       // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3159 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
3160 SrcPartTy = SrcEltTy;
3161 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3162 //
3163 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3164 //
3165 // =>
3166 //
3167 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3168       // %4:_(s16) = G_BITCAST %2
3169       // %5:_(s16) = G_BITCAST %3
3170       // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3171 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
3172 DstCastTy = DstEltTy;
3173 }
3174
3175 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3176 for (Register &SrcReg : SrcRegs)
3177 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3178 } else
3179 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3180
3181 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3182 MI.eraseFromParent();
3183 return Legalized;
3184 }
3185
3186   if (DstTy.isVector()) {
3187     SmallVector<Register, 8> SrcRegs;
3188 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3189 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3190 MI.eraseFromParent();
3191 return Legalized;
3192 }
3193
3194 return UnableToLegalize;
3195}
3196
3197/// Figure out the bit offset into a register when coercing a vector index for
3198/// the wide element type. This is only for the case when promoting vector to
3199/// one with larger elements.
3200//
3201///
3202/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3203/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3204 static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3205                                                     Register Idx,
3206 unsigned NewEltSize,
3207 unsigned OldEltSize) {
3208 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3209 LLT IdxTy = B.getMRI()->getType(Idx);
3210
3211 // Now figure out the amount we need to shift to get the target bits.
3212 auto OffsetMask = B.buildConstant(
3213 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3214 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3215 return B.buildShl(IdxTy, OffsetIdx,
3216 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3217}
3218
3219/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3220/// is casting to a vector with a smaller element size, perform multiple element
3221/// extracts and merge the results. If this is coercing to a vector with larger
3222/// elements, index the bitcasted vector and extract the target element with bit
3223/// operations. This is intended to force the indexing in the native register
3224/// size for architectures that can dynamically index the register file.
3225 LegalizerHelper::LegalizeResult
3226 LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3227                                          LLT CastTy) {
3228 if (TypeIdx != 1)
3229 return UnableToLegalize;
3230
3231 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3232
3233 LLT SrcEltTy = SrcVecTy.getElementType();
3234 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3235 unsigned OldNumElts = SrcVecTy.getNumElements();
3236
3237 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3238 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3239
3240 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3241 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3242 if (NewNumElts > OldNumElts) {
3243 // Decreasing the vector element size
3244 //
3245 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3246 // =>
3247 // v4i32:castx = bitcast x:v2i64
3248 //
3249 // i64 = bitcast
3250 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3251 // (i32 (extract_vector_elt castx, (2 * y + 1)))
3252 //
3253 if (NewNumElts % OldNumElts != 0)
3254 return UnableToLegalize;
3255
3256 // Type of the intermediate result vector.
3257 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3258 LLT MidTy =
3259 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3260
3261 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3262
3263 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3264 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3265
3266 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3267 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3268 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3269 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3270 NewOps[I] = Elt.getReg(0);
3271 }
3272
3273 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3274 MIRBuilder.buildBitcast(Dst, NewVec);
3275 MI.eraseFromParent();
3276 return Legalized;
3277 }
3278
3279 if (NewNumElts < OldNumElts) {
3280 if (NewEltSize % OldEltSize != 0)
3281 return UnableToLegalize;
3282
3283 // This only depends on powers of 2 because we use bit tricks to figure out
3284 // the bit offset we need to shift to get the target element. A general
3285 // expansion could emit division/multiply.
3286 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3287 return UnableToLegalize;
3288
3289 // Increasing the vector element size.
3290 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3291 //
3292 // =>
3293 //
3294 // %cast = G_BITCAST %vec
3295 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3296 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3297 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3298 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3299 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3300 // %elt = G_TRUNC %elt_bits
3301
3302 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3303 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3304
3305 // Divide to get the index in the wider element type.
3306 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3307
3308 Register WideElt = CastVec;
3309 if (CastTy.isVector()) {
3310 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3311 ScaledIdx).getReg(0);
3312 }
3313
3314 // Compute the bit offset into the register of the target element.
3315     Register OffsetBits = getBitcastWiderVectorElementOffset(
3316         MIRBuilder, Idx, NewEltSize, OldEltSize);
3317
3318 // Shift the wide element to get the target element.
3319 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3320 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3321 MI.eraseFromParent();
3322 return Legalized;
3323 }
3324
3325 return UnableToLegalize;
3326}
3327
3328/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits, while
3329/// preserving the other bits in \p TargetReg.
3330///
3331/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
3332 static Register buildBitFieldInsert(MachineIRBuilder &B,
3333                                     Register TargetReg, Register InsertReg,
3334 Register OffsetBits) {
3335 LLT TargetTy = B.getMRI()->getType(TargetReg);
3336 LLT InsertTy = B.getMRI()->getType(InsertReg);
3337 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3338 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3339
3340 // Produce a bitmask of the value to insert
3341 auto EltMask = B.buildConstant(
3342 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3343 InsertTy.getSizeInBits()));
3344 // Shift it into position
3345 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3346 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3347
3348 // Clear out the bits in the wide element
3349 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3350
3351 // The value to insert has all zeros already, so stick it into the masked
3352 // wide element.
3353 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3354}
3355
3356/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3357/// is increasing the element size, perform the indexing in the target element
3358/// type, and use bit operations to insert at the element position. This is
3359/// intended for architectures that can dynamically index the register file and
3360/// want to force indexing in the native register size.
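/// e.g. (a sketch) inserting an s8 element into a <16 x s8> vector through a
/// <4 x s32> cast:
///   %cast:_(<4 x s32>) = G_BITCAST %vec
///   %scaled_idx = G_LSHR %idx, 2
///   %wide_elt:_(s32) = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
///   %ins:_(s32) = bit-field insert of %val into %wide_elt at the bit offset
///   %new:_(<4 x s32>) = G_INSERT_VECTOR_ELT %cast, %ins, %scaled_idx
///   %dst:_(<16 x s8>) = G_BITCAST %new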
3361 LegalizerHelper::LegalizeResult
3362 LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3363                                         LLT CastTy) {
3364 if (TypeIdx != 0)
3365 return UnableToLegalize;
3366
3367 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3368 MI.getFirst4RegLLTs();
3369 LLT VecTy = DstTy;
3370
3371 LLT VecEltTy = VecTy.getElementType();
3372 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3373 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3374 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3375
3376 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3377 unsigned OldNumElts = VecTy.getNumElements();
3378
3379 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3380 if (NewNumElts < OldNumElts) {
3381 if (NewEltSize % OldEltSize != 0)
3382 return UnableToLegalize;
3383
3384 // This only depends on powers of 2 because we use bit tricks to figure out
3385 // the bit offset we need to shift to get the target element. A general
3386 // expansion could emit division/multiply.
3387 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3388 return UnableToLegalize;
3389
3390 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3391 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3392
3393 // Divide to get the index in the wider element type.
3394 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3395
3396 Register ExtractedElt = CastVec;
3397 if (CastTy.isVector()) {
3398 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3399 ScaledIdx).getReg(0);
3400 }
3401
3402 // Compute the bit offset into the register of the target element.
3403     Register OffsetBits = getBitcastWiderVectorElementOffset(
3404         MIRBuilder, Idx, NewEltSize, OldEltSize);
3405
3406 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3407 Val, OffsetBits);
3408 if (CastTy.isVector()) {
3409       InsertedElt = MIRBuilder.buildInsertVectorElement(
3410           CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3411 }
3412
3413 MIRBuilder.buildBitcast(Dst, InsertedElt);
3414 MI.eraseFromParent();
3415 return Legalized;
3416 }
3417
3418 return UnableToLegalize;
3419}
3420
3421// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3422// those whose operands are smaller than the legal size.
3423//
3424// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3425//
3426// ===>
3427//
3428// s32 = G_BITCAST <4 x s8>
3429// s32 = G_BITCAST <4 x s8>
3430// s32 = G_BITCAST <4 x s8>
3431// s32 = G_BITCAST <4 x s8>
3432// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3433// <16 x s8> = G_BITCAST <4 x s32>
3434 LegalizerHelper::LegalizeResult
3435 LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
3436                                      LLT CastTy) {
3437 // Convert it to CONCAT instruction
3438 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3439 if (!ConcatMI) {
3440 return UnableToLegalize;
3441 }
3442
3443 // Check if bitcast is Legal
3444 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3445 LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
3446
3447 // Check if the build vector is Legal
3448 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3449 return UnableToLegalize;
3450 }
3451
3452 // Bitcast the sources
3453 SmallVector<Register> BitcastRegs;
3454 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3455 BitcastRegs.push_back(
3456 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3457 .getReg(0));
3458 }
3459
3460 // Build the scalar values into a vector
3461 Register BuildReg =
3462 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
3463 MIRBuilder.buildBitcast(DstReg, BuildReg);
3464
3465 MI.eraseFromParent();
3466 return Legalized;
3467}
3468
3469 LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
3470   // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
3471 Register DstReg = LoadMI.getDstReg();
3472 Register PtrReg = LoadMI.getPointerReg();
3473 LLT DstTy = MRI.getType(DstReg);
3474 MachineMemOperand &MMO = LoadMI.getMMO();
3475 LLT MemTy = MMO.getMemoryType();
3476   MachineFunction &MF = MIRBuilder.getMF();
3477
3478 unsigned MemSizeInBits = MemTy.getSizeInBits();
3479 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
3480
3481 if (MemSizeInBits != MemStoreSizeInBits) {
3482 if (MemTy.isVector())
3483 return UnableToLegalize;
3484
3485 // Promote to a byte-sized load if not loading an integral number of
3486 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
3487 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
3488 MachineMemOperand *NewMMO =
3489 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
3490
3491 Register LoadReg = DstReg;
3492 LLT LoadTy = DstTy;
3493
3494 // If this wasn't already an extending load, we need to widen the result
3495 // register to avoid creating a load with a narrower result than the source.
3496 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
3497 LoadTy = WideMemTy;
3498 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
3499 }
3500
3501 if (isa<GSExtLoad>(LoadMI)) {
3502 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3503 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
3504 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
3505 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3506 // The extra bits are guaranteed to be zero, since we stored them that
3507 // way. A zext load from Wide thus automatically gives zext from MemVT.
3508 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
3509 } else {
3510 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
3511 }
3512
3513 if (DstTy != LoadTy)
3514 MIRBuilder.buildTrunc(DstReg, LoadReg);
3515
3516 LoadMI.eraseFromParent();
3517 return Legalized;
3518 }
3519
3520 // Big endian lowering not implemented.
3521 if (MIRBuilder.getDataLayout().isBigEndian())
3522 return UnableToLegalize;
3523
3524 // This load needs splitting into power of 2 sized loads.
3525 //
3526 // Our strategy here is to generate anyextending loads for the smaller
3527 // types up to next power-2 result type, and then combine the two larger
3528 // result values together, before truncating back down to the non-pow-2
3529 // type.
3530 // E.g. v1 = i24 load =>
3531 // v2 = i32 zextload (2 byte)
3532 // v3 = i32 load (1 byte)
3533 // v4 = i32 shl v3, 16
3534 // v5 = i32 or v4, v2
3535 // v1 = i24 trunc v5
3536 // By doing this we generate the correct truncate which should get
3537 // combined away as an artifact with a matching extend.
3538
3539 uint64_t LargeSplitSize, SmallSplitSize;
3540
3541 if (!isPowerOf2_32(MemSizeInBits)) {
3542 // This load needs splitting into power of 2 sized loads.
3543 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
3544 SmallSplitSize = MemSizeInBits - LargeSplitSize;
3545 } else {
3546 // This is already a power of 2, but we still need to split this in half.
3547 //
3548 // Assume we're being asked to decompose an unaligned load.
3549 // TODO: If this requires multiple splits, handle them all at once.
3550 auto &Ctx = MF.getFunction().getContext();
3551 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3552 return UnableToLegalize;
3553
3554 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3555 }
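// For example, an s24 load gets LargeSplitSize = bit_floor(24) = 16 and
// SmallSplitSize = 8, while an unaligned s32 load the target rejects is split
// into two 16-bit halves.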
3556
3557 if (MemTy.isVector()) {
3558 // TODO: Handle vector extloads
3559 if (MemTy != DstTy)
3560 return UnableToLegalize;
3561
3562 // TODO: We can do better than scalarizing the vector and at least split it
3563 // in half.
3564 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
3565 }
3566
3567 MachineMemOperand *LargeMMO =
3568 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3569 MachineMemOperand *SmallMMO =
3570 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3571
3572 LLT PtrTy = MRI.getType(PtrReg);
3573 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
3574 LLT AnyExtTy = LLT::scalar(AnyExtSize);
3575 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
3576 PtrReg, *LargeMMO);
3577
3578 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
3579 LargeSplitSize / 8);
3580 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
3581 auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
3582 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
3583 SmallPtr, *SmallMMO);
3584
3585 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
3586 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
3587
3588 if (AnyExtTy == DstTy)
3589 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
3590 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
3591 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3592 MIRBuilder.buildTrunc(DstReg, {Or});
3593 } else {
3594 assert(DstTy.isPointer() && "expected pointer");
3595 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3596
3597 // FIXME: We currently consider this to be illegal for non-integral address
3598 // spaces, but we still need a way to reinterpret the bits.
3599 MIRBuilder.buildIntToPtr(DstReg, Or);
3600 }
3601
3602 LoadMI.eraseFromParent();
3603 return Legalized;
3604}
3605
3606 LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
3607 // Lower a non-power of 2 store into multiple pow-2 stores.
3608 // E.g. split an i24 store into an i16 store + i8 store.
3609 // We do this by first extending the stored value to the next largest power
3610 // of 2 type, and then using truncating stores to store the components.
3611 // By doing this, as with G_LOAD, we generate an extend that can be
3612 // artifact-combined away instead of leaving behind extracts.
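// A sketch of the expansion for a 3-byte store (assuming little-endian and
// that the target accepts the narrower accesses):
//   G_STORE %v:s24, %ptr =>
//     %ext:s32 = G_ANYEXT %v
//     G_STORE %ext, %ptr            ; 16-bit MMO
//     %hi:s32 = G_LSHR %ext, 16
//     G_STORE %hi, %ptr + 2         ; 8-bit MMO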
3613 Register SrcReg = StoreMI.getValueReg();
3614 Register PtrReg = StoreMI.getPointerReg();
3615 LLT SrcTy = MRI.getType(SrcReg);
3616 MachineFunction &MF = MIRBuilder.getMF();
3617 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
3618 LLT MemTy = MMO.getMemoryType();
3619
3620 unsigned StoreWidth = MemTy.getSizeInBits();
3621 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
3622
3623 if (StoreWidth != StoreSizeInBits) {
3624 if (SrcTy.isVector())
3625 return UnableToLegalize;
3626
3627 // Promote to a byte-sized store with upper bits zero if not
3628 // storing an integral number of bytes. For example, promote
3629 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
3630 LLT WideTy = LLT::scalar(StoreSizeInBits);
3631
3632 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
3633 // Avoid creating a store with a narrower source than result.
3634 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
3635 SrcTy = WideTy;
3636 }
3637
3638 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
3639
3640 MachineMemOperand *NewMMO =
3641 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
3642 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
3643 StoreMI.eraseFromParent();
3644 return Legalized;
3645 }
3646
3647 if (MemTy.isVector()) {
3648 // TODO: Handle vector trunc stores
3649 if (MemTy != SrcTy)
3650 return UnableToLegalize;
3651
3652 // TODO: We can do better than scalarizing the vector and at least split it
3653 // in half.
3654 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
3655 }
3656
3657 unsigned MemSizeInBits = MemTy.getSizeInBits();
3658 uint64_t LargeSplitSize, SmallSplitSize;
3659
3660 if (!isPowerOf2_32(MemSizeInBits)) {
3661 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
3662 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
3663 } else {
3664 auto &Ctx = MF.getFunction().getContext();
3665 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3666 return UnableToLegalize; // Don't know what we're being asked to do.
3667
3668 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3669 }
3670
3671 // Extend to the next pow-2. If this store was itself the result of lowering,
3672 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
3673 // that's wider than the stored size.
3674 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
3675 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
3676
3677 if (SrcTy.isPointer()) {
3678 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
3679 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
3680 }
3681
3682 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
3683
3684 // Obtain the smaller value by shifting away the larger value.
3685 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
3686 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
3687
3688 // Generate the PtrAdd and truncating stores.
3689 LLT PtrTy = MRI.getType(PtrReg);
3690 auto OffsetCst = MIRBuilder.buildConstant(
3691 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
3692 auto SmallPtr =
3693 MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
3694
3695 MachineMemOperand *LargeMMO =
3696 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3697 MachineMemOperand *SmallMMO =
3698 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3699 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
3700 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
3701 StoreMI.eraseFromParent();
3702 return Legalized;
3703}
3704
3705 LegalizerHelper::LegalizeResult
3706 LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
3707 switch (MI.getOpcode()) {
3708 case TargetOpcode::G_LOAD: {
3709 if (TypeIdx != 0)
3710 return UnableToLegalize;
3711 MachineMemOperand &MMO = **MI.memoperands_begin();
3712
3713 // Not sure how to interpret a bitcast of an extending load.
3714 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3715 return UnableToLegalize;
3716
3717 Observer.changingInstr(MI);
3718 bitcastDst(MI, CastTy, 0);
3719 MMO.setType(CastTy);
3720 // The range metadata is no longer valid when reinterpreted as a different
3721 // type.
3722 MMO.clearRanges();
3723 Observer.changedInstr(MI);
3724 return Legalized;
3725 }
3726 case TargetOpcode::G_STORE: {
3727 if (TypeIdx != 0)
3728 return UnableToLegalize;
3729
3730 MachineMemOperand &MMO = **MI.memoperands_begin();
3731
3732 // Not sure how to interpret a bitcast of a truncating store.
3733 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3734 return UnableToLegalize;
3735
3736 Observer.changingInstr(MI);
3737 bitcastSrc(MI, CastTy, 0);
3738 MMO.setType(CastTy);
3739 Observer.changedInstr(MI);
3740 return Legalized;
3741 }
3742 case TargetOpcode::G_SELECT: {
3743 if (TypeIdx != 0)
3744 return UnableToLegalize;
3745
3746 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
3747 LLVM_DEBUG(
3748 dbgs() << "bitcast action not implemented for vector select\n");
3749 return UnableToLegalize;
3750 }
3751
3752 Observer.changingInstr(MI);
3753 bitcastSrc(MI, CastTy, 2);
3754 bitcastSrc(MI, CastTy, 3);
3755 bitcastDst(MI, CastTy, 0);
3756 Observer.changedInstr(MI);
3757 return Legalized;
3758 }
3759 case TargetOpcode::G_AND:
3760 case TargetOpcode::G_OR:
3761 case TargetOpcode::G_XOR: {
3762 Observer.changingInstr(MI);
3763 bitcastSrc(MI, CastTy, 1);
3764 bitcastSrc(MI, CastTy, 2);
3765 bitcastDst(MI, CastTy, 0);
3766 Observer.changedInstr(MI);
3767 return Legalized;
3768 }
3769 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3770 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
3771 case TargetOpcode::G_INSERT_VECTOR_ELT:
3772 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
3773 case TargetOpcode::G_CONCAT_VECTORS:
3774 return bitcastConcatVector(MI, TypeIdx, CastTy);
3775 default:
3776 return UnableToLegalize;
3777 }
3778}
3779
3780// Legalize an instruction by changing the opcode in place.
3781void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
3782 Observer.changingInstr(MI);
3783 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
3784 Observer.changedInstr(MI);
3785}
3786
3787 LegalizerHelper::LegalizeResult
3788 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
3789 using namespace TargetOpcode;
3790
3791 switch(MI.getOpcode()) {
3792 default:
3793 return UnableToLegalize;
3794 case TargetOpcode::G_FCONSTANT:
3795 return lowerFConstant(MI);
3796 case TargetOpcode::G_BITCAST:
3797 return lowerBitcast(MI);
3798 case TargetOpcode::G_SREM:
3799 case TargetOpcode::G_UREM: {
3800 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3801 auto Quot =
3802 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
3803 {MI.getOperand(1), MI.getOperand(2)});
3804
3805 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
3806 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
3807 MI.eraseFromParent();
3808 return Legalized;
3809 }
3810 case TargetOpcode::G_SADDO:
3811 case TargetOpcode::G_SSUBO:
3812 return lowerSADDO_SSUBO(MI);
3813 case TargetOpcode::G_UMULH:
3814 case TargetOpcode::G_SMULH:
3815 return lowerSMULH_UMULH(MI);
3816 case TargetOpcode::G_SMULO:
3817 case TargetOpcode::G_UMULO: {
3818 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
3819 // result.
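// For example (a sketch) G_SMULO on s32 becomes:
//   %res:s32 = G_MUL %lhs, %rhs
//   %hi:s32 = G_SMULH %lhs, %rhs
//   %sign:s32 = G_ASHR %res, 31
//   %ov = G_ICMP ne, %hi, %sign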
3820 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
3821 LLT Ty = MRI.getType(Res);
3822
3823 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
3824 ? TargetOpcode::G_SMULH
3825 : TargetOpcode::G_UMULH;
3826
3827 Observer.changingInstr(MI);
3828 const auto &TII = MIRBuilder.getTII();
3829 MI.setDesc(TII.get(TargetOpcode::G_MUL));
3830 MI.removeOperand(1);
3831 Observer.changedInstr(MI);
3832
3833 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
3834 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3835
3836 // Move insert point forward so we can use the Res register if needed.
3837 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3838
3839 // For *signed* multiply, overflow is detected by checking:
3840 // (hi != (lo >> bitwidth-1))
3841 if (Opcode == TargetOpcode::G_SMULH) {
3842 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
3843 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
3844 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
3845 } else {
3846 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
3847 }
3848 return Legalized;
3849 }
3850 case TargetOpcode::G_FNEG: {
3851 auto [Res, SubByReg] = MI.getFirst2Regs();
3852 LLT Ty = MRI.getType(Res);
3853
3854 // TODO: Handle vector types once we are able to
3855 // represent them.
3856 if (Ty.isVector())
3857 return UnableToLegalize;
3858 auto SignMask =
3859 MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
3860 MIRBuilder.buildXor(Res, SubByReg, SignMask);
3861 MI.eraseFromParent();
3862 return Legalized;
3863 }
3864 case TargetOpcode::G_FSUB:
3865 case TargetOpcode::G_STRICT_FSUB: {
3866 auto [Res, LHS, RHS] = MI.getFirst3Regs();
3867 LLT Ty = MRI.getType(Res);
3868
3869 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
3870 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
3871
3872 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
3873 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
3874 else
3875 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3876
3877 MI.eraseFromParent();
3878 return Legalized;
3879 }
3880 case TargetOpcode::G_FMAD:
3881 return lowerFMad(MI);
3882 case TargetOpcode::G_FFLOOR:
3883 return lowerFFloor(MI);
3884 case TargetOpcode::G_INTRINSIC_ROUND:
3885 return lowerIntrinsicRound(MI);
3886 case TargetOpcode::G_FRINT: {
3887 // Since round even is the assumed rounding mode for unconstrained FP
3888 // operations, rint and roundeven are the same operation.
3889 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
3890 return Legalized;
3891 }
3892 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
3893 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
3894 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
3895 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
3896 **MI.memoperands_begin());
3897 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
3898 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
3899 MI.eraseFromParent();
3900 return Legalized;
3901 }
3902 case TargetOpcode::G_LOAD:
3903 case TargetOpcode::G_SEXTLOAD:
3904 case TargetOpcode::G_ZEXTLOAD:
3905 return lowerLoad(cast<GAnyLoad>(MI));
3906 case TargetOpcode::G_STORE:
3907 return lowerStore(cast<GStore>(MI));
3908 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3909 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3910 case TargetOpcode::G_CTLZ:
3911 case TargetOpcode::G_CTTZ:
3912 case TargetOpcode::G_CTPOP:
3913 return lowerBitCount(MI);
3914 case G_UADDO: {
3915 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
3916
3917 Register NewRes = MRI.cloneVirtualRegister(Res);
3918
3919 MIRBuilder.buildAdd(NewRes, LHS, RHS);
3920 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
3921
3922 MIRBuilder.buildCopy(Res, NewRes);
3923
3924 MI.eraseFromParent();
3925 return Legalized;
3926 }
3927 case G_UADDE: {
3928 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
3929 const LLT CondTy = MRI.getType(CarryOut);
3930 const LLT Ty = MRI.getType(Res);
3931
3932 Register NewRes = MRI.cloneVirtualRegister(Res);
3933
3934 // Initial add of the two operands.
3935 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
3936
3937 // Initial check for carry.
3938 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
3939
3940 // Add the sum and the carry.
3941 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
3942 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
3943
3944 // Second check for carry. We can only carry if the initial sum is all 1s
3945 // and the carry is set, resulting in a new sum of 0.
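// E.g. with 8-bit values: LHS = 0xFF, RHS = 0x00, CarryIn = 1 gives
// TmpRes = 0xFF (no carry from the first add) and NewRes = 0x00, so the
// ResEqZero && CarryIn term is what reports the carry out.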
3946 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3947 auto ResEqZero =
3948 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
3949 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
3950 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
3951
3952 MIRBuilder.buildCopy(Res, NewRes);
3953
3954 MI.eraseFromParent();
3955 return Legalized;
3956 }
3957 case G_USUBO: {
3958 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
3959
3960 MIRBuilder.buildSub(Res, LHS, RHS);
3961 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
3962
3963 MI.eraseFromParent();
3964 return Legalized;
3965 }
3966 case G_USUBE: {
3967 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
3968 const LLT CondTy = MRI.getType(BorrowOut);
3969 const LLT Ty = MRI.getType(Res);
3970
3971 // Initial subtract of the two operands.
3972 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3973
3974 // Initial check for borrow.
3975 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
3976
3977 // Subtract the borrow from the first subtract.
3978 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
3979 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
3980
3981 // Second check for borrow. We can only borrow if the initial difference is
3982 // 0 and the borrow is set, resulting in a new difference of all 1s.
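// E.g. with 8-bit values: LHS = RHS = 0x10, BorrowIn = 1 gives TmpRes = 0 (no
// borrow from the first subtract) and Res = 0xFF, so the
// TmpResEqZero && BorrowIn term is what reports the borrow out.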
3983 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3984 auto TmpResEqZero =
3985 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
3986 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
3987 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
3988
3989 MI.eraseFromParent();
3990 return Legalized;
3991 }
3992 case G_UITOFP:
3993 return lowerUITOFP(MI);
3994 case G_SITOFP:
3995 return lowerSITOFP(MI);
3996 case G_FPTOUI:
3997 return lowerFPTOUI(MI);
3998 case G_FPTOSI:
3999 return lowerFPTOSI(MI);
4000 case G_FPTRUNC:
4001 return lowerFPTRUNC(MI);
4002 case G_FPOWI:
4003 return lowerFPOWI(MI);
4004 case G_SMIN:
4005 case G_SMAX:
4006 case G_UMIN:
4007 case G_UMAX:
4008 return lowerMinMax(MI);
4009 case G_FCOPYSIGN:
4010 return lowerFCopySign(MI);
4011 case G_FMINNUM:
4012 case G_FMAXNUM:
4013 return lowerFMinNumMaxNum(MI);
4014 case G_MERGE_VALUES:
4015 return lowerMergeValues(MI);
4016 case G_UNMERGE_VALUES:
4017 return lowerUnmergeValues(MI);
4018 case TargetOpcode::G_SEXT_INREG: {
4019 assert(MI.getOperand(2).isImm() && "Expected immediate");
4020 int64_t SizeInBits = MI.getOperand(2).getImm();
4021
4022 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4023 LLT DstTy = MRI.getType(DstReg);
4024 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4025
4026 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4027 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4028 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4029 MI.eraseFromParent();
4030 return Legalized;
4031 }
4032 case G_EXTRACT_VECTOR_ELT:
4033 case G_INSERT_VECTOR_ELT:
4034 return lowerExtractInsertVectorElt(MI);
4035 case G_SHUFFLE_VECTOR:
4036 return lowerShuffleVector(MI);
4037 case G_VECTOR_COMPRESS:
4038 return lowerVECTOR_COMPRESS(MI);
4039 case G_DYN_STACKALLOC:
4040 return lowerDynStackAlloc(MI);
4041 case G_STACKSAVE:
4042 return lowerStackSave(MI);
4043 case G_STACKRESTORE:
4044 return lowerStackRestore(MI);
4045 case G_EXTRACT:
4046 return lowerExtract(MI);
4047 case G_INSERT:
4048 return lowerInsert(MI);
4049 case G_BSWAP:
4050 return lowerBswap(MI);
4051 case G_BITREVERSE:
4052 return lowerBitreverse(MI);
4053 case G_READ_REGISTER:
4054 case G_WRITE_REGISTER:
4055 return lowerReadWriteRegister(MI);
4056 case G_UADDSAT:
4057 case G_USUBSAT: {
4058 // Try to make a reasonable guess about which lowering strategy to use. The
4059 // target can override this with custom lowering and calling the
4060 // implementation functions.
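// As a sketch (the exact sequence is produced by the called helpers, not
// here): with min/max available, G_UADDSAT %a, %b can be expanded as
// G_ADD %a, (G_UMIN %b, (G_XOR %a, -1)) and G_USUBSAT %a, %b as
// G_SUB %a, (G_UMIN %a, %b); otherwise the uaddo/usubo-based expansion is used.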
4061 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4062 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4063 return lowerAddSubSatToMinMax(MI);
4064 return lowerAddSubSatToAddoSubo(MI);
4065 }
4066 case G_SADDSAT:
4067 case G_SSUBSAT: {
4068 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4069
4070 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4071 // since it's a shorter expansion. However, we would need to figure out the
4072 // preferred boolean type for the carry out for the query.
4073 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4074 return lowerAddSubSatToMinMax(MI);
4075 return lowerAddSubSatToAddoSubo(MI);
4076 }
4077 case G_SSHLSAT:
4078 case G_USHLSAT:
4079 return lowerShlSat(MI);
4080 case G_ABS:
4081 return lowerAbsToAddXor(MI);
4082 case G_SELECT:
4083 return lowerSelect(MI);
4084 case G_IS_FPCLASS:
4085 return lowerISFPCLASS(MI);
4086 case G_SDIVREM:
4087 case G_UDIVREM:
4088 return lowerDIVREM(MI);
4089 case G_FSHL:
4090 case G_FSHR:
4091 return lowerFunnelShift(MI);
4092 case G_ROTL:
4093 case G_ROTR:
4094 return lowerRotate(MI);
4095 case G_MEMSET:
4096 case G_MEMCPY:
4097 case G_MEMMOVE:
4098 return lowerMemCpyFamily(MI);
4099 case G_MEMCPY_INLINE:
4100 return lowerMemcpyInline(MI);
4101 case G_ZEXT:
4102 case G_SEXT:
4103 case G_ANYEXT:
4104 return lowerEXT(MI);
4105 case G_TRUNC:
4106 return lowerTRUNC(MI);
4107 GISEL_VECREDUCE_CASES_NONSEQ
4108 return lowerVectorReduction(MI);
4109 case G_VAARG:
4110 return lowerVAArg(MI);
4111 }
4112}
4113
4114 Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4115 Align MinAlign) const {
4116 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4117 // datalayout for the preferred alignment. Also there should be a target hook
4118 // for this to allow targets to reduce the alignment and ignore the
4119 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4120 // the type.
4121 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4122}
4123
4124 MachineInstrBuilder
4125 LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4126 MachinePointerInfo &PtrInfo) {
4127 MachineFunction &MF = MIRBuilder.getMF();
4128 const DataLayout &DL = MIRBuilder.getDataLayout();
4129 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4130
4131 unsigned AddrSpace = DL.getAllocaAddrSpace();
4132 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4133
4134 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4135 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4136}
4137
4139 LLT VecTy) {
4140 LLT IdxTy = B.getMRI()->getType(IdxReg);
4141 unsigned NElts = VecTy.getNumElements();
4142
4143 int64_t IdxVal;
4144 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4145 if (IdxVal < VecTy.getNumElements())
4146 return IdxReg;
4147 // If a constant index would be out of bounds, clamp it as well.
4148 }
4149
4150 if (isPowerOf2_32(NElts)) {
4151 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4152 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4153 }
4154
4155 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4156 .getReg(0);
4157}
4158
4159 Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4160 Register Index) {
4161 LLT EltTy = VecTy.getElementType();
4162
4163 // Calculate the element offset and add it to the pointer.
4164 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4165 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4166 "Converting bits to bytes lost precision");
4167
4169
4170 // Convert index to the correct size for the address space.
4171 const DataLayout &DL = MIRBuilder.getDataLayout();
4172 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4173 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4174 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4175 if (IdxTy != MRI.getType(Index))
4176 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4177
4178 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4179 MIRBuilder.buildConstant(IdxTy, EltSize));
4180
4181 LLT PtrTy = MRI.getType(VecPtr);
4182 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4183}
4184
4185#ifndef NDEBUG
4186/// Check that all vector operands have the same number of elements. Other
4187/// operands should be listed in NonVecOpIndices.
4188 static bool hasSameNumEltsOnAllVectorOperands(
4189 GenericMachineInstr &MI, MachineRegisterInfo &MRI,
4190 std::initializer_list<unsigned> NonVecOpIndices) {
4191 if (MI.getNumMemOperands() != 0)
4192 return false;
4193
4194 LLT VecTy = MRI.getType(MI.getReg(0));
4195 if (!VecTy.isVector())
4196 return false;
4197 unsigned NumElts = VecTy.getNumElements();
4198
4199 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4200 MachineOperand &Op = MI.getOperand(OpIdx);
4201 if (!Op.isReg()) {
4202 if (!is_contained(NonVecOpIndices, OpIdx))
4203 return false;
4204 continue;
4205 }
4206
4207 LLT Ty = MRI.getType(Op.getReg());
4208 if (!Ty.isVector()) {
4209 if (!is_contained(NonVecOpIndices, OpIdx))
4210 return false;
4211 continue;
4212 }
4213
4214 if (Ty.getNumElements() != NumElts)
4215 return false;
4216 }
4217
4218 return true;
4219}
4220#endif
4221
4222/// Fill \p DstOps with DstOps that together cover the same number of elements
4223/// as \p Ty. These DstOps have either scalar type when \p NumElts = 1 or are
4224/// vectors with \p NumElts elements. When Ty.getNumElements() is not a multiple
4225/// of \p NumElts, the last (leftover) DstOp has fewer than \p NumElts elements.
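/// For example (illustrative): Ty = <5 x s32> with \p NumElts = 2 produces
/// DstOps = { <2 x s32>, <2 x s32>, s32 }.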
4226static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
4227 unsigned NumElts) {
4228 LLT LeftoverTy;
4229 assert(Ty.isVector() && "Expected vector type");
4230 LLT EltTy = Ty.getElementType();
4231 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
4232 int NumParts, NumLeftover;
4233 std::tie(NumParts, NumLeftover) =
4234 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
4235
4236 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
4237 for (int i = 0; i < NumParts; ++i) {
4238 DstOps.push_back(NarrowTy);
4239 }
4240
4241 if (LeftoverTy.isValid()) {
4242 assert(NumLeftover == 1 && "expected exactly one leftover");
4243 DstOps.push_back(LeftoverTy);
4244 }
4245}
4246
4247/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
4248/// made from \p Op depending on operand type.
4249static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
4250 MachineOperand &Op) {
4251 for (unsigned i = 0; i < N; ++i) {
4252 if (Op.isReg())
4253 Ops.push_back(Op.getReg());
4254 else if (Op.isImm())
4255 Ops.push_back(Op.getImm());
4256 else if (Op.isPredicate())
4257 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
4258 else
4259 llvm_unreachable("Unsupported type");
4260 }
4261}
4262
4263// Handle splitting vector operations which need to have the same number of
4264// elements in each type index, but each type index may have a different element
4265// type.
4266//
4267// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
4268// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4269// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4270//
4271// Also handles some irregular breakdown cases, e.g.
4272// <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
4273// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4274// s64 = G_SHL s64, s32
4275 LegalizerHelper::LegalizeResult
4276 LegalizerHelper::fewerElementsVectorMultiEltType(
4277 GenericMachineInstr &MI, unsigned NumElts,
4278 std::initializer_list<unsigned> NonVecOpIndices) {
4279 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
4280 "Non-compatible opcode or not specified non-vector operands");
4281 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4282
4283 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4284 unsigned NumDefs = MI.getNumDefs();
4285
4286 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
4287 // Build instructions with DstOps to use instruction found by CSE directly.
4288 // CSE copies found instruction into given vreg when building with vreg dest.
4289 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
4290 // Output registers will be taken from created instructions.
4291 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
4292 for (unsigned i = 0; i < NumDefs; ++i) {
4293 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
4294 }
4295
4296 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
4297 // Operands listed in NonVecOpIndices will be used as is without splitting;
4298 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
4299 // scalar condition (op 1), immediate in sext_inreg (op 2).
4300 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
4301 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4302 ++UseIdx, ++UseNo) {
4303 if (is_contained(NonVecOpIndices, UseIdx)) {
4304 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
4305 MI.getOperand(UseIdx));
4306 } else {
4307 SmallVector<Register, 8> SplitPieces;
4308 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
4309 MRI);
4310 for (auto Reg : SplitPieces)
4311 InputOpsPieces[UseNo].push_back(Reg);
4312 }
4313 }
4314
4315 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4316
4317 // Take i-th piece of each input operand split and build sub-vector/scalar
4318 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
4319 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4320 SmallVector<DstOp, 8> Defs;
4321 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4322 Defs.push_back(OutputOpsPieces[DstNo][i]);
4323
4324 SmallVector<SrcOp, 8> Uses;
4325 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
4326 Uses.push_back(InputOpsPieces[InputNo][i]);
4327
4328 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
4329 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4330 OutputRegs[DstNo].push_back(I.getReg(DstNo));
4331 }
4332
4333 // Merge small outputs into MI's output for each def operand.
4334 if (NumLeftovers) {
4335 for (unsigned i = 0; i < NumDefs; ++i)
4336 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
4337 } else {
4338 for (unsigned i = 0; i < NumDefs; ++i)
4339 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
4340 }
4341
4342 MI.eraseFromParent();
4343 return Legalized;
4344}
4345
4346 LegalizerHelper::LegalizeResult
4347 LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
4348 unsigned NumElts) {
4349 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4350
4351 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4352 unsigned NumDefs = MI.getNumDefs();
4353
4354 SmallVector<DstOp, 8> OutputOpsPieces;
4355 SmallVector<Register, 8> OutputRegs;
4356 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
4357
4358 // Instructions that perform the register split will be inserted in the basic
4359 // block where the register is defined (the basic block is in the next operand).
4360 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
4361 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4362 UseIdx += 2, ++UseNo) {
4363 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
4364 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
4365 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
4366 MIRBuilder, MRI);
4367 }
4368
4369 // Build PHIs with fewer elements.
4370 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4371 MIRBuilder.setInsertPt(*MI.getParent(), MI);
4372 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4373 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
4374 Phi.addDef(
4375 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
4376 OutputRegs.push_back(Phi.getReg(0));
4377
4378 for (unsigned j = 0; j < NumInputs / 2; ++j) {
4379 Phi.addUse(InputOpsPieces[j][i]);
4380 Phi.add(MI.getOperand(1 + j * 2 + 1));
4381 }
4382 }
4383
4384 // Set the insert point after the existing PHIs
4385 MachineBasicBlock &MBB = *MI.getParent();
4386 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
4387
4388 // Merge small outputs into MI's def.
4389 if (NumLeftovers) {
4390 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
4391 } else {
4392 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
4393 }
4394
4395 MI.eraseFromParent();
4396 return Legalized;
4397}
4398
4399 LegalizerHelper::LegalizeResult
4400 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
4401 unsigned TypeIdx,
4402 LLT NarrowTy) {
4403 const int NumDst = MI.getNumOperands() - 1;
4404 const Register SrcReg = MI.getOperand(NumDst).getReg();
4405 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4406 LLT SrcTy = MRI.getType(SrcReg);
4407
4408 if (TypeIdx != 1 || NarrowTy == DstTy)
4409 return UnableToLegalize;
4410
4411 // Requires compatible types. Otherwise SrcReg should have been defined by a
4412 // merge-like instruction that would get artifact-combined. Most likely the
4413 // instruction that defines SrcReg has to perform more/fewer-elements
4414 // legalization compatible with NarrowTy.
4415 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
4416 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4417
4418 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
4419 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
4420 return UnableToLegalize;
4421
4422 // This is most likely DstTy (smaller than register size) packed in SrcTy
4423 // (larger than register size) and since unmerge was not combined it will be
4424 // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
4425 // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.
4426
4427 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
4428 //
4429 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
4430 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
4431 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
4432 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
4433 const int NumUnmerge = Unmerge->getNumOperands() - 1;
4434 const int PartsPerUnmerge = NumDst / NumUnmerge;
4435
4436 for (int I = 0; I != NumUnmerge; ++I) {
4437 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
4438
4439 for (int J = 0; J != PartsPerUnmerge; ++J)
4440 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
4441 MIB.addUse(Unmerge.getReg(I));
4442 }
4443
4444 MI.eraseFromParent();
4445 return Legalized;
4446}
4447
4448 LegalizerHelper::LegalizeResult
4449 LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
4450 LLT NarrowTy) {
4451 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
4452 // Requires compatible types. Otherwise the user of DstReg did not perform the
4453 // unmerge that should have been artifact-combined. Most likely the instruction
4454 // that uses DstReg has to do more/fewer-elements legalization compatible with NarrowTy.
4455 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
4456 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4457 if (NarrowTy == SrcTy)
4458 return UnableToLegalize;
4459
4460 // This attempts to lower part of LCMTy merge/unmerge sequence. Intended use
4461 // is for old mir tests. Since the changes to the more/fewer-elements
4462 // legalization it should no longer be possible to generate MIR like this from
4463 // llvm-ir, because the LCMTy approach was replaced with merge/unmerge to vector elements.
4464 if (TypeIdx == 1) {
4465 assert(SrcTy.isVector() && "Expected vector types");
4466 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4467 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
4468 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
4469 return UnableToLegalize;
4470 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
4471 //
4472 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
4473 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
4474 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
4475 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
4476 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
4477 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
4478
4479 SmallVector<Register, 8> Elts;
4480 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
4481 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
4482 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
4483 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
4484 Elts.push_back(Unmerge.getReg(j));
4485 }
4486
4487 SmallVector<Register, 8> NarrowTyElts;
4488 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
4489 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
4490 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
4491 ++i, Offset += NumNarrowTyElts) {
4492 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
4493 NarrowTyElts.push_back(
4494 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
4495 }
4496
4497 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
4498 MI.eraseFromParent();
4499 return Legalized;
4500 }
4501
4502 assert(TypeIdx == 0 && "Bad type index");
4503 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
4504 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
4505 return UnableToLegalize;
4506
4507 // This is most likely SrcTy (smaller than register size) packed in DstTy
4508 // (larger than register size) and since merge was not combined it will be
4509 // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy
4510 // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy.
4511
4512 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
4513 //
4514 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
4515 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
4516 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
4517 SmallVector<Register, 8> NarrowTyElts;
4518 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
4519 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
4520 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
4521 for (unsigned i = 0; i < NumParts; ++i) {
4522 SmallVector<Register, 8> Sources;
4523 for (unsigned j = 0; j < NumElts; ++j)
4524 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
4525 NarrowTyElts.push_back(
4526 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
4527 }
4528
4529 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
4530 MI.eraseFromParent();
4531 return Legalized;
4532}
4533
4534 LegalizerHelper::LegalizeResult
4535 LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
4536 unsigned TypeIdx,
4537 LLT NarrowVecTy) {
4538 auto [DstReg, SrcVec] = MI.getFirst2Regs();
4539 Register InsertVal;
4540 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
4541
4542 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
4543 if (IsInsert)
4544 InsertVal = MI.getOperand(2).getReg();
4545
4546 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
4547
4548 // TODO: Handle total scalarization case.
4549 if (!NarrowVecTy.isVector())
4550 return UnableToLegalize;
4551
4552 LLT VecTy = MRI.getType(SrcVec);
4553
4554 // If the index is a constant, we can really break this down as you would
4555 // expect, and index into the target size pieces.
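// For example (a sketch): extracting element 5 from <8 x s32> split into
// <4 x s32> pieces reads element 5 - 4 = 1 of the second piece (PartIdx = 1).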
4556 int64_t IdxVal;
4557 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
4558 if (MaybeCst) {
4559 IdxVal = MaybeCst->Value.getSExtValue();
4560 // Avoid out of bounds indexing the pieces.
4561 if (IdxVal >= VecTy.getNumElements()) {
4562 MIRBuilder.buildUndef(DstReg);
4563 MI.eraseFromParent();
4564 return Legalized;
4565 }
4566
4567 SmallVector<Register, 8> VecParts;
4568 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
4569
4570 // Build a sequence of NarrowTy pieces in VecParts for this operand.
4571 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
4572 TargetOpcode::G_ANYEXT);
4573
4574 unsigned NewNumElts = NarrowVecTy.getNumElements();
4575
4576 LLT IdxTy = MRI.getType(Idx);
4577 int64_t PartIdx = IdxVal / NewNumElts;
4578 auto NewIdx =
4579 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
4580
4581 if (IsInsert) {
4582 LLT PartTy = MRI.getType(VecParts[PartIdx]);
4583
4584 // Use the adjusted index to insert into one of the subvectors.
4585 auto InsertPart = MIRBuilder.buildInsertVectorElement(
4586 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
4587 VecParts[PartIdx] = InsertPart.getReg(0);
4588
4589 // Recombine the inserted subvector with the others to reform the result
4590 // vector.
4591 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
4592 } else {
4593 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
4594 }
4595
4596 MI.eraseFromParent();
4597 return Legalized;
4598 }
4599
4600 // With a variable index, we can't perform the operation in a smaller type, so
4601 // we're forced to expand this.
4602 //
4603 // TODO: We could emit a chain of compare/select to figure out which piece to
4604 // index.
4605 return UnableToLegalize;
4606}
4607
4608 LegalizerHelper::LegalizeResult
4609 LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
4610 LLT NarrowTy) {
4611 // FIXME: Don't know how to handle secondary types yet.
4612 if (TypeIdx != 0)
4613 return UnableToLegalize;
4614
4615 // This implementation doesn't work for atomics. Give up instead of doing
4616 // something invalid.
4617 if (LdStMI.isAtomic())
4618 return UnableToLegalize;
4619
4620 bool IsLoad = isa<GLoad>(LdStMI);
4621 Register ValReg = LdStMI.getReg(0);
4622 Register AddrReg = LdStMI.getPointerReg();
4623 LLT ValTy = MRI.getType(ValReg);
4624
4625 // FIXME: Do we need a distinct NarrowMemory legalize action?
4626 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
4627 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
4628 return UnableToLegalize;
4629 }
4630
4631 int NumParts = -1;
4632 int NumLeftover = -1;
4633 LLT LeftoverTy;
4634 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
4635 if (IsLoad) {
4636 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
4637 } else {
4638 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
4639 NarrowLeftoverRegs, MIRBuilder, MRI)) {
4640 NumParts = NarrowRegs.size();
4641 NumLeftover = NarrowLeftoverRegs.size();
4642 }
4643 }
4644
4645 if (NumParts == -1)
4646 return UnableToLegalize;
4647
4648 LLT PtrTy = MRI.getType(AddrReg);
4649 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
4650
4651 unsigned TotalSize = ValTy.getSizeInBits();
4652
4653 // Split the load/store into PartTy sized pieces starting at Offset. If this
4654 // is a load, return the new registers in ValRegs. For a store, each element
4655 // of ValRegs should be PartTy. Returns the next offset that needs to be
4656 // handled.
4657 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
4658 auto MMO = LdStMI.getMMO();
4659 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
4660 unsigned NumParts, unsigned Offset) -> unsigned {
4661 MachineFunction &MF = MIRBuilder.getMF();
4662 unsigned PartSize = PartTy.getSizeInBits();
4663 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
4664 ++Idx) {
4665 unsigned ByteOffset = Offset / 8;
4666 Register NewAddrReg;
4667
4668 MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
4669
4670 MachineMemOperand *NewMMO =
4671 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
4672
4673 if (IsLoad) {
4674 Register Dst = MRI.createGenericVirtualRegister(PartTy);
4675 ValRegs.push_back(Dst);
4676 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
4677 } else {
4678 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
4679 }
4680 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
4681 }
4682
4683 return Offset;
4684 };
4685
4686 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
4687 unsigned HandledOffset =
4688 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
4689
4690 // Handle the rest of the register if this isn't an even type breakdown.
4691 if (LeftoverTy.isValid())
4692 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
4693
4694 if (IsLoad) {
4695 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
4696 LeftoverTy, NarrowLeftoverRegs);
4697 }
4698
4699 LdStMI.eraseFromParent();
4700 return Legalized;
4701}
4702
4703 LegalizerHelper::LegalizeResult
4704 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
4705 LLT NarrowTy) {
4706 using namespace TargetOpcode;
4707 GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
4708 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
4709
4710 switch (MI.getOpcode()) {
4711 case G_IMPLICIT_DEF:
4712 case G_TRUNC:
4713 case G_AND:
4714 case G_OR:
4715 case G_XOR:
4716 case G_ADD:
4717 case G_SUB:
4718 case G_MUL:
4719 case G_PTR_ADD:
4720 case G_SMULH:
4721 case G_UMULH:
4722 case G_FADD:
4723 case G_FMUL:
4724 case G_FSUB:
4725 case G_FNEG:
4726 case G_FABS:
4727 case G_FCANONICALIZE:
4728 case G_FDIV:
4729 case G_FREM:
4730 case G_FMA:
4731 case G_FMAD:
4732 case G_FPOW:
4733 case G_FEXP:
4734 case G_FEXP2:
4735 case G_FEXP10:
4736 case G_FLOG:
4737 case G_FLOG2:
4738 case G_FLOG10:
4739 case G_FLDEXP:
4740 case G_FNEARBYINT:
4741 case G_FCEIL:
4742 case G_FFLOOR:
4743 case G_FRINT:
4744 case G_INTRINSIC_ROUND:
4745 case G_INTRINSIC_ROUNDEVEN:
4746 case G_INTRINSIC_TRUNC:
4747 case G_FCOS:
4748 case G_FSIN:
4749 case G_FTAN:
4750 case G_FACOS:
4751 case G_FASIN:
4752 case G_FATAN:
4753 case G_FCOSH:
4754 case G_FSINH:
4755 case G_FTANH:
4756 case G_FSQRT:
4757 case G_BSWAP:
4758 case G_BITREVERSE:
4759 case G_SDIV:
4760 case G_UDIV:
4761 case G_SREM:
4762 case G_UREM:
4763 case G_SDIVREM:
4764 case G_UDIVREM:
4765 case G_SMIN:
4766 case G_SMAX:
4767 case G_UMIN:
4768 case G_UMAX:
4769 case G_ABS:
4770 case G_FMINNUM:
4771 case G_FMAXNUM:
4772 case G_FMINNUM_IEEE:
4773 case G_FMAXNUM_IEEE:
4774 case G_FMINIMUM:
4775 case G_FMAXIMUM:
4776 case G_FSHL:
4777 case G_FSHR:
4778 case G_ROTL:
4779 case G_ROTR:
4780 case G_FREEZE:
4781 case G_SADDSAT:
4782 case G_SSUBSAT:
4783 case G_UADDSAT:
4784 case G_USUBSAT:
4785 case G_UMULO:
4786 case G_SMULO:
4787 case G_SHL:
4788 case G_LSHR:
4789 case G_ASHR:
4790 case G_SSHLSAT:
4791 case G_USHLSAT:
4792 case G_CTLZ:
4793 case G_CTLZ_ZERO_UNDEF:
4794 case G_CTTZ:
4795 case G_CTTZ_ZERO_UNDEF:
4796 case G_CTPOP:
4797 case G_FCOPYSIGN:
4798 case G_ZEXT:
4799 case G_SEXT:
4800 case G_ANYEXT:
4801 case G_FPEXT:
4802 case G_FPTRUNC:
4803 case G_SITOFP:
4804 case G_UITOFP:
4805 case G_FPTOSI:
4806 case G_FPTOUI:
4807 case G_INTTOPTR:
4808 case G_PTRTOINT:
4809 case G_ADDRSPACE_CAST:
4810 case G_UADDO:
4811 case G_USUBO:
4812 case G_UADDE:
4813 case G_USUBE:
4814 case G_SADDO:
4815 case G_SSUBO:
4816 case G_SADDE:
4817 case G_SSUBE:
4818 case G_STRICT_FADD:
4819 case G_STRICT_FSUB:
4820 case G_STRICT_FMUL:
4821 case G_STRICT_FMA:
4822 case G_STRICT_FLDEXP:
4823 case G_FFREXP:
4824 return fewerElementsVectorMultiEltType(GMI, NumElts);
4825 case G_ICMP:
4826 case G_FCMP:
4827 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
4828 case G_IS_FPCLASS:
4829 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
4830 case G_SELECT:
4831 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
4832 return fewerElementsVectorMultiEltType(GMI, NumElts);
4833 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
4834 case G_PHI:
4835 return fewerElementsVectorPhi(GMI, NumElts);
4836 case G_UNMERGE_VALUES:
4837 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
4838 case G_BUILD_VECTOR:
4839 assert(TypeIdx == 0 && "not a vector type index");
4840 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4841 case G_CONCAT_VECTORS:
4842 if (TypeIdx != 1) // TODO: This probably does work as expected already.
4843 return UnableToLegalize;
4844 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4845 case G_EXTRACT_VECTOR_ELT:
4846 case G_INSERT_VECTOR_ELT:
4847 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
4848 case G_LOAD:
4849 case G_STORE:
4850 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
4851 case G_SEXT_INREG:
4852 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
4853 GISEL_VECREDUCE_CASES_NONSEQ
4854 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
4855 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
4856 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
4857 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
4858 case G_SHUFFLE_VECTOR:
4859 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
4860 case G_FPOWI:
4861 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
4862 case G_BITCAST:
4863 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
4864 case G_INTRINSIC_FPTRUNC_ROUND:
4865 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
4866 default:
4867 return UnableToLegalize;
4868 }
4869}
4870
4871 LegalizerHelper::LegalizeResult
4872 LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx,
4873 LLT NarrowTy) {
4874 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
4875 "Not a bitcast operation");
4876
4877 if (TypeIdx != 0)
4878 return UnableToLegalize;
4879
4880 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
4881
4882 unsigned SrcScalSize = SrcTy.getScalarSizeInBits();
4883 LLT SrcNarrowTy =
4884 LLT::fixed_vector(NarrowTy.getSizeInBits() / SrcScalSize, SrcScalSize);
4885
4886 // Split the Src and Dst Reg into smaller registers
4887 SmallVector<Register> SrcVRegs, BitcastVRegs;
4888 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
4889 return UnableToLegalize;
4890
4891 // Build new smaller bitcast instructions
4892 // Leftover types are not supported for now, but will have to be eventually.
4893 for (unsigned i = 0; i < SrcVRegs.size(); i++)
4894 BitcastVRegs.push_back(
4895 MIRBuilder.buildBitcast(NarrowTy, SrcVRegs[i]).getReg(0));
4896
4897 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
4898 MI.eraseFromParent();
4899 return Legalized;
4900}
4901
4902 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
4903 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4904 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
4905 if (TypeIdx != 0)
4906 return UnableToLegalize;
4907
4908 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
4909 MI.getFirst3RegLLTs();
4910 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
4911 // The shuffle should be canonicalized by now.
4912 if (DstTy != Src1Ty)
4913 return UnableToLegalize;
4914 if (DstTy != Src2Ty)
4915 return UnableToLegalize;
4916
4917 if (!isPowerOf2_32(DstTy.getNumElements()))
4918 return UnableToLegalize;
4919
4920 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
4921 // Further legalization attempts will be needed to split it further.
4922 NarrowTy =
4923 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
4924 unsigned NewElts = NarrowTy.getNumElements();
4925
4926 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
4927 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
4928 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
4929 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
4930 SplitSrc2Regs[1]};
4931
4932 Register Hi, Lo;
4933
4934 // If Lo or Hi uses elements from at most two of the four input vectors, then
4935 // express it as a vector shuffle of those two inputs. Otherwise extract the
4936 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
4937 SmallVector<int, 16> Ops;
4938 for (unsigned High = 0; High < 2; ++High) {
4939 Register &Output = High ? Hi : Lo;
4940
4941 // Build a shuffle mask for the output, discovering on the fly which
4942 // input vectors to use as shuffle operands (recorded in InputUsed).
4943 // If building a suitable shuffle vector proves too hard, then bail
4944 // out with useBuildVector set.
4945 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
4946 unsigned FirstMaskIdx = High * NewElts;
4947 bool UseBuildVector = false;
4948 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4949 // The mask element. This indexes into the input.
4950 int Idx = Mask[FirstMaskIdx + MaskOffset];
4951
4952 // The input vector this mask element indexes into.
4953 unsigned Input = (unsigned)Idx / NewElts;
4954
4955 if (Input >= std::size(Inputs)) {
4956 // The mask element does not index into any input vector.
4957 Ops.push_back(-1);
4958 continue;
4959 }
4960
4961 // Turn the index into an offset from the start of the input vector.
4962 Idx -= Input * NewElts;
4963
4964 // Find or create a shuffle vector operand to hold this input.
4965 unsigned OpNo;
4966 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
4967 if (InputUsed[OpNo] == Input) {
4968 // This input vector is already an operand.
4969 break;
4970 } else if (InputUsed[OpNo] == -1U) {
4971 // Create a new operand for this input vector.
4972 InputUsed[OpNo] = Input;
4973 break;
4974 }
4975 }
4976
4977 if (OpNo >= std::size(InputUsed)) {
4978 // More than two input vectors used! Give up on trying to create a
4979 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
4980 UseBuildVector = true;
4981 break;
4982 }
4983
4984 // Add the mask index for the new shuffle vector.
4985 Ops.push_back(Idx + OpNo * NewElts);
4986 }
4987
4988 if (UseBuildVector) {
4989 LLT EltTy = NarrowTy.getElementType();
4990 SmallVector<Register, 16> SVOps;
4991
4992 // Extract the input elements by hand.
4993 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4994 // The mask element. This indexes into the input.
4995 int Idx = Mask[FirstMaskIdx + MaskOffset];
4996
4997 // The input vector this mask element indexes into.
4998 unsigned Input = (unsigned)Idx / NewElts;
4999
5000 if (Input >= std::size(Inputs)) {
5001 // The mask element is "undef" or indexes off the end of the input.
5002 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5003 continue;
5004 }
5005
5006 // Turn the index into an offset from the start of the input vector.
5007 Idx -= Input * NewElts;
5008
5009 // Extract the vector element by hand.
5010 SVOps.push_back(MIRBuilder
5011 .buildExtractVectorElement(
5012 EltTy, Inputs[Input],
5013 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5014 .getReg(0));
5015 }
5016
5017 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5018 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5019 } else if (InputUsed[0] == -1U) {
5020 // No input vectors were used! The result is undefined.
5021 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5022 } else {
5023 Register Op0 = Inputs[InputUsed[0]];
5024 // If only one input was used, use an undefined vector for the other.
5025 Register Op1 = InputUsed[1] == -1U
5026 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5027 : Inputs[InputUsed[1]];
5028 // At least one input vector was used. Create a new shuffle vector.
5029 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5030 }
5031
5032 Ops.clear();
5033 }
5034
5035 MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
5036 MI.eraseFromParent();
5037 return Legalized;
5038}
5039
5040 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
5041 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5042 auto &RdxMI = cast<GVecReduce>(MI);
5043
5044 if (TypeIdx != 1)
5045 return UnableToLegalize;
5046
5047 // The semantics of the normal non-sequential reductions allow us to freely
5048 // re-associate the operation.
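// For example (a sketch): G_VECREDUCE_ADD of <8 x s32> with NarrowTy <4 x s32>
// can be done as a vector G_ADD of the two <4 x s32> halves followed by a
// single G_VECREDUCE_ADD of the sum; breakdowns that are not powers of 2
// instead combine the per-piece reductions with scalar ops.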
5049 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5050
5051 if (NarrowTy.isVector() &&
5052 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5053 return UnableToLegalize;
5054
5055 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5056 SmallVector<Register> SplitSrcs;
5057 // If NarrowTy is a scalar then we're being asked to scalarize.
5058 const unsigned NumParts =
5059 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5060 : SrcTy.getNumElements();
5061
5062 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5063 if (NarrowTy.isScalar()) {
5064 if (DstTy != NarrowTy)
5065 return UnableToLegalize; // FIXME: handle implicit extensions.
5066
5067 if (isPowerOf2_32(NumParts)) {
5068 // Generate a tree of scalar operations to reduce the critical path.
5069 SmallVector<Register> PartialResults;
5070 unsigned NumPartsLeft = NumParts;
5071 while (NumPartsLeft > 1) {
5072 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5073 PartialResults.emplace_back(
5074 MIRBuilder
5075 .buildInstr(ScalarOpc, {NarrowTy},
5076 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5077 .getReg(0));
5078 }
5079 SplitSrcs = PartialResults;
5080 PartialResults.clear();
5081 NumPartsLeft = SplitSrcs.size();
5082 }
5083 assert(SplitSrcs.size() == 1);
5084 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5085 MI.eraseFromParent();
5086 return Legalized;
5087 }
5088 // If we can't generate a tree, then just do sequential operations.
5089 Register Acc = SplitSrcs[0];
5090 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5091 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5092 .getReg(0);
5093 MIRBuilder.buildCopy(DstReg, Acc);
5094 MI.eraseFromParent();
5095 return Legalized;
5096 }
5097 SmallVector<Register> PartialReductions;
5098 for (unsigned Part = 0; Part < NumParts; ++Part) {
5099 PartialReductions.push_back(
5100 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5101 .getReg(0));
5102 }
5103
5104 // If the types involved are powers of 2, we can generate intermediate vector
5105 // ops, before generating a final reduction operation.
5106 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5107 isPowerOf2_32(NarrowTy.getNumElements())) {
5108 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5109 }
5110
5111 Register Acc = PartialReductions[0];
5112 for (unsigned Part = 1; Part < NumParts; ++Part) {
5113 if (Part == NumParts - 1) {
5114 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5115 {Acc, PartialReductions[Part]});
5116 } else {
5117 Acc = MIRBuilder
5118 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5119 .getReg(0);
5120 }
5121 }
5122 MI.eraseFromParent();
5123 return Legalized;
5124}
5125
5126LegalizerHelper::LegalizeResult
5127LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
5128 unsigned int TypeIdx,
5129 LLT NarrowTy) {
5130 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5131 MI.getFirst3RegLLTs();
5132 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5133 DstTy != NarrowTy)
5134 return UnableToLegalize;
5135
5136 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5137 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5138 "Unexpected vecreduce opcode");
5139 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5140 ? TargetOpcode::G_FADD
5141 : TargetOpcode::G_FMUL;
5142
5143 SmallVector<Register> SplitSrcs;
5144 unsigned NumParts = SrcTy.getNumElements();
5145 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5146 Register Acc = ScalarReg;
5147 for (unsigned i = 0; i < NumParts; i++)
5148 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5149 .getReg(0);
5150
5151 MIRBuilder.buildCopy(DstReg, Acc);
5152 MI.eraseFromParent();
5153 return Legalized;
5154}
5155
5156LegalizerHelper::LegalizeResult
5157LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5158 LLT SrcTy, LLT NarrowTy,
5159 unsigned ScalarOpc) {
5160 SmallVector<Register> SplitSrcs;
5161 // Split the sources into NarrowTy size pieces.
5162 extractParts(SrcReg, NarrowTy,
5163 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5164 MIRBuilder, MRI);
5165 // We're going to do a tree reduction using vector operations until we have
5166 // one NarrowTy size value left.
5167 while (SplitSrcs.size() > 1) {
5168 SmallVector<Register> PartialRdxs;
5169 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5170 Register LHS = SplitSrcs[Idx];
5171 Register RHS = SplitSrcs[Idx + 1];
5172 // Create the intermediate vector op.
5173 Register Res =
5174 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
5175 PartialRdxs.push_back(Res);
5176 }
5177 SplitSrcs = std::move(PartialRdxs);
5178 }
5179 // Finally generate the requested NarrowTy based reduction.
5180 Observer.changingInstr(MI);
5181 MI.getOperand(1).setReg(SplitSrcs[0]);
5182 Observer.changedInstr(MI);
5183 return Legalized;
5184}
5185
5186LegalizerHelper::LegalizeResult
5187LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
5188 const LLT HalfTy, const LLT AmtTy) {
5189
5190 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5191 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5192 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5193
5194 if (Amt.isZero()) {
5195 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
5196 MI.eraseFromParent();
5197 return Legalized;
5198 }
5199
5200 LLT NVT = HalfTy;
5201 unsigned NVTBits = HalfTy.getSizeInBits();
5202 unsigned VTBits = 2 * NVTBits;
5203
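  // Worked example (hypothetical values): narrowing a 64-bit G_SHL by Amt = 12
  // to 32-bit halves gives Lo = InL << 12 and
  // Hi = (InH << 12) | (InL >> (32 - 12)); the bits shifted out of the low
  // half are OR'd into the bottom of the high half.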
5204 SrcOp Lo(Register(0)), Hi(Register(0));
5205 if (MI.getOpcode() == TargetOpcode::G_SHL) {
5206 if (Amt.ugt(VTBits)) {
5207 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5208 } else if (Amt.ugt(NVTBits)) {
5209 Lo = MIRBuilder.buildConstant(NVT, 0);
5210 Hi = MIRBuilder.buildShl(NVT, InL,
5211 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5212 } else if (Amt == NVTBits) {
5213 Lo = MIRBuilder.buildConstant(NVT, 0);
5214 Hi = InL;
5215 } else {
5216 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
5217 auto OrLHS =
5218 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
5219 auto OrRHS = MIRBuilder.buildLShr(
5220 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5221 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5222 }
5223 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5224 if (Amt.ugt(VTBits)) {
5225 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5226 } else if (Amt.ugt(NVTBits)) {
5227 Lo = MIRBuilder.buildLShr(NVT, InH,
5228 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5229 Hi = MIRBuilder.buildConstant(NVT, 0);
5230 } else if (Amt == NVTBits) {
5231 Lo = InH;
5232 Hi = MIRBuilder.buildConstant(NVT, 0);
5233 } else {
5234 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5235
5236 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5237 auto OrRHS = MIRBuilder.buildShl(
5238 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5239
5240 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5241 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
5242 }
5243 } else {
5244 if (Amt.ugt(VTBits)) {
5245 Lo = Hi = MIRBuilder.buildAShr(
5246 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5247 } else if (Amt.ugt(NVTBits)) {
5248 Lo = MIRBuilder.buildAShr(NVT, InH,
5249 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5250 Hi = MIRBuilder.buildAShr(NVT, InH,
5251 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5252 } else if (Amt == NVTBits) {
5253 Lo = InH;
5254 Hi = MIRBuilder.buildAShr(NVT, InH,
5255 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5256 } else {
5257 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5258
5259 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5260 auto OrRHS = MIRBuilder.buildShl(
5261 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5262
5263 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5264 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
5265 }
5266 }
5267
5268 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
5269 MI.eraseFromParent();
5270
5271 return Legalized;
5272}
5273
5274// TODO: Optimize if constant shift amount.
5275LegalizerHelper::LegalizeResult
5276LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
5277 LLT RequestedTy) {
5278 if (TypeIdx == 1) {
5279 Observer.changingInstr(MI);
5280 narrowScalarSrc(MI, RequestedTy, 2);
5281 Observer.changedInstr(MI);
5282 return Legalized;
5283 }
5284
5285 Register DstReg = MI.getOperand(0).getReg();
5286 LLT DstTy = MRI.getType(DstReg);
5287 if (DstTy.isVector())
5288 return UnableToLegalize;
5289
5290 Register Amt = MI.getOperand(2).getReg();
5291 LLT ShiftAmtTy = MRI.getType(Amt);
5292 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
5293 if (DstEltSize % 2 != 0)
5294 return UnableToLegalize;
5295
5296 // Ignore the input type. We can only go to exactly half the size of the
5297 // input. If that isn't small enough, the resulting pieces will be further
5298 // legalized.
5299 const unsigned NewBitSize = DstEltSize / 2;
5300 const LLT HalfTy = LLT::scalar(NewBitSize);
5301 const LLT CondTy = LLT::scalar(1);
5302
5303 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
5304 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
5305 ShiftAmtTy);
5306 }
5307
5308 // TODO: Expand with known bits.
5309
5310 // Handle the fully general expansion by an unknown amount.
5311 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
5312
5313 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5314 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5315 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5316
5317 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
5318 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
5319
5320 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
5321 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
5322 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
5323
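  // The expansion below computes both a "short" result (Amt < NewBitSize, the
  // shifted value straddles both halves) and a "long" result (Amt >=
  // NewBitSize, one half is shifted entirely across), then selects between
  // them. IsZero guards the half that would otherwise be shifted by exactly
  // NewBitSize when Amt == 0, which would be an out-of-range shift on HalfTy.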
5324 Register ResultRegs[2];
5325 switch (MI.getOpcode()) {
5326 case TargetOpcode::G_SHL: {
5327 // Short: ShAmt < NewBitSize
5328 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
5329
5330 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
5331 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
5332 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
5333
5334 // Long: ShAmt >= NewBitSize
5335 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
5336 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
5337
5338 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
5339 auto Hi = MIRBuilder.buildSelect(
5340 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
5341
5342 ResultRegs[0] = Lo.getReg(0);
5343 ResultRegs[1] = Hi.getReg(0);
5344 break;
5345 }
5346 case TargetOpcode::G_LSHR:
5347 case TargetOpcode::G_ASHR: {
5348 // Short: ShAmt < NewBitSize
5349 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
5350
5351 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
5352 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
5353 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
5354
5355 // Long: ShAmt >= NewBitSize
5356 MachineInstrBuilder HiL;
5357 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5358 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
5359 } else {
5360 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
5361 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
5362 }
5363 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
5364 {InH, AmtExcess}); // Lo from Hi part.
5365
5366 auto Lo = MIRBuilder.buildSelect(
5367 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
5368
5369 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
5370
5371 ResultRegs[0] = Lo.getReg(0);
5372 ResultRegs[1] = Hi.getReg(0);
5373 break;
5374 }
5375 default:
5376 llvm_unreachable("not a shift");
5377 }
5378
5379 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
5380 MI.eraseFromParent();
5381 return Legalized;
5382}
5383
5384LegalizerHelper::LegalizeResult
5385LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
5386 LLT MoreTy) {
5387 assert(TypeIdx == 0 && "Expecting only Idx 0");
5388 Observer.changingInstr(MI);
5388
5390 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
5391 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
5392 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5393 moreElementsVectorSrc(MI, MoreTy, I);
5394 }
5395
5396 MachineBasicBlock &MBB = *MI.getParent();
5397 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
5398 moreElementsVectorDst(MI, MoreTy, 0);
5399 Observer.changedInstr(MI);
5400 return Legalized;
5401}
5402
5403MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
5404 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
5405 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
5406
5407 switch (Opcode) {
5408 default:
5409 llvm_unreachable(
5410 "getNeutralElementForVecReduce called with invalid opcode!");
5411 case TargetOpcode::G_VECREDUCE_ADD:
5412 case TargetOpcode::G_VECREDUCE_OR:
5413 case TargetOpcode::G_VECREDUCE_XOR:
5414 case TargetOpcode::G_VECREDUCE_UMAX:
5415 return MIRBuilder.buildConstant(Ty, 0);
5416 case TargetOpcode::G_VECREDUCE_MUL:
5417 return MIRBuilder.buildConstant(Ty, 1);
5418 case TargetOpcode::G_VECREDUCE_AND:
5419 case TargetOpcode::G_VECREDUCE_UMIN:
5420 return MIRBuilder.buildConstant(
5421 Ty, APInt::getAllOnes(Ty.getSizeInBits()));
5422 case TargetOpcode::G_VECREDUCE_SMAX:
5423 return MIRBuilder.buildConstant(
5424 Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
5425 case TargetOpcode::G_VECREDUCE_SMIN:
5426 return MIRBuilder.buildConstant(
5427 Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
5428 case TargetOpcode::G_VECREDUCE_FADD:
5429 return MIRBuilder.buildFConstant(Ty, -0.0);
5430 case TargetOpcode::G_VECREDUCE_FMUL:
5431 return MIRBuilder.buildFConstant(Ty, 1.0);
5432 case TargetOpcode::G_VECREDUCE_FMINIMUM:
5433 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
5434 assert(false && "getNeutralElementForVecReduce unimplemented for "
5435 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
5436 }
5437 llvm_unreachable("switch expected to return!");
5438}
5439
5440LegalizerHelper::LegalizeResult
5441LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
5442 LLT MoreTy) {
5443 unsigned Opc = MI.getOpcode();
5444 switch (Opc) {
5445 case TargetOpcode::G_IMPLICIT_DEF:
5446 case TargetOpcode::G_LOAD: {
5447 if (TypeIdx != 0)
5448 return UnableToLegalize;
5449 Observer.changingInstr(MI);
5450 moreElementsVectorDst(MI, MoreTy, 0);
5451 Observer.changedInstr(MI);
5452 return Legalized;
5453 }
5454 case TargetOpcode::G_STORE:
5455 if (TypeIdx != 0)
5456 return UnableToLegalize;
5457 Observer.changingInstr(MI);
5458 moreElementsVectorSrc(MI, MoreTy, 0);
5459 Observer.changedInstr(MI);
5460 return Legalized;
5461 case TargetOpcode::G_AND:
5462 case TargetOpcode::G_OR:
5463 case TargetOpcode::G_XOR:
5464 case TargetOpcode::G_ADD:
5465 case TargetOpcode::G_SUB:
5466 case TargetOpcode::G_MUL:
5467 case TargetOpcode::G_FADD:
5468 case TargetOpcode::G_FSUB:
5469 case TargetOpcode::G_FMUL:
5470 case TargetOpcode::G_FDIV:
5471 case TargetOpcode::G_FCOPYSIGN:
5472 case TargetOpcode::G_UADDSAT:
5473 case TargetOpcode::G_USUBSAT:
5474 case TargetOpcode::G_SADDSAT:
5475 case TargetOpcode::G_SSUBSAT:
5476 case TargetOpcode::G_SMIN:
5477 case TargetOpcode::G_SMAX:
5478 case TargetOpcode::G_UMIN:
5479 case TargetOpcode::G_UMAX:
5480 case TargetOpcode::G_FMINNUM:
5481 case TargetOpcode::G_FMAXNUM:
5482 case TargetOpcode::G_FMINNUM_IEEE:
5483 case TargetOpcode::G_FMAXNUM_IEEE:
5484 case TargetOpcode::G_FMINIMUM:
5485 case TargetOpcode::G_FMAXIMUM:
5486 case TargetOpcode::G_STRICT_FADD:
5487 case TargetOpcode::G_STRICT_FSUB:
5488 case TargetOpcode::G_STRICT_FMUL:
5489 case TargetOpcode::G_SHL:
5490 case TargetOpcode::G_ASHR:
5491 case TargetOpcode::G_LSHR: {
5492 Observer.changingInstr(MI);
5493 moreElementsVectorSrc(MI, MoreTy, 1);
5494 moreElementsVectorSrc(MI, MoreTy, 2);
5495 moreElementsVectorDst(MI, MoreTy, 0);
5496 Observer.changedInstr(MI);
5497 return Legalized;
5498 }
5499 case TargetOpcode::G_FMA:
5500 case TargetOpcode::G_STRICT_FMA:
5501 case TargetOpcode::G_FSHR:
5502 case TargetOpcode::G_FSHL: {
5503 Observer.changingInstr(MI);
5504 moreElementsVectorSrc(MI, MoreTy, 1);
5505 moreElementsVectorSrc(MI, MoreTy, 2);
5506 moreElementsVectorSrc(MI, MoreTy, 3);
5507 moreElementsVectorDst(MI, MoreTy, 0);
5508 Observer.changedInstr(MI);
5509 return Legalized;
5510 }
5511 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
5512 case TargetOpcode::G_EXTRACT:
5513 if (TypeIdx != 1)
5514 return UnableToLegalize;
5515 Observer.changingInstr(MI);
5516 moreElementsVectorSrc(MI, MoreTy, 1);
5517 Observer.changedInstr(MI);
5518 return Legalized;
5519 case TargetOpcode::G_INSERT:
5520 case TargetOpcode::G_INSERT_VECTOR_ELT:
5521 case TargetOpcode::G_FREEZE:
5522 case TargetOpcode::G_FNEG:
5523 case TargetOpcode::G_FABS:
5524 case TargetOpcode::G_FSQRT:
5525 case TargetOpcode::G_FCEIL:
5526 case TargetOpcode::G_FFLOOR:
5527 case TargetOpcode::G_FNEARBYINT:
5528 case TargetOpcode::G_FRINT:
5529 case TargetOpcode::G_INTRINSIC_ROUND:
5530 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
5531 case TargetOpcode::G_INTRINSIC_TRUNC:
5532 case TargetOpcode::G_BSWAP:
5533 case TargetOpcode::G_FCANONICALIZE:
5534 case TargetOpcode::G_SEXT_INREG:
5535 case TargetOpcode::G_ABS:
5536 if (TypeIdx != 0)
5537 return UnableToLegalize;
5538 Observer.changingInstr(MI);
5539 moreElementsVectorSrc(MI, MoreTy, 1);
5540 moreElementsVectorDst(MI, MoreTy, 0);
5541 Observer.changedInstr(MI);
5542 return Legalized;
5543 case TargetOpcode::G_SELECT: {
5544 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
5545 if (TypeIdx == 1) {
5546 if (!CondTy.isScalar() ||
5547 DstTy.getElementCount() != MoreTy.getElementCount())
5548 return UnableToLegalize;
5549
5550 // This is turning a scalar select of vectors into a vector
5551 // select. Broadcast the select condition.
5552 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
5553 Observer.changingInstr(MI);
5554 MI.getOperand(1).setReg(ShufSplat.getReg(0));
5555 Observer.changedInstr(MI);
5556 return Legalized;
5557 }
5558
5559 if (CondTy.isVector())
5560 return UnableToLegalize;
5561
5562 Observer.changingInstr(MI);
5563 moreElementsVectorSrc(MI, MoreTy, 2);
5564 moreElementsVectorSrc(MI, MoreTy, 3);
5565 moreElementsVectorDst(MI, MoreTy, 0);
5566 Observer.changedInstr(MI);
5567 return Legalized;
5568 }
5569 case TargetOpcode::G_UNMERGE_VALUES:
5570 return UnableToLegalize;
5571 case TargetOpcode::G_PHI:
5572 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
5573 case TargetOpcode::G_SHUFFLE_VECTOR:
5574 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
5575 case TargetOpcode::G_BUILD_VECTOR: {
5576 SmallVector<SrcOp, 8> Elts;
5577 for (auto Op : MI.uses()) {
5578 Elts.push_back(Op.getReg());
5579 }
5580
5581 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
5582 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
5583 }
5584
5585 MIRBuilder.buildDeleteTrailingVectorElements(
5586 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
5587 MI.eraseFromParent();
5588 return Legalized;
5589 }
5590 case TargetOpcode::G_SEXT:
5591 case TargetOpcode::G_ZEXT:
5592 case TargetOpcode::G_ANYEXT:
5593 case TargetOpcode::G_TRUNC:
5594 case TargetOpcode::G_FPTRUNC:
5595 case TargetOpcode::G_FPEXT:
5596 case TargetOpcode::G_FPTOSI:
5597 case TargetOpcode::G_FPTOUI:
5598 case TargetOpcode::G_SITOFP:
5599 case TargetOpcode::G_UITOFP: {
5600 Observer.changingInstr(MI);
5601 LLT SrcExtTy;
5602 LLT DstExtTy;
5603 if (TypeIdx == 0) {
5604 DstExtTy = MoreTy;
5605 SrcExtTy = LLT::fixed_vector(
5606 MoreTy.getNumElements(),
5607 MRI.getType(MI.getOperand(1).getReg()).getElementType());
5608 } else {
5609 DstExtTy = LLT::fixed_vector(
5610 MoreTy.getNumElements(),
5611 MRI.getType(MI.getOperand(0).getReg()).getElementType());
5612 SrcExtTy = MoreTy;
5613 }
5614 moreElementsVectorSrc(MI, SrcExtTy, 1);
5615 moreElementsVectorDst(MI, DstExtTy, 0);
5616 Observer.changedInstr(MI);
5617 return Legalized;
5618 }
5619 case TargetOpcode::G_ICMP:
5620 case TargetOpcode::G_FCMP: {
5621 if (TypeIdx != 1)
5622 return UnableToLegalize;
5623
5624 Observer.changingInstr(MI);
5625 moreElementsVectorSrc(MI, MoreTy, 2);
5626 moreElementsVectorSrc(MI, MoreTy, 3);
5627 LLT CondTy = LLT::fixed_vector(
5628 MoreTy.getNumElements(),
5629 MRI.getType(MI.getOperand(0).getReg()).getElementType());
5630 moreElementsVectorDst(MI, CondTy, 0);
5631 Observer.changedInstr(MI);
5632 return Legalized;
5633 }
5634 case TargetOpcode::G_BITCAST: {
5635 if (TypeIdx != 0)
5636 return UnableToLegalize;
5637
5638 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
5639 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5640
5641 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
5642 if (coefficient % DstTy.getNumElements() != 0)
5643 return UnableToLegalize;
5644
5645 coefficient = coefficient / DstTy.getNumElements();
5646
5647 LLT NewTy = SrcTy.changeElementCount(
5648 ElementCount::get(coefficient, MoreTy.isScalable()));
5649 Observer.changingInstr(MI);
5650 moreElementsVectorSrc(MI, NewTy, 1);
5651 moreElementsVectorDst(MI, MoreTy, 0);
5652 Observer.changedInstr(MI);
5653 return Legalized;
5654 }
5655 case TargetOpcode::G_VECREDUCE_FADD:
5656 case TargetOpcode::G_VECREDUCE_FMUL:
5657 case TargetOpcode::G_VECREDUCE_ADD:
5658 case TargetOpcode::G_VECREDUCE_MUL:
5659 case TargetOpcode::G_VECREDUCE_AND:
5660 case TargetOpcode::G_VECREDUCE_OR:
5661 case TargetOpcode::G_VECREDUCE_XOR:
5662 case TargetOpcode::G_VECREDUCE_SMAX:
5663 case TargetOpcode::G_VECREDUCE_SMIN:
5664 case TargetOpcode::G_VECREDUCE_UMAX:
5665 case TargetOpcode::G_VECREDUCE_UMIN: {
5666 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
5667 MachineOperand &MO = MI.getOperand(1);
5668 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
5669 auto NeutralElement = getNeutralElementForVecReduce(
5670 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
5671
5672 LLT IdxTy(TLI.getVectorIdxTy(MIRBuilder.getDataLayout()));
5673 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
5674 i != e; i++) {
5675 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
5676 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
5677 NeutralElement, Idx);
5678 }
5679
5680 Observer.changingInstr(MI);
5681 MO.setReg(NewVec.getReg(0));
5682 Observer.changedInstr(MI);
5683 return Legalized;
5684 }
5685
5686 default:
5687 return UnableToLegalize;
5688 }
5689}
5690
5691LegalizerHelper::LegalizeResult
5692LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
5693 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5694 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5695 unsigned MaskNumElts = Mask.size();
5696 unsigned SrcNumElts = SrcTy.getNumElements();
5697 LLT DestEltTy = DstTy.getElementType();
5698
5699 if (MaskNumElts == SrcNumElts)
5700 return Legalized;
5701
5702 if (MaskNumElts < SrcNumElts) {
5703 // Extend mask to match new destination vector size with
5704 // undef values.
5705 SmallVector<int, 16> NewMask(Mask);
5706 for (unsigned I = MaskNumElts; I < SrcNumElts; ++I)
5707 NewMask.push_back(-1);
5708
5709 moreElementsVectorDst(MI, SrcTy, 0);
5710 MIRBuilder.setInstrAndDebugLoc(MI);
5711 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
5712 MI.getOperand(1).getReg(),
5713 MI.getOperand(2).getReg(), NewMask);
5714 MI.eraseFromParent();
5715
5716 return Legalized;
5717 }
5718
5719 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
5720 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
5721 LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
5722
5723 // Create new source vectors by concatenating the initial
5724 // source vectors with undefined vectors of the same size.
5725 auto Undef = MIRBuilder.buildUndef(SrcTy);
5726 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
5727 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
5728 MOps1[0] = MI.getOperand(1).getReg();
5729 MOps2[0] = MI.getOperand(2).getReg();
5730
5731 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
5732 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
5733
5734 // Readjust mask for new input vector length.
5735 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
5736 for (unsigned I = 0; I != MaskNumElts; ++I) {
5737 int Idx = Mask[I];
5738 if (Idx >= static_cast<int>(SrcNumElts))
5739 Idx += PaddedMaskNumElts - SrcNumElts;
5740 MappedOps[I] = Idx;
5741 }
5742
5743 // If we got more elements than required, extract subvector.
5744 if (MaskNumElts != PaddedMaskNumElts) {
5745 auto Shuffle =
5746 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
5747
5748 SmallVector<Register, 16> Elts(MaskNumElts);
5749 for (unsigned I = 0; I < MaskNumElts; ++I) {
5750 Elts[I] =
5751 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
5752 .getReg(0);
5753 }
5754 MIRBuilder.buildBuildVector(DstReg, Elts);
5755 } else {
5756 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
5757 }
5758
5759 MI.eraseFromParent();
5760 return Legalized;
5761}
5762
5763LegalizerHelper::LegalizeResult
5764LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
5765 unsigned int TypeIdx, LLT MoreTy) {
5766 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
5767 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5768 unsigned NumElts = DstTy.getNumElements();
5769 unsigned WidenNumElts = MoreTy.getNumElements();
5770
5771 if (DstTy.isVector() && Src1Ty.isVector() &&
5772 DstTy.getNumElements() != Src1Ty.getNumElements()) {
5773 return equalizeVectorShuffleLengths(MI);
5774 }
5775
5776 if (TypeIdx != 0)
5777 return UnableToLegalize;
5778
5779 // Expect a canonicalized shuffle.
5780 if (DstTy != Src1Ty || DstTy != Src2Ty)
5781 return UnableToLegalize;
5782
5783 moreElementsVectorSrc(MI, MoreTy, 1);
5784 moreElementsVectorSrc(MI, MoreTy, 2);
5785
5786 // Adjust mask based on new input vector length.
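  // For example (hypothetical sizes), widening <4 x s32> operands to
  // <8 x s32>: an old index 5 (element 1 of the second input) becomes
  // 5 - 4 + 8 = 9, because the second input now starts at element 8 of the
  // concatenated pair.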
5787 SmallVector<int, 16> NewMask;
5788 for (unsigned I = 0; I != NumElts; ++I) {
5789 int Idx = Mask[I];
5790 if (Idx < static_cast<int>(NumElts))
5791 NewMask.push_back(Idx);
5792 else
5793 NewMask.push_back(Idx - NumElts + WidenNumElts);
5794 }
5795 for (unsigned I = NumElts; I != WidenNumElts; ++I)
5796 NewMask.push_back(-1);
5797 moreElementsVectorDst(MI, MoreTy, 0);
5798 MIRBuilder.setInstrAndDebugLoc(MI);
5799 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
5800 MI.getOperand(1).getReg(),
5801 MI.getOperand(2).getReg(), NewMask);
5802 MI.eraseFromParent();
5803 return Legalized;
5804}
5805
5806void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
5807 ArrayRef<Register> Src1Regs,
5808 ArrayRef<Register> Src2Regs,
5809 LLT NarrowTy) {
5810 MachineIRBuilder &B = MIRBuilder;
5811 unsigned SrcParts = Src1Regs.size();
5812 unsigned DstParts = DstRegs.size();
5813
5814 unsigned DstIdx = 0; // Low bits of the result.
5815 Register FactorSum =
5816 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
5817 DstRegs[DstIdx] = FactorSum;
5818
5819 unsigned CarrySumPrevDstIdx;
5820 SmallVector<Register, 4> Factors;
5821
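  // Schoolbook multiplication: the part at DstIdx sums the low halves of all
  // products Src1[DstIdx - i] * Src2[i], the high halves of the products that
  // contributed to DstIdx - 1, and the carries accumulated while summing the
  // previous part.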
5822 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
5823 // Collect low parts of muls for DstIdx.
5824 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
5825 i <= std::min(DstIdx, SrcParts - 1); ++i) {
5826 MachineInstrBuilder Mul =
5827 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
5828 Factors.push_back(Mul.getReg(0));
5829 }
5830 // Collect high parts of muls from previous DstIdx.
5831 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
5832 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
5833 MachineInstrBuilder Umulh =
5834 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
5835 Factors.push_back(Umulh.getReg(0));
5836 }
5837 // Add CarrySum from additions calculated for previous DstIdx.
5838 if (DstIdx != 1) {
5839 Factors.push_back(CarrySumPrevDstIdx);
5840 }
5841
5842 Register CarrySum;
5843 // Add all factors and accumulate all carries into CarrySum.
5844 if (DstIdx != DstParts - 1) {
5845 MachineInstrBuilder Uaddo =
5846 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
5847 FactorSum = Uaddo.getReg(0);
5848 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
5849 for (unsigned i = 2; i < Factors.size(); ++i) {
5850 MachineInstrBuilder Uaddo =
5851 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
5852 FactorSum = Uaddo.getReg(0);
5853 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
5854 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
5855 }
5856 } else {
5857 // Since value for the next index is not calculated, neither is CarrySum.
5858 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
5859 for (unsigned i = 2; i < Factors.size(); ++i)
5860 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
5861 }
5862
5863 CarrySumPrevDstIdx = CarrySum;
5864 DstRegs[DstIdx] = FactorSum;
5865 Factors.clear();
5866 }
5867}
5868
5869LegalizerHelper::LegalizeResult
5870LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
5871 LLT NarrowTy) {
5872 if (TypeIdx != 0)
5873 return UnableToLegalize;
5874
5875 Register DstReg = MI.getOperand(0).getReg();
5876 LLT DstType = MRI.getType(DstReg);
5877 // FIXME: add support for vector types
5878 if (DstType.isVector())
5879 return UnableToLegalize;
5880
5881 unsigned Opcode = MI.getOpcode();
5882 unsigned OpO, OpE, OpF;
5883 switch (Opcode) {
5884 case TargetOpcode::G_SADDO:
5885 case TargetOpcode::G_SADDE:
5886 case TargetOpcode::G_UADDO:
5887 case TargetOpcode::G_UADDE:
5888 case TargetOpcode::G_ADD:
5889 OpO = TargetOpcode::G_UADDO;
5890 OpE = TargetOpcode::G_UADDE;
5891 OpF = TargetOpcode::G_UADDE;
5892 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
5893 OpF = TargetOpcode::G_SADDE;
5894 break;
5895 case TargetOpcode::G_SSUBO:
5896 case TargetOpcode::G_SSUBE:
5897 case TargetOpcode::G_USUBO:
5898 case TargetOpcode::G_USUBE:
5899 case TargetOpcode::G_SUB:
5900 OpO = TargetOpcode::G_USUBO;
5901 OpE = TargetOpcode::G_USUBE;
5902 OpF = TargetOpcode::G_USUBE;
5903 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
5904 OpF = TargetOpcode::G_SSUBE;
5905 break;
5906 default:
5907 llvm_unreachable("Unexpected add/sub opcode!");
5908 }
5909
5910 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
5911 unsigned NumDefs = MI.getNumExplicitDefs();
5912 Register Src1 = MI.getOperand(NumDefs).getReg();
5913 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
5914 Register CarryDst, CarryIn;
5915 if (NumDefs == 2)
5916 CarryDst = MI.getOperand(1).getReg();
5917 if (MI.getNumOperands() == NumDefs + 3)
5918 CarryIn = MI.getOperand(NumDefs + 2).getReg();
5919
5920 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
5921 LLT LeftoverTy, DummyTy;
5922 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
5923 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
5924 MIRBuilder, MRI);
5925 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
5926 MRI);
5927
5928 int NarrowParts = Src1Regs.size();
5929 for (int I = 0, E = Src1Left.size(); I != E; ++I) {
5930 Src1Regs.push_back(Src1Left[I]);
5931 Src2Regs.push_back(Src2Left[I]);
5932 }
5933 DstRegs.reserve(Src1Regs.size());
5934
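  // The parts form a ripple-carry chain: a part with no incoming carry uses
  // OpO, intermediate parts use the unsigned carry-in opcode OpE, and the
  // topmost part uses OpF so that a signed overflow flag, when requested, is
  // computed from the most significant piece.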
5935 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
5936 Register DstReg =
5937 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
5938 Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
5939 // Forward the final carry-out to the destination register
5940 if (i == e - 1 && CarryDst)
5941 CarryOut = CarryDst;
5942
5943 if (!CarryIn) {
5944 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
5945 {Src1Regs[i], Src2Regs[i]});
5946 } else if (i == e - 1) {
5947 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
5948 {Src1Regs[i], Src2Regs[i], CarryIn});
5949 } else {
5950 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
5951 {Src1Regs[i], Src2Regs[i], CarryIn});
5952 }
5953
5954 DstRegs.push_back(DstReg);
5955 CarryIn = CarryOut;
5956 }
5957 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
5958 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
5959 ArrayRef(DstRegs).drop_front(NarrowParts));
5960
5961 MI.eraseFromParent();
5962 return Legalized;
5963}
5964
5965LegalizerHelper::LegalizeResult
5966LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
5967 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
5968
5969 LLT Ty = MRI.getType(DstReg);
5970 if (Ty.isVector())
5971 return UnableToLegalize;
5972
5973 unsigned Size = Ty.getSizeInBits();
5974 unsigned NarrowSize = NarrowTy.getSizeInBits();
5975 if (Size % NarrowSize != 0)
5976 return UnableToLegalize;
5977
5978 unsigned NumParts = Size / NarrowSize;
5979 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
5980 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
5981
5982 SmallVector<Register, 2> Src1Parts, Src2Parts;
5983 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
5984 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
5985 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
5986 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
5987
5988 // Take only high half of registers if this is high mul.
5989 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
5990 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
5991 MI.eraseFromParent();
5992 return Legalized;
5993}
5994
5995LegalizerHelper::LegalizeResult
5996LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
5997 LLT NarrowTy) {
5998 if (TypeIdx != 0)
5999 return UnableToLegalize;
6000
6001 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
6002
6003 Register Src = MI.getOperand(1).getReg();
6004 LLT SrcTy = MRI.getType(Src);
6005
6006 // If all finite floats fit into the narrowed integer type, we can just swap
6007 // out the result type. This is practically only useful for conversions from
6008 // half to at least 16-bits, so just handle the one case.
6009 if (SrcTy.getScalarType() != LLT::scalar(16) ||
6010 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
6011 return UnableToLegalize;
6012
6013 Observer.changingInstr(MI);
6014 narrowScalarDst(MI, NarrowTy, 0,
6015 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
6016 Observer.changedInstr(MI);
6017 return Legalized;
6018}
6019
6020LegalizerHelper::LegalizeResult
6021LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
6022 LLT NarrowTy) {
6023 if (TypeIdx != 1)
6024 return UnableToLegalize;
6025
6026 uint64_t NarrowSize = NarrowTy.getSizeInBits();
6027
6028 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
6029 // FIXME: add support for when SizeOp1 isn't an exact multiple of
6030 // NarrowSize.
6031 if (SizeOp1 % NarrowSize != 0)
6032 return UnableToLegalize;
6033 int NumParts = SizeOp1 / NarrowSize;
6034
6035 SmallVector<Register, 2> SrcRegs, DstRegs;
6037 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
6038 MIRBuilder, MRI);
6039
6040 Register OpReg = MI.getOperand(0).getReg();
6041 uint64_t OpStart = MI.getOperand(2).getImm();
6042 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
6043 for (int i = 0; i < NumParts; ++i) {
6044 unsigned SrcStart = i * NarrowSize;
6045
6046 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
6047 // No part of the extract uses this subregister, ignore it.
6048 continue;
6049 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6050 // The entire subregister is extracted, forward the value.
6051 DstRegs.push_back(SrcRegs[i]);
6052 continue;
6053 }
6054
6055 // OpSegStart is where this destination segment would start in OpReg if it
6056 // extended infinitely in both directions.
6057 int64_t ExtractOffset;
6058 uint64_t SegSize;
6059 if (OpStart < SrcStart) {
6060 ExtractOffset = 0;
6061 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
6062 } else {
6063 ExtractOffset = OpStart - SrcStart;
6064 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
6065 }
6066
6067 Register SegReg = SrcRegs[i];
6068 if (ExtractOffset != 0 || SegSize != NarrowSize) {
6069 // A genuine extract is needed.
6070 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
6071 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
6072 }
6073
6074 DstRegs.push_back(SegReg);
6075 }
6076
6077 Register DstReg = MI.getOperand(0).getReg();
6078 if (MRI.getType(DstReg).isVector())
6079 MIRBuilder.buildBuildVector(DstReg, DstRegs);
6080 else if (DstRegs.size() > 1)
6081 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6082 else
6083 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
6084 MI.eraseFromParent();
6085 return Legalized;
6086}
6087
6088LegalizerHelper::LegalizeResult
6089LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
6090 LLT NarrowTy) {
6091 // FIXME: Don't know how to handle secondary types yet.
6092 if (TypeIdx != 0)
6093 return UnableToLegalize;
6094
6095 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
6097 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
6098 LLT LeftoverTy;
6099 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
6100 LeftoverRegs, MIRBuilder, MRI);
6101
6102 for (Register Reg : LeftoverRegs)
6103 SrcRegs.push_back(Reg);
6104
6105 uint64_t NarrowSize = NarrowTy.getSizeInBits();
6106 Register OpReg = MI.getOperand(2).getReg();
6107 uint64_t OpStart = MI.getOperand(3).getImm();
6108 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
6109 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
6110 unsigned DstStart = I * NarrowSize;
6111
6112 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6113 // The entire subregister is defined by this insert, forward the new
6114 // value.
6115 DstRegs.push_back(OpReg);
6116 continue;
6117 }
6118
6119 Register SrcReg = SrcRegs[I];
6120 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
6121 // The leftover reg is smaller than NarrowTy, so we need to extend it.
6122 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
6123 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
6124 }
6125
6126 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
6127 // No part of the insert affects this subregister, forward the original.
6128 DstRegs.push_back(SrcReg);
6129 continue;
6130 }
6131
6132 // OpSegStart is where this destination segment would start in OpReg if it
6133 // extended infinitely in both directions.
6134 int64_t ExtractOffset, InsertOffset;
6135 uint64_t SegSize;
6136 if (OpStart < DstStart) {
6137 InsertOffset = 0;
6138 ExtractOffset = DstStart - OpStart;
6139 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
6140 } else {
6141 InsertOffset = OpStart - DstStart;
6142 ExtractOffset = 0;
6143 SegSize =
6144 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
6145 }
6146
6147 Register SegReg = OpReg;
6148 if (ExtractOffset != 0 || SegSize != OpSize) {
6149 // A genuine extract is needed.
6150 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
6151 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
6152 }
6153
6154 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
6155 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
6156 DstRegs.push_back(DstReg);
6157 }
6158
6159 uint64_t WideSize = DstRegs.size() * NarrowSize;
6160 Register DstReg = MI.getOperand(0).getReg();
6161 if (WideSize > RegTy.getSizeInBits()) {
6162 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
6163 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
6164 MIRBuilder.buildTrunc(DstReg, MergeReg);
6165 } else
6166 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6167
6168 MI.eraseFromParent();
6169 return Legalized;
6170}
6171
6174 LLT NarrowTy) {
6175 Register DstReg = MI.getOperand(0).getReg();
6176 LLT DstTy = MRI.getType(DstReg);
6177
6178 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
6179
6180 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6181 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
6182 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6183 LLT LeftoverTy;
6184 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
6185 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
6186 return UnableToLegalize;
6187
6188 LLT Unused;
6189 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
6190 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6191 llvm_unreachable("inconsistent extractParts result");
6192
6193 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6194 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
6195 {Src0Regs[I], Src1Regs[I]});
6196 DstRegs.push_back(Inst.getReg(0));
6197 }
6198
6199 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6200 auto Inst = MIRBuilder.buildInstr(
6201 MI.getOpcode(),
6202 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
6203 DstLeftoverRegs.push_back(Inst.getReg(0));
6204 }
6205
6206 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6207 LeftoverTy, DstLeftoverRegs);
6208
6209 MI.eraseFromParent();
6210 return Legalized;
6211}
6212
6213LegalizerHelper::LegalizeResult
6214LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
6215 LLT NarrowTy) {
6216 if (TypeIdx != 0)
6217 return UnableToLegalize;
6218
6219 auto [DstReg, SrcReg] = MI.getFirst2Regs();
6220
6221 LLT DstTy = MRI.getType(DstReg);
6222 if (DstTy.isVector())
6223 return UnableToLegalize;
6224 SmallVector<Register, 8> Parts;
6224
6226 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
6227 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
6228 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
6229
6230 MI.eraseFromParent();
6231 return Legalized;
6232}
6233
6234LegalizerHelper::LegalizeResult
6235LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
6236 LLT NarrowTy) {
6237 if (TypeIdx != 0)
6238 return UnableToLegalize;
6239
6240 Register CondReg = MI.getOperand(1).getReg();
6241 LLT CondTy = MRI.getType(CondReg);
6242 if (CondTy.isVector()) // TODO: Handle vselect
6243 return UnableToLegalize;
6244
6245 Register DstReg = MI.getOperand(0).getReg();
6246 LLT DstTy = MRI.getType(DstReg);
6247
6248 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6249 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6250 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
6251 LLT LeftoverTy;
6252 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
6253 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6254 return UnableToLegalize;
6255
6256 LLT Unused;
6257 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
6258 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
6259 llvm_unreachable("inconsistent extractParts result");
6260
6261 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6262 auto Select = MIRBuilder.buildSelect(NarrowTy,
6263 CondReg, Src1Regs[I], Src2Regs[I]);
6264 DstRegs.push_back(Select.getReg(0));
6265 }
6266
6267 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6268 auto Select = MIRBuilder.buildSelect(
6269 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
6270 DstLeftoverRegs.push_back(Select.getReg(0));
6271 }
6272
6273 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6274 LeftoverTy, DstLeftoverRegs);
6275
6276 MI.eraseFromParent();
6277 return Legalized;
6278}
6279
6280LegalizerHelper::LegalizeResult
6281LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
6282 LLT NarrowTy) {
6283 if (TypeIdx != 1)
6284 return UnableToLegalize;
6285
6286 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6287 unsigned NarrowSize = NarrowTy.getSizeInBits();
6288
6289 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6290 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
6291
6292 MachineIRBuilder &B = MIRBuilder;
6293 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6294 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
6295 auto C_0 = B.buildConstant(NarrowTy, 0);
6296 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
6297 UnmergeSrc.getReg(1), C_0);
6298 auto LoCTLZ = IsUndef ?
6299 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
6300 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
6301 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
6302 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
6303 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
6304 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
6305
6306 MI.eraseFromParent();
6307 return Legalized;
6308 }
6309
6310 return UnableToLegalize;
6311}
6312
6313LegalizerHelper::LegalizeResult
6314LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
6315 LLT NarrowTy) {
6316 if (TypeIdx != 1)
6317 return UnableToLegalize;
6318
6319 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6320 unsigned NarrowSize = NarrowTy.getSizeInBits();
6321
6322 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6323 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
6324
6325 MachineIRBuilder &B = MIRBuilder;
6326 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6327 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
6328 auto C_0 = B.buildConstant(NarrowTy, 0);
6329 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
6330 UnmergeSrc.getReg(0), C_0);
6331 auto HiCTTZ = IsUndef ?
6332 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
6333 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
6334 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
6335 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
6336 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
6337 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
6338
6339 MI.eraseFromParent();
6340 return Legalized;
6341 }
6342
6343 return UnableToLegalize;
6344}
6345
6346LegalizerHelper::LegalizeResult
6347LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
6348 LLT NarrowTy) {
6349 if (TypeIdx != 1)
6350 return UnableToLegalize;
6351
6352 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6353 unsigned NarrowSize = NarrowTy.getSizeInBits();
6354
6355 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6356 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
6357
6358 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
6359 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
6360 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
6361
6362 MI.eraseFromParent();
6363 return Legalized;
6364 }
6365
6366 return UnableToLegalize;
6367}
6368
6369LegalizerHelper::LegalizeResult
6370LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
6371 LLT NarrowTy) {
6372 if (TypeIdx != 1)
6373 return UnableToLegalize;
6374 MachineIRBuilder &B = MIRBuilder;
6374
6376 Register ExpReg = MI.getOperand(2).getReg();
6377 LLT ExpTy = MRI.getType(ExpReg);
6378
6379 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
6380
6381 // Clamp the exponent to the range of the target type.
6382 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
6383 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
6384 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
6385 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
6386
6387 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
6388 Observer.changingInstr(MI);
6389 MI.getOperand(2).setReg(Trunc.getReg(0));
6390 Observer.changedInstr(MI);
6391 return Legalized;
6392}
6393
6394LegalizerHelper::LegalizeResult
6395LegalizerHelper::lowerBitCount(MachineInstr &MI) {
6396 unsigned Opc = MI.getOpcode();
6397 const auto &TII = MIRBuilder.getTII();
6398 auto isSupported = [this](const LegalityQuery &Q) {
6399 auto QAction = LI.getAction(Q).Action;
6400 return QAction == Legal || QAction == Libcall || QAction == Custom;
6401 };
6402 switch (Opc) {
6403 default:
6404 return UnableToLegalize;
6405 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
6406 // This trivially expands to CTLZ.
6407 Observer.changingInstr(MI);
6408 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
6409 Observer.changedInstr(MI);
6410 return Legalized;
6411 }
6412 case TargetOpcode::G_CTLZ: {
6413 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6414 unsigned Len = SrcTy.getSizeInBits();
6415
6416 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6417 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
6418 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
6419 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
6420 auto ICmp = MIRBuilder.buildICmp(
6421 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
6422 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
6423 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
6424 MI.eraseFromParent();
6425 return Legalized;
6426 }
6427 // for now, we do this:
6428 // NewLen = NextPowerOf2(Len);
6429 // x = x | (x >> 1);
6430 // x = x | (x >> 2);
6431 // ...
6432 // x = x | (x >>16);
6433 // x = x | (x >>32); // for 64-bit input
6434 // Up to NewLen/2
6435 // return Len - popcount(x);
6436 //
6437 // Ref: "Hacker's Delight" by Henry Warren
6438 Register Op = SrcReg;
6439 unsigned NewLen = PowerOf2Ceil(Len);
6440 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
6441 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
6442 auto MIBOp = MIRBuilder.buildOr(
6443 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
6444 Op = MIBOp.getReg(0);
6445 }
6446 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
6447 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
6448 MIBPop);
6449 MI.eraseFromParent();
6450 return Legalized;
6451 }
6452 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
6453 // This trivially expands to CTTZ.
6454 Observer.changingInstr(MI);
6455 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
6456 Observer.changedInstr(MI);
6457 return Legalized;
6458 }
6459 case TargetOpcode::G_CTTZ: {
6460 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6461
6462 unsigned Len = SrcTy.getSizeInBits();
6463 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6464 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
6465 // zero.
6466 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
6467 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
6468 auto ICmp = MIRBuilder.buildICmp(
6469 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
6470 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
6471 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
6472 MI.eraseFromParent();
6473 return Legalized;
6474 }
6475 // for now, we use: { return popcount(~x & (x - 1)); }
6476 // unless the target has ctlz but not ctpop, in which case we use:
6477 // { return 32 - nlz(~x & (x-1)); }
6478 // Ref: "Hacker's Delight" by Henry Warren
6479 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
6480 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
6481 auto MIBTmp = MIRBuilder.buildAnd(
6482 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
6483 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
6484 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
6485 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
6486 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
6487 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
6488 MI.eraseFromParent();
6489 return Legalized;
6490 }
6491 Observer.changingInstr(MI);
6492 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
6493 MI.getOperand(1).setReg(MIBTmp.getReg(0));
6494 Observer.changedInstr(MI);
6495 return Legalized;
6496 }
6497 case TargetOpcode::G_CTPOP: {
6498 Register SrcReg = MI.getOperand(1).getReg();
6499 LLT Ty = MRI.getType(SrcReg);
6500 unsigned Size = Ty.getSizeInBits();
6501 MachineIRBuilder &B = MIRBuilder;
6502
6503 // Count set bits in blocks of 2 bits. Default approach would be
6504 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
6505 // We use following formula instead:
6506 // B2Count = val - { (val >> 1) & 0x55555555 }
6507 // since it gives same result in blocks of 2 with one instruction less.
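    // Checking a single 2-bit block: val = 0b11 gives 3 - 1 = 2 set bits,
    // val = 0b10 gives 2 - 1 = 1, and val = 0b01 gives 1 - 0 = 1.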
6508 auto C_1 = B.buildConstant(Ty, 1);
6509 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
6510 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
6511 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
6512 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
6513 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
6514
6515 // In order to get the count in blocks of 4, add values from adjacent blocks of 2.
6516 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
6517 auto C_2 = B.buildConstant(Ty, 2);
6518 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
6519 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
6520 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
6521 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
6522 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
6523 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
6524
6525 // For count in blocks of 8 bits we don't have to mask high 4 bits before
6526 // addition since count value sits in range {0,...,8} and 4 bits are enough
6527 // to hold such binary values. After addition high 4 bits still hold count
6528 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
6529 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
6530 auto C_4 = B.buildConstant(Ty, 4);
6531 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
6532 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
6533 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
6534 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
6535 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
6536
6537 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
6538 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
6539 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
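    // Multiplying by the 0x01 splat adds every 8-bit block of B8Count into the
    // most significant byte, so the LShr by Size - 8 below moves the total
    // count into the low bits.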
6540 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
6541
6542 // Shift count result from 8 high bits to low bits.
6543 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
6544
6545 auto IsMulSupported = [this](const LLT Ty) {
6546 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
6547 return Action == Legal || Action == WidenScalar || Action == Custom;
6548 };
6549 if (IsMulSupported(Ty)) {
6550 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
6551 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
6552 } else {
6553 auto ResTmp = B8Count;
6554 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
6555 auto ShiftC = B.buildConstant(Ty, Shift);
6556 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
6557 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
6558 }
6559 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
6560 }
6561 MI.eraseFromParent();
6562 return Legalized;
6563 }
6564 }
6565}
6566
6567// Check that (every element of) Reg is undef or not an exact multiple of BW.
6568static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
6569 Register Reg, unsigned BW) {
6570 return matchUnaryPredicate(
6571 MRI, Reg,
6572 [=](const Constant *C) {
6573 // Null constant here means an undef.
6574 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
6575 return !CI || CI->getValue().urem(BW) != 0;
6576 },
6577 /*AllowUndefs*/ true);
6578}
6579
6580LegalizerHelper::LegalizeResult
6581LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
6582 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
6583 LLT Ty = MRI.getType(Dst);
6584 LLT ShTy = MRI.getType(Z);
6585
6586 unsigned BW = Ty.getScalarSizeInBits();
6587
6588 if (!isPowerOf2_32(BW))
6589 return UnableToLegalize;
6590
6591 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6592 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6593
6594 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
6595 // fshl X, Y, Z -> fshr X, Y, -Z
6596 // fshr X, Y, Z -> fshl X, Y, -Z
6597 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
6598 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
6599 } else {
6600 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
6601 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
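    // ~Z taken modulo BW equals (BW - 1) - (Z % BW), which is always a valid
    // shift amount; the explicit pre-shifts by one make up the remaining bit,
    // so the combined result still equals a funnel shift by Z without ever
    // shifting by the full bit width.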
6602 auto One = MIRBuilder.buildConstant(ShTy, 1);
6603 if (IsFSHL) {
6604 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
6605 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
6606 } else {
6607 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
6608 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
6609 }
6610
6611 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
6612 }
6613
6614 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
6615 MI.eraseFromParent();
6616 return Legalized;
6617}
6618
6619LegalizerHelper::LegalizeResult
6620LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
6621 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
6622 LLT Ty = MRI.getType(Dst);
6623 LLT ShTy = MRI.getType(Z);
6624
6625 const unsigned BW = Ty.getScalarSizeInBits();
6626 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6627
6628 Register ShX, ShY;
6629 Register ShAmt, InvShAmt;
6630
6631 // FIXME: Emit optimized urem by constant instead of letting it expand later.
6632 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
6633 // fshl: X << C | Y >> (BW - C)
6634 // fshr: X << (BW - C) | Y >> C
6635 // where C = Z % BW is not zero
6636 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
6637 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
6638 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
6639 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
6640 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
6641 } else {
6642 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
6643 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
6644 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
6645 if (isPowerOf2_32(BW)) {
6646 // Z % BW -> Z & (BW - 1)
6647 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
6648 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
6649 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
6650 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
6651 } else {
6652 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
6653 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
6654 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
6655 }
6656
6657 auto One = MIRBuilder.buildConstant(ShTy, 1);
6658 if (IsFSHL) {
6659 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
6660 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
6661 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
6662 } else {
6663 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
6664 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
6665 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
6666 }
6667 }
6668
6669 MIRBuilder.buildOr(Dst, ShX, ShY);
6670 MI.eraseFromParent();
6671 return Legalized;
6672}
6673
6674LegalizerHelper::LegalizeResult
6675LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
6676 // These operations approximately do the following (while avoiding undefined
6677 // shifts by BW):
6678 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
6679 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
6680 Register Dst = MI.getOperand(0).getReg();
6681 LLT Ty = MRI.getType(Dst);
6682 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
6683
6684 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6685 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6686
6687 // TODO: Use smarter heuristic that accounts for vector legalization.
6688 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
6689 return lowerFunnelShiftAsShifts(MI);
6690
6691 // This only works for powers of 2, fallback to shifts if it fails.
6692 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
6693 if (Result == UnableToLegalize)
6694 return lowerFunnelShiftAsShifts(MI);
6695 return Result;
6696}
6697
6698LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
6699 auto [Dst, Src] = MI.getFirst2Regs();
6700 LLT DstTy = MRI.getType(Dst);
6701 LLT SrcTy = MRI.getType(Src);
6702
6703 uint32_t DstTySize = DstTy.getSizeInBits();
6704 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
6705 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
6706
6707 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
6708 !isPowerOf2_32(SrcTyScalarSize))
6709 return UnableToLegalize;
6710
6711 // The step between extend is too large, split it by creating an intermediate
6712 // extend instruction
6713 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
6714 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
6715 // If the destination type is illegal, split it into multiple statements
6716 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
6717 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
6718 // Unmerge the vector
6719 LLT EltTy = MidTy.changeElementCount(
6720 MidTy.getElementCount().divideCoefficientBy(2));
6721 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
6722
6723 // ZExt the vectors
6724 LLT ZExtResTy = DstTy.changeElementCount(
6725 DstTy.getElementCount().divideCoefficientBy(2));
6726 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
6727 {UnmergeSrc.getReg(0)});
6728 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
6729 {UnmergeSrc.getReg(1)});
6730
6731 // Merge the ending vectors
6732 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
6733
6734 MI.eraseFromParent();
6735 return Legalized;
6736 }
6737 return UnableToLegalize;
6738}
6739
6740LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
6741 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
6743 // Similar to how operand splitting is done in SelectionDAG, we can handle
6744 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
6745 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
6746 // %lo16(<4 x s16>) = G_TRUNC %inlo
6747 // %hi16(<4 x s16>) = G_TRUNC %inhi
6748 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
6749 // %res(<8 x s8>) = G_TRUNC %in16
6750
6751 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
6752
6753 Register DstReg = MI.getOperand(0).getReg();
6754 Register SrcReg = MI.getOperand(1).getReg();
6755 LLT DstTy = MRI.getType(DstReg);
6756 LLT SrcTy = MRI.getType(SrcReg);
6757
6758 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
6759 isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
6760 isPowerOf2_32(SrcTy.getNumElements()) &&
6761 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
6762 // Split input type.
6763 LLT SplitSrcTy = SrcTy.changeElementCount(
6764 SrcTy.getElementCount().divideCoefficientBy(2));
6765
6766 // First, split the source into two smaller vectors.
6767 SmallVector<Register, 2> SplitSrcs;
6768 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
6769
6770 // Truncate the splits into intermediate narrower elements.
6771 LLT InterTy;
6772 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
6773 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
6774 else
6775 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
6776 for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
6777 SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
6778 }
6779
6780 // Combine the new truncates into one vector
6781 auto Merge = MIRBuilder.buildMergeLikeInstr(
6782 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
6783
6784 // Truncate the new vector to the final result type
6785 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
6786 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
6787 else
6788 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
6789
6790 MI.eraseFromParent();
6791
6792 return Legalized;
6793 }
6794 return UnableToLegalize;
6795}
6796
6797LegalizerHelper::LegalizeResult
6798LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
6799 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
6800 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
6801 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
6802 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6803 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
6804 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
6805 MI.eraseFromParent();
6806 return Legalized;
6807}
6808
6809LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
6810 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
6811
6812 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
6813 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
6814
6816
6817 // If a rotate in the other direction is supported, use it.
6818 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6819 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
6820 isPowerOf2_32(EltSizeInBits))
6821 return lowerRotateWithReverseRotate(MI);
6822
6823 // If a funnel shift is supported, use it.
6824 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6825 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6826 bool IsFShLegal = false;
6827 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
6828 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
6829 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
6830 Register R3) {
6831 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
6832 MI.eraseFromParent();
6833 return Legalized;
6834 };
6835 // If a funnel shift in the other direction is supported, use it.
6836 if (IsFShLegal) {
6837 return buildFunnelShift(FShOpc, Dst, Src, Amt);
6838 } else if (isPowerOf2_32(EltSizeInBits)) {
6839 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
6840 return buildFunnelShift(RevFsh, Dst, Src, Amt);
6841 }
6842 }
6843
6844 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
6845 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
6846 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
6847 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
6848 Register ShVal;
6849 Register RevShiftVal;
6850 if (isPowerOf2_32(EltSizeInBits)) {
6851 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
6852 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
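 // E.g. for a 32-bit rotl by 40: 40 & 31 = 8 and -40 & 31 = 24, so the
 // expansion is (x << 8) | (x >> 24), the same as rotating left by 8.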
6853 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
6854 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
6855 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
6856 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
6857 RevShiftVal =
6858 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
6859 } else {
6860 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
6861 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
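 // The reverse shift is split into a shift by 1 followed by a shift by
 // (w - 1 - (c % w)) so that both shift amounts stay below w even when
 // c % w == 0.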
6862 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
6863 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
6864 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
6865 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
6866 auto One = MIRBuilder.buildConstant(AmtTy, 1);
6867 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
6868 RevShiftVal =
6869 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
6870 }
6871 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
6872 MI.eraseFromParent();
6873 return Legalized;
6874}
6875
6876// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
6877// representation.
6878LegalizerHelper::LegalizeResult
6879LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
6880 auto [Dst, Src] = MI.getFirst2Regs();
6881 const LLT S64 = LLT::scalar(64);
6882 const LLT S32 = LLT::scalar(32);
6883 const LLT S1 = LLT::scalar(1);
6884
6885 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
6886
6887 // unsigned cul2f(ulong u) {
6888 // uint lz = clz(u);
6889 // uint e = (u != 0) ? 127U + 63U - lz : 0;
6890 // u = (u << lz) & 0x7fffffffffffffffUL;
6891 // ulong t = u & 0xffffffffffUL;
6892 // uint v = (e << 23) | (uint)(u >> 40);
6893 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
6894 // return as_float(v + r);
6895 // }
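 // For example, u = 3: lz = 62, e = 127 + 63 - 62 = 128, the shifted value
 // contributes mantissa bits 0x400000, so v = 0x40400000, r = 0, and the
 // result is as_float(0x40400000) = 3.0f.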
6896
6897 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
6898 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
6899
6900 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
6901
6902 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
6903 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
6904
6905 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
6906 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
6907
6908 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
6909 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
6910
6911 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
6912
6913 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
6914 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
6915
6916 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
6917 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
6918 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
6919
6920 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
6921 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
6922 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
6923 auto One = MIRBuilder.buildConstant(S32, 1);
6924
6925 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
6926 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
6927 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
6928 MIRBuilder.buildAdd(Dst, V, R);
6929
6930 MI.eraseFromParent();
6931 return Legalized;
6932}
6933
6934LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
6935 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6936
6937 if (SrcTy == LLT::scalar(1)) {
6938 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
6939 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
6940 MIRBuilder.buildSelect(Dst, Src, True, False);
6941 MI.eraseFromParent();
6942 return Legalized;
6943 }
6944
6945 if (SrcTy != LLT::scalar(64))
6946 return UnableToLegalize;
6947
6948 if (DstTy == LLT::scalar(32)) {
6949 // TODO: SelectionDAG has several alternative expansions to port which may
6950 // be more reasonable depending on the available instructions. If a target
6951 // has sitofp, does not have CTLZ, or can efficiently use f64 as an
6952 // intermediate type, this is probably worse.
6953 return lowerU64ToF32BitOps(MI);
6954 }
6955
6956 return UnableToLegalize;
6957}
6958
6959LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
6960 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6961
6962 const LLT S64 = LLT::scalar(64);
6963 const LLT S32 = LLT::scalar(32);
6964 const LLT S1 = LLT::scalar(1);
6965
6966 if (SrcTy == S1) {
6967 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
6968 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
6969 MIRBuilder.buildSelect(Dst, Src, True, False);
6970 MI.eraseFromParent();
6971 return Legalized;
6972 }
6973
6974 if (SrcTy != S64)
6975 return UnableToLegalize;
6976
6977 if (DstTy == S32) {
6978 // signed cl2f(long l) {
6979 // long s = l >> 63;
6980 // float r = cul2f((l + s) ^ s);
6981 // return s ? -r : r;
6982 // }
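 // s is 0 for non-negative l and all ones for negative l, so (l + s) ^ s
 // computes |l| without a branch: for negative l it is ~(l - 1) == -l.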
6983 Register L = Src;
6984 auto SignBit = MIRBuilder.buildConstant(S64, 63);
6985 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
6986
6987 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
6988 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
6989 auto R = MIRBuilder.buildUITOFP(S32, Xor);
6990
6991 auto RNeg = MIRBuilder.buildFNeg(S32, R);
6992 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
6993 MIRBuilder.buildConstant(S64, 0));
6994 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
6995 MI.eraseFromParent();
6996 return Legalized;
6997 }
6998
6999 return UnableToLegalize;
7000}
7001
7002LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
7003 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7004 const LLT S64 = LLT::scalar(64);
7005 const LLT S32 = LLT::scalar(32);
7006
7007 if (SrcTy != S64 && SrcTy != S32)
7008 return UnableToLegalize;
7009 if (DstTy != S32 && DstTy != S64)
7010 return UnableToLegalize;
7011
7012 // FPTOSI gives the same result as FPTOUI for positive signed integers.
7013 // FPTOUI needs to deal with fp values that convert to unsigned integers
7014 // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
7015
7016 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
7017 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
7018 : APFloat::IEEEdouble(),
7019 APInt::getZero(SrcTy.getSizeInBits()));
7020 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
7021
7022 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
7023
7024 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
7025 // For fp values greater than or equal to Threshold (2^Exp), we use FPTOSI on
7026 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
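 // E.g. for f32 -> i32 with input 3.5e9 (>= 2^31): FPTOSI(3.5e9 - 2^31)
 // yields 1352516352, and XOR-ing in the sign-bit constant gives 3500000000.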
7027 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
7028 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
7029 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
7030 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
7031
7032 const LLT S1 = LLT::scalar(1);
7033
7034 MachineInstrBuilder FCMP =
7035 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
7036 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
7037
7038 MI.eraseFromParent();
7039 return Legalized;
7040}
7041
7042LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
7043 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7044 const LLT S64 = LLT::scalar(64);
7045 const LLT S32 = LLT::scalar(32);
7046
7047 // FIXME: Only f32 to i64 conversions are supported.
7048 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
7049 return UnableToLegalize;
7050
7051 // Expand f32 -> i64 conversion
7052 // This algorithm comes from compiler-rt's implementation of fixsfdi:
7053 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
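 // An IEEE f32 value is sign(1) | exponent(8, bias 127) | mantissa(23), hence
 // the 0x7F800000 exponent mask, the 0x007FFFFF mantissa mask and the implicit
 // integer bit 0x00800000 used below.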
7054
7055 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
7056
7057 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
7058 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
7059
7060 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
7061 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
7062
7063 auto SignMask = MIRBuilder.buildConstant(SrcTy,
7064 APInt::getSignMask(SrcEltBits));
7065 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
7066 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
7067 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
7068 Sign = MIRBuilder.buildSExt(DstTy, Sign);
7069
7070 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
7071 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
7072 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
7073
7074 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
7075 R = MIRBuilder.buildZExt(DstTy, R);
7076
7077 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
7078 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
7079 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
7080 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
7081
7082 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
7083 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
7084
7085 const LLT S1 = LLT::scalar(1);
7086 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
7087 S1, Exponent, ExponentLoBit);
7088
7089 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
7090
7091 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
7092 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
7093
7094 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
7095
7096 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
7097 S1, Exponent, ZeroSrcTy);
7098
7099 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
7100 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
7101
7102 MI.eraseFromParent();
7103 return Legalized;
7104}
7105
7106// f64 -> f16 conversion using round-to-nearest-even rounding mode.
7107LegalizerHelper::LegalizeResult
7108LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
7109 const LLT S1 = LLT::scalar(1);
7110 const LLT S32 = LLT::scalar(32);
7111
7112 auto [Dst, Src] = MI.getFirst2Regs();
7113 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
7114 MRI.getType(Src).getScalarType() == LLT::scalar(64));
7115
7116 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
7117 return UnableToLegalize;
7118
7120 unsigned Flags = MI.getFlags();
7121 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
7122 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
7123 MI.eraseFromParent();
7124 return Legalized;
7125 }
7126
7127 const unsigned ExpMask = 0x7ff;
7128 const unsigned ExpBiasf64 = 1023;
7129 const unsigned ExpBiasf16 = 15;
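 // IEEE f64 is sign(1) | exponent(11, bias 1023) | mantissa(52); the f16
 // result is sign(1) | exponent(5, bias 15) | mantissa(10).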
7130
7131 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
7132 Register U = Unmerge.getReg(0);
7133 Register UH = Unmerge.getReg(1);
7134
7135 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
7136 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
7137
7138 // Subtract the fp64 exponent bias (1023) to get the real exponent and
7139 // add the f16 bias (15) to get the biased exponent for the f16 format.
7140 E = MIRBuilder.buildAdd(
7141 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
7142
7143 auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
7144 M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
7145
7146 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
7147 MIRBuilder.buildConstant(S32, 0x1ff));
7148 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
7149
7150 auto Zero = MIRBuilder.buildConstant(S32, 0);
7151 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
7152 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
7153 M = MIRBuilder.buildOr(S32, M, Lo40Set);
7154
7155 // (M != 0 ? 0x0200 : 0) | 0x7c00;
7156 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
7157 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
7158 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
7159
7160 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
7161 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
7162
7163 // N = M | (E << 12);
7164 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
7165 auto N = MIRBuilder.buildOr(S32, M, EShl12);
7166
7167 // B = clamp(1-E, 0, 13);
7168 auto One = MIRBuilder.buildConstant(S32, 1);
7169 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
7170 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
7171 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
7172
7173 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
7174 MIRBuilder.buildConstant(S32, 0x1000));
7175
7176 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
7177 auto D0 = MIRBuilder.buildShl(S32, D, B);
7178
7179 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
7180 D0, SigSetHigh);
7181 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
7182 D = MIRBuilder.buildOr(S32, D, D1);
7183
7184 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
7185 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
7186
7187 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
7188 V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
7189
7190 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
7191 MIRBuilder.buildConstant(S32, 3));
7192 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
7193
7194 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
7195 MIRBuilder.buildConstant(S32, 5));
7196 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
7197
7198 V1 = MIRBuilder.buildOr(S32, V0, V1);
7199 V = MIRBuilder.buildAdd(S32, V, V1);
7200
7201 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
7202 E, MIRBuilder.buildConstant(S32, 30));
7203 V = MIRBuilder.buildSelect(S32, CmpEGt30,
7204 MIRBuilder.buildConstant(S32, 0x7c00), V);
7205
7206 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
7207 E, MIRBuilder.buildConstant(S32, 1039));
7208 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
7209
7210 // Extract the sign bit.
7211 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
7212 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
7213
7214 // Insert the sign bit
7215 V = MIRBuilder.buildOr(S32, Sign, V);
7216
7217 MIRBuilder.buildTrunc(Dst, V);
7218 MI.eraseFromParent();
7219 return Legalized;
7220}
7221
7222LegalizerHelper::LegalizeResult
7223LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
7224 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
7225 const LLT S64 = LLT::scalar(64);
7226 const LLT S16 = LLT::scalar(16);
7227
7228 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
7229 return lowerFPTRUNC_F64_TO_F16(MI);
7230
7231 return UnableToLegalize;
7232}
7233
7234LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
7235 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7236 LLT Ty = MRI.getType(Dst);
7237
7238 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
7239 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
7240 MI.eraseFromParent();
7241 return Legalized;
7242}
7243
7244static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
7245 switch (Opc) {
7246 case TargetOpcode::G_SMIN:
7247 return CmpInst::ICMP_SLT;
7248 case TargetOpcode::G_SMAX:
7249 return CmpInst::ICMP_SGT;
7250 case TargetOpcode::G_UMIN:
7251 return CmpInst::ICMP_ULT;
7252 case TargetOpcode::G_UMAX:
7253 return CmpInst::ICMP_UGT;
7254 default:
7255 llvm_unreachable("not in integer min/max");
7256 }
7257}
7258
7259LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
7260 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7261
7262 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
7263 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
7264
7265 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
7266 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
7267
7268 MI.eraseFromParent();
7269 return Legalized;
7270}
7271
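// Clear the sign bit of Src0, isolate the sign bit of Src1 (shifting it into
// the destination's sign position if the operand sizes differ), and OR the
// two halves together.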
7272LegalizerHelper::LegalizeResult
7273LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
7274 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
7275 const int Src0Size = Src0Ty.getScalarSizeInBits();
7276 const int Src1Size = Src1Ty.getScalarSizeInBits();
7277
7278 auto SignBitMask = MIRBuilder.buildConstant(
7279 Src0Ty, APInt::getSignMask(Src0Size));
7280
7281 auto NotSignBitMask = MIRBuilder.buildConstant(
7282 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
7283
7284 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
7285 Register And1;
7286 if (Src0Ty == Src1Ty) {
7287 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
7288 } else if (Src0Size > Src1Size) {
7289 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
7290 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
7291 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
7292 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
7293 } else {
7294 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
7295 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
7296 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
7297 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
7298 }
7299
7300 // Be careful about setting nsz/nnan/ninf on every instruction, since the
7301 // constants are a nan and -0.0, but the final result should preserve
7302 // everything.
7303 unsigned Flags = MI.getFlags();
7304
7305 // We masked the sign bit and the not-sign bit, so these are disjoint.
7306 Flags |= MachineInstr::Disjoint;
7307
7308 MIRBuilder.buildOr(Dst, And0, And1, Flags);
7309
7310 MI.eraseFromParent();
7311 return Legalized;
7312}
7313
7314LegalizerHelper::LegalizeResult
7315LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
7316 unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
7317 TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
7318
7319 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7320 LLT Ty = MRI.getType(Dst);
7321
7322 if (!MI.getFlag(MachineInstr::FmNoNans)) {
7323 // Insert canonicalizes if it's possible we need to quiet to get correct
7324 // sNaN behavior.
7325
7326 // Note this must be done here, and not as an optimization combine in the
7327 // absence of a dedicated quiet-snan instruction, as we're using an
7328 // omni-purpose G_FCANONICALIZE.
7329 if (!isKnownNeverSNaN(Src0, MRI))
7330 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
7331
7332 if (!isKnownNeverSNaN(Src1, MRI))
7333 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
7334 }
7335
7336 // If there are no nans, it's safe to simply replace this with the non-IEEE
7337 // version.
7338 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
7339 MI.eraseFromParent();
7340 return Legalized;
7341}
7342
7343LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
7344 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
7345 Register DstReg = MI.getOperand(0).getReg();
7346 LLT Ty = MRI.getType(DstReg);
7347 unsigned Flags = MI.getFlags();
7348
7349 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
7350 Flags);
7351 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
7352 MI.eraseFromParent();
7353 return Legalized;
7354}
7355
7356LegalizerHelper::LegalizeResult
7357LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
7358 auto [DstReg, X] = MI.getFirst2Regs();
7359 const unsigned Flags = MI.getFlags();
7360 const LLT Ty = MRI.getType(DstReg);
7361 const LLT CondTy = Ty.changeElementSize(1);
7362
7363 // round(x) =>
7364 // t = trunc(x);
7365 // d = fabs(x - t);
7366 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
7367 // return t + o;
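 // E.g. round(-2.5): t = -2.0, d = 0.5, o = copysign(1.0, -2.5) = -1.0, so
 // the result is -3.0 (ties round away from zero).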
7368
7369 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
7370
7371 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
7372 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
7373
7374 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
7375 auto Cmp =
7376 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
7377
7378 // Could emit G_UITOFP instead
7379 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
7380 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
7381 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
7382 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
7383
7384 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
7385
7386 MI.eraseFromParent();
7387 return Legalized;
7388}
7389
7390LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
7391 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7392 unsigned Flags = MI.getFlags();
7393 LLT Ty = MRI.getType(DstReg);
7394 const LLT CondTy = Ty.changeElementSize(1);
7395
7396 // result = trunc(src);
7397 // if (src < 0.0 && src != result)
7398 // result += -1.0.
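 // E.g. ffloor(-1.25): trunc gives -1.0 and the condition holds, so the
 // SITOFP of the i1 true value contributes -1.0, giving -2.0; otherwise it
 // contributes 0.0.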
7399
7400 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
7401 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
7402
7403 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
7404 SrcReg, Zero, Flags);
7405 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
7406 SrcReg, Trunc, Flags);
7407 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
7408 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
7409
7410 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
7411 MI.eraseFromParent();
7412 return Legalized;
7413}
7414
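// Lower a scalar G_MERGE_VALUES by zero-extending each source part to the
// destination width, shifting it to its bit offset, and OR-ing it into the
// accumulated result (with a final G_INTTOPTR for pointer destinations).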
7415LegalizerHelper::LegalizeResult
7416LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
7417 const unsigned NumOps = MI.getNumOperands();
7418 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
7419 unsigned PartSize = Src0Ty.getSizeInBits();
7420
7421 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
7422 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
7423
7424 for (unsigned I = 2; I != NumOps; ++I) {
7425 const unsigned Offset = (I - 1) * PartSize;
7426
7427 Register SrcReg = MI.getOperand(I).getReg();
7428 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
7429
7430 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
7431 MRI.createGenericVirtualRegister(WideTy);
7432
7433 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
7434 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
7435 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
7436 ResultReg = NextResult;
7437 }
7438
7439 if (DstTy.isPointer()) {
7440 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
7441 DstTy.getAddressSpace())) {
7442 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
7443 return UnableToLegalize;
7444 }
7445
7446 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
7447 }
7448
7449 MI.eraseFromParent();
7450 return Legalized;
7451}
7452
7453LegalizerHelper::LegalizeResult
7454LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
7455 const unsigned NumDst = MI.getNumOperands() - 1;
7456 Register SrcReg = MI.getOperand(NumDst).getReg();
7457 Register Dst0Reg = MI.getOperand(0).getReg();
7458 LLT DstTy = MRI.getType(Dst0Reg);
7459 if (DstTy.isPointer())
7460 return UnableToLegalize; // TODO
7461
7462 SrcReg = coerceToScalar(SrcReg);
7463 if (!SrcReg)
7464 return UnableToLegalize;
7465
7466 // Expand scalarizing unmerge as bitcast to integer and shift.
7467 LLT IntTy = MRI.getType(SrcReg);
7468
7469 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
7470
7471 const unsigned DstSize = DstTy.getSizeInBits();
7472 unsigned Offset = DstSize;
7473 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
7474 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
7475 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
7476 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
7477 }
7478
7479 MI.eraseFromParent();
7480 return Legalized;
7481}
7482
7483/// Lower a vector extract or insert by writing the vector to a stack temporary
7484/// and reloading the element or vector.
7485///
7486/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
7487/// =>
7488/// %stack_temp = G_FRAME_INDEX
7489/// G_STORE %vec, %stack_temp
7490/// %idx = clamp(%idx, %vec.getNumElements())
7491/// %element_ptr = G_PTR_ADD %stack_temp, %idx
7492/// %dst = G_LOAD %element_ptr
7493LegalizerHelper::LegalizeResult
7494LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
7495 Register DstReg = MI.getOperand(0).getReg();
7496 Register SrcVec = MI.getOperand(1).getReg();
7497 Register InsertVal;
7498 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
7499 InsertVal = MI.getOperand(2).getReg();
7500
7501 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
7502
7503 LLT VecTy = MRI.getType(SrcVec);
7504 LLT EltTy = VecTy.getElementType();
7505 unsigned NumElts = VecTy.getNumElements();
7506
7507 int64_t IdxVal;
7508 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
7509 SmallVector<Register, 8> SrcRegs;
7510 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
7511
7512 if (InsertVal) {
7513 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
7514 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
7515 } else {
7516 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
7517 }
7518
7519 MI.eraseFromParent();
7520 return Legalized;
7521 }
7522
7523 if (!EltTy.isByteSized()) { // Not implemented.
7524 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
7525 return UnableToLegalize;
7526 }
7527
7528 unsigned EltBytes = EltTy.getSizeInBytes();
7529 Align VecAlign = getStackTemporaryAlignment(VecTy);
7530 Align EltAlign;
7531
7532 MachinePointerInfo PtrInfo;
7533 auto StackTemp = createStackTemporary(
7534 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
7535 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
7536
7537 // Get the pointer to the element, and be sure not to hit undefined behavior
7538 // if the index is out of bounds.
7539 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
7540
7541 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
7542 int64_t Offset = IdxVal * EltBytes;
7543 PtrInfo = PtrInfo.getWithOffset(Offset);
7544 EltAlign = commonAlignment(VecAlign, Offset);
7545 } else {
7546 // We lose information with a variable offset.
7547 EltAlign = getStackTemporaryAlignment(EltTy);
7548 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
7549 }
7550
7551 if (InsertVal) {
7552 // Write the inserted element
7553 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
7554
7555 // Reload the whole vector.
7556 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
7557 } else {
7558 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
7559 }
7560
7561 MI.eraseFromParent();
7562 return Legalized;
7563}
7564
7565LegalizerHelper::LegalizeResult
7566LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
7567 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
7568 MI.getFirst3RegLLTs();
7569 LLT IdxTy = LLT::scalar(32);
7570
7571 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
7572 Register Undef;
7573 SmallVector<Register, 32> BuildVec;
7574 LLT EltTy = DstTy.getScalarType();
7575
7576 for (int Idx : Mask) {
7577 if (Idx < 0) {
7578 if (!Undef.isValid())
7579 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
7580 BuildVec.push_back(Undef);
7581 continue;
7582 }
7583
7584 if (Src0Ty.isScalar()) {
7585 BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
7586 } else {
7587 int NumElts = Src0Ty.getNumElements();
7588 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
7589 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
7590 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
7591 auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
7592 BuildVec.push_back(Extract.getReg(0));
7593 }
7594 }
7595
7596 if (DstTy.isScalar())
7597 MIRBuilder.buildCopy(DstReg, BuildVec[0]);
7598 else
7599 MIRBuilder.buildBuildVector(DstReg, BuildVec);
7600 MI.eraseFromParent();
7601 return Legalized;
7602}
7603
7604LegalizerHelper::LegalizeResult
7605LegalizerHelper::lowerVECTOR_COMPRESS(MachineInstr &MI) {
7606 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
7607 MI.getFirst4RegLLTs();
7608
7609 if (VecTy.isScalableVector())
7610 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
7611
7612 Align VecAlign = getStackTemporaryAlignment(VecTy);
7613 MachinePointerInfo PtrInfo;
7614 Register StackPtr =
7615 createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
7616 PtrInfo)
7617 .getReg(0);
7618 MachinePointerInfo ValPtrInfo =
7619 MachinePointerInfo::getUnknownStack(*MI.getMF());
7620
7621 LLT IdxTy = LLT::scalar(32);
7622 LLT ValTy = VecTy.getElementType();
7623 Align ValAlign = getStackTemporaryAlignment(ValTy);
7624
7625 auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
7626
7627 bool HasPassthru =
7628 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
7629
7630 if (HasPassthru)
7631 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
7632
7633 Register LastWriteVal;
7634 std::optional<APInt> PassthruSplatVal =
7635 isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
7636
7637 if (PassthruSplatVal.has_value()) {
7638 LastWriteVal =
7639 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
7640 } else if (HasPassthru) {
7641 auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
7642 Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
7643 {LLT::scalar(32)}, {Popcount});
7644
7645 Register LastElmtPtr =
7646 getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
7647 LastWriteVal =
7648 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
7649 .getReg(0);
7650 }
7651
7652 unsigned NumElmts = VecTy.getNumElements();
7653 for (unsigned I = 0; I < NumElmts; ++I) {
7654 auto Idx = MIRBuilder.buildConstant(IdxTy, I);
7655 auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
7656 Register ElmtPtr =
7657 getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
7658 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
7659
7660 LLT MaskITy = MaskTy.getElementType();
7661 auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
7662 if (MaskITy.getSizeInBits() > 1)
7663 MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
7664
7665 MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
7666 OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
7667
7668 if (HasPassthru && I == NumElmts - 1) {
7669 auto EndOfVector =
7670 MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
7671 auto AllLanesSelected = MIRBuilder.buildICmp(
7672 CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
7673 OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
7674 {OutPos, EndOfVector});
7675 ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
7676
7677 LastWriteVal =
7678 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
7679 .getReg(0);
7680 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
7681 }
7682 }
7683
7684 // TODO: Use StackPtr's FrameIndex alignment.
7685 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
7686
7687 MI.eraseFromParent();
7688 return Legalized;
7689}
7690
7691Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
7692 Register AllocSize,
7693 Align Alignment,
7694 LLT PtrTy) {
7695 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
7696
7697 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
7698 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
7699
7700 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
7701 // have to generate an extra instruction to negate the alloc and then use
7702 // G_PTR_ADD to add the negative offset.
7703 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
7704 if (Alignment > Align(1)) {
7705 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
7706 AlignMask.negate();
7707 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
7708 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
7709 }
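 // Clearing the low bits rounds the new stack pointer down to the requested
 // alignment; on a downward-growing stack this keeps the aligned allocation
 // inside the just-reserved region.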
7710
7711 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
7712}
7713
7714LegalizerHelper::LegalizeResult
7715LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
7716 const auto &MF = *MI.getMF();
7717 const auto &TFI = *MF.getSubtarget().getFrameLowering();
7718 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
7719 return UnableToLegalize;
7720
7721 Register Dst = MI.getOperand(0).getReg();
7722 Register AllocSize = MI.getOperand(1).getReg();
7723 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
7724
7725 LLT PtrTy = MRI.getType(Dst);
7726 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
7727 Register SPTmp =
7728 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
7729
7730 MIRBuilder.buildCopy(SPReg, SPTmp);
7731 MIRBuilder.buildCopy(Dst, SPTmp);
7732
7733 MI.eraseFromParent();
7734 return Legalized;
7735}
7736
7737LegalizerHelper::LegalizeResult
7738LegalizerHelper::lowerStackSave(MachineInstr &MI) {
7739 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
7740 if (!StackPtr)
7741 return UnableToLegalize;
7742
7743 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
7744 MI.eraseFromParent();
7745 return Legalized;
7746}
7747
7748LegalizerHelper::LegalizeResult
7749LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
7750 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
7751 if (!StackPtr)
7752 return UnableToLegalize;
7753
7754 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
7755 MI.eraseFromParent();
7756 return Legalized;
7757}
7758
7759LegalizerHelper::LegalizeResult
7760LegalizerHelper::lowerExtract(MachineInstr &MI) {
7761 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7762 unsigned Offset = MI.getOperand(2).getImm();
7763
7764 // Extract sub-vector or one element
7765 if (SrcTy.isVector()) {
7766 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
7767 unsigned DstSize = DstTy.getSizeInBits();
7768
7769 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
7770 (Offset + DstSize <= SrcTy.getSizeInBits())) {
7771 // Unmerge and allow access to each Src element for the artifact combiner.
7772 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
7773
7774 // Take element(s) we need to extract and copy it (merge them).
7775 SmallVector<Register, 8> SubVectorElts;
7776 for (unsigned Idx = Offset / SrcEltSize;
7777 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
7778 SubVectorElts.push_back(Unmerge.getReg(Idx));
7779 }
7780 if (SubVectorElts.size() == 1)
7781 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
7782 else
7783 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
7784
7785 MI.eraseFromParent();
7786 return Legalized;
7787 }
7788 }
7789
7790 if (DstTy.isScalar() &&
7791 (SrcTy.isScalar() ||
7792 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
7793 LLT SrcIntTy = SrcTy;
7794 if (!SrcTy.isScalar()) {
7795 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
7796 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
7797 }
7798
7799 if (Offset == 0)
7800 MIRBuilder.buildTrunc(DstReg, SrcReg);
7801 else {
7802 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
7803 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
7804 MIRBuilder.buildTrunc(DstReg, Shr);
7805 }
7806
7807 MI.eraseFromParent();
7808 return Legalized;
7809 }
7810
7811 return UnableToLegalize;
7812}
7813
7814LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
7815 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
7816 uint64_t Offset = MI.getOperand(3).getImm();
7817
7818 LLT DstTy = MRI.getType(Src);
7819 LLT InsertTy = MRI.getType(InsertSrc);
7820
7821 // Insert sub-vector or one element
7822 if (DstTy.isVector() && !InsertTy.isPointer()) {
7823 LLT EltTy = DstTy.getElementType();
7824 unsigned EltSize = EltTy.getSizeInBits();
7825 unsigned InsertSize = InsertTy.getSizeInBits();
7826
7827 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
7828 (Offset + InsertSize <= DstTy.getSizeInBits())) {
7829 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
7830 SmallVector<Register, 8> DstElts;
7831 unsigned Idx = 0;
7832 // Elements from Src before insert start Offset
7833 for (; Idx < Offset / EltSize; ++Idx) {
7834 DstElts.push_back(UnmergeSrc.getReg(Idx));
7835 }
7836
7837 // Replace elements in Src with elements from InsertSrc
7838 if (InsertTy.getSizeInBits() > EltSize) {
7839 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
7840 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
7841 ++Idx, ++i) {
7842 DstElts.push_back(UnmergeInsertSrc.getReg(i));
7843 }
7844 } else {
7845 DstElts.push_back(InsertSrc);
7846 ++Idx;
7847 }
7848
7849 // Remaining elements from Src after insert
7850 for (; Idx < DstTy.getNumElements(); ++Idx) {
7851 DstElts.push_back(UnmergeSrc.getReg(Idx));
7852 }
7853
7854 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
7855 MI.eraseFromParent();
7856 return Legalized;
7857 }
7858 }
7859
7860 if (InsertTy.isVector() ||
7861 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
7862 return UnableToLegalize;
7863
7864 const DataLayout &DL = MIRBuilder.getDataLayout();
7865 if ((DstTy.isPointer() &&
7866 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
7867 (InsertTy.isPointer() &&
7868 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
7869 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
7870 return UnableToLegalize;
7871 }
7872
7873 LLT IntDstTy = DstTy;
7874
7875 if (!DstTy.isScalar()) {
7876 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
7877 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
7878 }
7879
7880 if (!InsertTy.isScalar()) {
7881 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
7882 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
7883 }
7884
7885 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
7886 if (Offset != 0) {
7887 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
7888 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
7889 }
7890
7891 APInt MaskVal = APInt::getBitsSetWithWrap(
7892 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
7893
7894 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
7895 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
7896 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
7897
7898 MIRBuilder.buildCast(Dst, Or);
7899 MI.eraseFromParent();
7900 return Legalized;
7901}
7902
7903LegalizerHelper::LegalizeResult
7904LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
7905 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
7906 MI.getFirst4RegLLTs();
7907 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
7908
7909 LLT Ty = Dst0Ty;
7910 LLT BoolTy = Dst1Ty;
7911
7912 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
7913
7914 if (IsAdd)
7915 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
7916 else
7917 MIRBuilder.buildSub(NewDst0, LHS, RHS);
7918
7919 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
7920
7921 auto Zero = MIRBuilder.buildConstant(Ty, 0);
7922
7923 // For an addition, the result should be less than one of the operands (LHS)
7924 // if and only if the other operand (RHS) is negative, otherwise there will
7925 // be overflow.
7926 // For a subtraction, the result should be less than one of the operands
7927 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
7928 // otherwise there will be overflow.
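 // E.g. for s8, 100 + 100 wraps to -56: the result is less than LHS while
 // RHS is non-negative, so the XOR of the two conditions reports overflow.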
7929 auto ResultLowerThanLHS =
7930 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
7931 auto ConditionRHS = MIRBuilder.buildICmp(
7932 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
7933
7934 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
7935
7936 MIRBuilder.buildCopy(Dst0, NewDst0);
7937 MI.eraseFromParent();
7938
7939 return Legalized;
7940}
7941
7942LegalizerHelper::LegalizeResult
7943LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
7944 auto [Res, LHS, RHS] = MI.getFirst3Regs();
7945 LLT Ty = MRI.getType(Res);
7946 bool IsSigned;
7947 bool IsAdd;
7948 unsigned BaseOp;
7949 switch (MI.getOpcode()) {
7950 default:
7951 llvm_unreachable("unexpected addsat/subsat opcode");
7952 case TargetOpcode::G_UADDSAT:
7953 IsSigned = false;
7954 IsAdd = true;
7955 BaseOp = TargetOpcode::G_ADD;
7956 break;
7957 case TargetOpcode::G_SADDSAT:
7958 IsSigned = true;
7959 IsAdd = true;
7960 BaseOp = TargetOpcode::G_ADD;
7961 break;
7962 case TargetOpcode::G_USUBSAT:
7963 IsSigned = false;
7964 IsAdd = false;
7965 BaseOp = TargetOpcode::G_SUB;
7966 break;
7967 case TargetOpcode::G_SSUBSAT:
7968 IsSigned = true;
7969 IsAdd = false;
7970 BaseOp = TargetOpcode::G_SUB;
7971 break;
7972 }
7973
7974 if (IsSigned) {
7975 // sadd.sat(a, b) ->
7976 // hi = 0x7fffffff - smax(a, 0)
7977 // lo = 0x80000000 - smin(a, 0)
7978 // a + smin(smax(lo, b), hi)
7979 // ssub.sat(a, b) ->
7980 // lo = smax(a, -1) - 0x7fffffff
7981 // hi = smin(a, -1) - 0x80000000
7982 // a - smin(smax(lo, b), hi)
7983 // TODO: AMDGPU can use a "median of 3" instruction here:
7984 // a +/- med3(lo, b, hi)
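 // E.g. for s8, sadd.sat(100, 100): hi = 127 - 100 = 27, lo = -128 - 0 =
 // -128, so RHS is clamped to 27 and the result is 100 + 27 = 127.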
7985 uint64_t NumBits = Ty.getScalarSizeInBits();
7986 auto MaxVal =
7987 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
7988 auto MinVal =
7989 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
7990 MachineInstrBuilder Hi, Lo;
7991 if (IsAdd) {
7992 auto Zero = MIRBuilder.buildConstant(Ty, 0);
7993 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
7994 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
7995 } else {
7996 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
7997 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
7998 MaxVal);
7999 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
8000 MinVal);
8001 }
8002 auto RHSClamped =
8003 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
8004 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
8005 } else {
8006 // uadd.sat(a, b) -> a + umin(~a, b)
8007 // usub.sat(a, b) -> a - umin(a, b)
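 // ~a == UINT_MAX - a is the largest addend that does not wrap, so
 // umin(~a, b) saturates the operand before the add.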
8008 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
8009 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
8010 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
8011 }
8012
8013 MI.eraseFromParent();
8014 return Legalized;
8015}
8016
8017LegalizerHelper::LegalizeResult
8018LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
8019 auto [Res, LHS, RHS] = MI.getFirst3Regs();
8020 LLT Ty = MRI.getType(Res);
8021 LLT BoolTy = Ty.changeElementSize(1);
8022 bool IsSigned;
8023 bool IsAdd;
8024 unsigned OverflowOp;
8025 switch (MI.getOpcode()) {
8026 default:
8027 llvm_unreachable("unexpected addsat/subsat opcode");
8028 case TargetOpcode::G_UADDSAT:
8029 IsSigned = false;
8030 IsAdd = true;
8031 OverflowOp = TargetOpcode::G_UADDO;
8032 break;
8033 case TargetOpcode::G_SADDSAT:
8034 IsSigned = true;
8035 IsAdd = true;
8036 OverflowOp = TargetOpcode::G_SADDO;
8037 break;
8038 case TargetOpcode::G_USUBSAT:
8039 IsSigned = false;
8040 IsAdd = false;
8041 OverflowOp = TargetOpcode::G_USUBO;
8042 break;
8043 case TargetOpcode::G_SSUBSAT:
8044 IsSigned = true;
8045 IsAdd = false;
8046 OverflowOp = TargetOpcode::G_SSUBO;
8047 break;
8048 }
8049
8050 auto OverflowRes =
8051 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
8052 Register Tmp = OverflowRes.getReg(0);
8053 Register Ov = OverflowRes.getReg(1);
8054 MachineInstrBuilder Clamp;
8055 if (IsSigned) {
8056 // sadd.sat(a, b) ->
8057 // {tmp, ov} = saddo(a, b)
8058 // ov ? (tmp >>s 31) + 0x80000000 : r
8059 // ssub.sat(a, b) ->
8060 // {tmp, ov} = ssubo(a, b)
8061 // ov ? (tmp >>s 31) + 0x80000000 : r
8062 uint64_t NumBits = Ty.getScalarSizeInBits();
8063 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
8064 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
8065 auto MinVal =
8066 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
8067 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
8068 } else {
8069 // uadd.sat(a, b) ->
8070 // {tmp, ov} = uaddo(a, b)
8071 // ov ? 0xffffffff : tmp
8072 // usub.sat(a, b) ->
8073 // {tmp, ov} = usubo(a, b)
8074 // ov ? 0 : tmp
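 // E.g. for s8, uadd.sat(200, 100): uaddo produces {tmp = 44, ov = 1}, so
 // the select picks the clamp value 0xff (255).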
8075 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
8076 }
8077 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
8078
8079 MI.eraseFromParent();
8080 return Legalized;
8081}
8082
8083LegalizerHelper::LegalizeResult
8084LegalizerHelper::lowerShlSat(MachineInstr &MI) {
8085 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
8086 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
8087 "Expected shlsat opcode!");
8088 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
8089 auto [Res, LHS, RHS] = MI.getFirst3Regs();
8090 LLT Ty = MRI.getType(Res);
8091 LLT BoolTy = Ty.changeElementSize(1);
8092
8093 unsigned BW = Ty.getScalarSizeInBits();
8094 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
8095 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
8096 : MIRBuilder.buildLShr(Ty, Result, RHS);
8097
8098 MachineInstrBuilder SatVal;
8099 if (IsSigned) {
8100 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
8101 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
8102 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
8103 MIRBuilder.buildConstant(Ty, 0));
8104 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
8105 } else {
8106 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
8107 }
8108 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
8109 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
8110
8111 MI.eraseFromParent();
8112 return Legalized;
8113}
8114
8115LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
8116 auto [Dst, Src] = MI.getFirst2Regs();
8117 const LLT Ty = MRI.getType(Src);
8118 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
8119 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
8120
8121 // Swap most and least significant byte, set remaining bytes in Res to zero.
8122 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
8123 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
8124 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
8125 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
8126
8127 // Set i-th high/low byte in Res to i-th low/high byte from Src.
8128 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
8129 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
8130 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
8131 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
8132 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
8133 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
8134 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
8135 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
8136 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
8137 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
8138 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
8139 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
8140 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
8141 }
8142 Res.getInstr()->getOperand(0).setReg(Dst);
8143
8144 MI.eraseFromParent();
8145 return Legalized;
8146}
8147
8148//{ (Src & Mask) >> N } | { (Src << N) & Mask }
8149static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
8150 MachineInstrBuilder Src, const APInt &Mask) {
8151 const LLT Ty = Dst.getLLTTy(*B.getMRI());
8152 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
8153 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
8154 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
8155 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
8156 return B.buildOr(Dst, LHS, RHS);
8157}
8158
8159LegalizerHelper::LegalizeResult
8160LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
8161 auto [Dst, Src] = MI.getFirst2Regs();
8162 const LLT Ty = MRI.getType(Src);
8163 unsigned Size = Ty.getScalarSizeInBits();
8164
8165 if (Size >= 8) {
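 // Reverse the bytes with G_BSWAP, then reverse the bits inside each byte by
 // swapping nibbles, bit pairs, and finally adjacent bits; e.g. an 8-bit
 // value abcdefgh becomes efghabcd, then ghefcdab, then hgfedcba.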
8166 MachineInstrBuilder BSWAP =
8167 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
8168
8169 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
8170 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
8171 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
8172 MachineInstrBuilder Swap4 =
8173 SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
8174
8175 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
8176 // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
8177 // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
8178 MachineInstrBuilder Swap2 =
8179 SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
8180
8181 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
8182 // 6|7
8183 // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
8184 // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
8185 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
8186 } else {
8187 // Expand bitreverse for types smaller than 8 bits.
8188 MachineInstrBuilder Tmp;
8189 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
8190 MachineInstrBuilder Tmp2;
8191 if (I < J) {
8192 auto ShAmt = MIRBuilder.buildConstant(Ty, J - I);
8193 Tmp2 = MIRBuilder.buildShl(Ty, Src, ShAmt);
8194 } else {
8195 auto ShAmt = MIRBuilder.buildConstant(Ty, I - J);
8196 Tmp2 = MIRBuilder.buildLShr(Ty, Src, ShAmt);
8197 }
8198
8199 auto Mask = MIRBuilder.buildConstant(Ty, 1ULL << J);
8200 Tmp2 = MIRBuilder.buildAnd(Ty, Tmp2, Mask);
8201 if (I == 0)
8202 Tmp = Tmp2;
8203 else
8204 Tmp = MIRBuilder.buildOr(Ty, Tmp, Tmp2);
8205 }
8206 MIRBuilder.buildCopy(Dst, Tmp);
8207 }
8208
8209 MI.eraseFromParent();
8210 return Legalized;
8211}
8212
8213LegalizerHelper::LegalizeResult
8214LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
8215 MachineFunction &MF = MIRBuilder.getMF();
8216
8217 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
8218 int NameOpIdx = IsRead ? 1 : 0;
8219 int ValRegIndex = IsRead ? 0 : 1;
8220
8221 Register ValReg = MI.getOperand(ValRegIndex).getReg();
8222 const LLT Ty = MRI.getType(ValReg);
8223 const MDString *RegStr = cast<MDString>(
8224 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
8225
8226 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
8227 if (!PhysReg.isValid())
8228 return UnableToLegalize;
8229
8230 if (IsRead)
8231 MIRBuilder.buildCopy(ValReg, PhysReg);
8232 else
8233 MIRBuilder.buildCopy(PhysReg, ValReg);
8234
8235 MI.eraseFromParent();
8236 return Legalized;
8237}
8238
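// Lower G_SMULH/G_UMULH by extending both operands to twice the width,
// multiplying, shifting the product right by the original bit width, and
// truncating.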
8239LegalizerHelper::LegalizeResult
8240LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
8241 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
8242 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
8243 Register Result = MI.getOperand(0).getReg();
8244 LLT OrigTy = MRI.getType(Result);
8245 auto SizeInBits = OrigTy.getScalarSizeInBits();
8246 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
8247
8248 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
8249 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
8250 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
8251 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
8252
8253 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
8254 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
8255 MIRBuilder.buildTrunc(Result, Shifted);
8256
8257 MI.eraseFromParent();
8258 return Legalized;
8259}
8260
8261LegalizerHelper::LegalizeResult
8262LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
8263 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
8264 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
8265
8266 if (Mask == fcNone) {
8267 MIRBuilder.buildConstant(DstReg, 0);
8268 MI.eraseFromParent();
8269 return Legalized;
8270 }
8271 if (Mask == fcAllFlags) {
8272 MIRBuilder.buildConstant(DstReg, 1);
8273 MI.eraseFromParent();
8274 return Legalized;
8275 }
8276
8277 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
8278 // version
8279
8280 unsigned BitSize = SrcTy.getScalarSizeInBits();
8281 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8282
8283 LLT IntTy = LLT::scalar(BitSize);
8284 if (SrcTy.isVector())
8285 IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
8286 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
8287
8288 // Various masks.
8289 APInt SignBit = APInt::getSignMask(BitSize);
8290 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
8291 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
8292 APInt ExpMask = Inf;
8293 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
8294 APInt QNaNBitMask =
8295 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
8296 APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
8297
8298 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
8299 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
8300 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
8301 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
8302 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
8303
8304 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
8305 auto Sign =
8306 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
8307
8308 auto Res = MIRBuilder.buildConstant(DstTy, 0);
8309 // Clang doesn't support capture of structured bindings:
8310 LLT DstTyCopy = DstTy;
8311 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
8312 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
8313 };
8314
8315 // Tests that involve more than one class should be processed first.
8316 if ((Mask & fcFinite) == fcFinite) {
8317 // finite(V) ==> abs(V) u< exp_mask
8318 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
8319 ExpMaskC));
8320 Mask &= ~fcFinite;
8321 } else if ((Mask & fcFinite) == fcPosFinite) {
8322 // finite(V) && V > 0 ==> V u< exp_mask
8323 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
8324 ExpMaskC));
8325 Mask &= ~fcPosFinite;
8326 } else if ((Mask & fcFinite) == fcNegFinite) {
8327 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
8328 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
8329 ExpMaskC);
8330 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
8331 appendToRes(And);
8332 Mask &= ~fcNegFinite;
8333 }
8334
8335 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
8336 // fcZero | fcSubnormal => test all exponent bits are 0
8337 // TODO: Handle sign bit specific cases
8338 // TODO: Handle inverted case
8339 if (PartialCheck == (fcZero | fcSubnormal)) {
8340 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
8341 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8342 ExpBits, ZeroC));
8343 Mask &= ~PartialCheck;
8344 }
8345 }
8346
8347 // Check for individual classes.
8348 if (FPClassTest PartialCheck = Mask & fcZero) {
8349 if (PartialCheck == fcPosZero)
8350 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8351 AsInt, ZeroC));
8352 else if (PartialCheck == fcZero)
8353 appendToRes(
8354 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
8355 else // fcNegZero
8356 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8357 AsInt, SignBitC));
8358 }
8359
8360 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
8361 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
8362 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
8363 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
8364 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
8365 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
8366 auto SubnormalRes =
8367 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
8368 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
8369 if (PartialCheck == fcNegSubnormal)
8370 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
8371 appendToRes(SubnormalRes);
8372 }
8373
8374 if (FPClassTest PartialCheck = Mask & fcInf) {
8375 if (PartialCheck == fcPosInf)
8376 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8377 AsInt, InfC));
8378 else if (PartialCheck == fcInf)
8379 appendToRes(
8380 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
8381 else { // fcNegInf
8382 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
8383 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
8384 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8385 AsInt, NegInfC));
8386 }
8387 }
8388
8389 if (FPClassTest PartialCheck = Mask & fcNan) {
8390 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
8391 if (PartialCheck == fcNan) {
8392 // isnan(V) ==> abs(V) u> int(inf)
8393 appendToRes(
8394 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
8395 } else if (PartialCheck == fcQNan) {
8396 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
8397 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
8398 InfWithQnanBitC));
8399 } else { // fcSNan
8400 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
8401 // abs(V) u< (unsigned(Inf) | quiet_bit)
8402 auto IsNan =
8403 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
8404 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
8405 Abs, InfWithQnanBitC);
8406 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
8407 }
8408 }
8409
8410 if (FPClassTest PartialCheck = Mask & fcNormal) {
8411 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
8412 // (max_exp-1))
8413 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
8414 auto ExpMinusOne = MIRBuilder.buildSub(
8415 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
8416 APInt MaxExpMinusOne = ExpMask - ExpLSB;
8417 auto NormalRes =
8418 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
8419 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
8420 if (PartialCheck == fcNegNormal)
8421 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
8422 else if (PartialCheck == fcPosNormal) {
8423 auto PosSign = MIRBuilder.buildXor(
8424 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InvertionMask));
8425 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
8426 }
8427 appendToRes(NormalRes);
8428 }
8429
8430 MIRBuilder.buildCopy(DstReg, Res);
8431 MI.eraseFromParent();
8432 return Legalized;
8433}
8434
8435LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
8436 // Implement G_SELECT in terms of XOR, AND, OR.
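 // The select becomes (Mask & Op1) | (~Mask & Op2), which requires every mask
 // lane to be all ones or all zeros; that is why a scalar condition is
 // sign-extended below.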
8437 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
8438 MI.getFirst4RegLLTs();
8439
8440 bool IsEltPtr = DstTy.isPointerOrPointerVector();
8441 if (IsEltPtr) {
8442 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
8443 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
8444 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
8445 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
8446 DstTy = NewTy;
8447 }
8448
8449 if (MaskTy.isScalar()) {
8450 // Turn the scalar condition into a vector condition mask if needed.
8451
8452 Register MaskElt = MaskReg;
8453
8454 // The condition was potentially zero extended before, but we want a sign
8455 // extended boolean.
8456 if (MaskTy != LLT::scalar(1))
8457 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
8458
8459 // Continue the sign extension (or truncate) to match the data type.
8460 MaskElt =
8461 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
8462
8463 if (DstTy.isVector()) {
8464 // Generate a vector splat idiom.
8465 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
8466 MaskReg = ShufSplat.getReg(0);
8467 } else {
8468 MaskReg = MaskElt;
8469 }
8470 MaskTy = DstTy;
8471 } else if (!DstTy.isVector()) {
8472 // Cannot handle the case that mask is a vector and dst is a scalar.
8473 return UnableToLegalize;
8474 }
8475
8476 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
8477 return UnableToLegalize;
8478 }
8479
8480 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
8481 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
8482 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
8483 if (IsEltPtr) {
8484 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
8485 MIRBuilder.buildIntToPtr(DstReg, Or);
8486 } else {
8487 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
8488 }
8489 MI.eraseFromParent();
8490 return Legalized;
8491}
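// Sketch of the expansion built above, with made-up register names (%c, %a,
// %b, %m are placeholders for illustration only):
//   %m   = sign-extended form of the condition %c   ; all ones or all zeros
//   %t   = G_AND %a, %m
//   %f   = G_AND %b, (G_XOR %m, -1)                 ; AND with the inverted mask
//   %res = G_OR  %t, %f
// Pointer and pointer-vector operands are converted with G_PTRTOINT first and
// the final G_OR is converted back with G_INTTOPTR.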
8492
8493 LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
8494 // Split DIVREM into individual instructions.
8495 unsigned Opcode = MI.getOpcode();
8496
8497 MIRBuilder.buildInstr(
8498 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
8499 : TargetOpcode::G_UDIV,
8500 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
8501 MIRBuilder.buildInstr(
8502 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
8503 : TargetOpcode::G_UREM,
8504 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
8505 MI.eraseFromParent();
8506 return Legalized;
8507}
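// For example (illustrative operands), a combined instruction such as
//   %q:_(s32), %r:_(s32) = G_SDIVREM %a, %b
// is rewritten by the code above into two independent instructions:
//   %q:_(s32) = G_SDIV %a, %b
//   %r:_(s32) = G_SREM %a, %b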
8508
8509 LegalizerHelper::LegalizeResult
8510 LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
8511 // Expand %res = G_ABS %a into:
8512 // %v1 = G_ASHR %a, scalar_size-1
8513 // %v2 = G_ADD %a, %v1
8514 // %res = G_XOR %v2, %v1
8515 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
8516 Register OpReg = MI.getOperand(1).getReg();
8517 auto ShiftAmt =
8518 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
8519 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
8520 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
8521 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
8522 MI.eraseFromParent();
8523 return Legalized;
8524}
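// Worked example of the shift/add/xor expansion above for %a = -5 (s32):
//   %v1 = G_ASHR -5, 31  -> 0xFFFFFFFF (-1)
//   %v2 = G_ADD  -5, -1  -> -6
//   %res = G_XOR -6, -1  -> 5
// For a non-negative input %v1 is 0, so the add and xor leave the value as is.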
8525
8526 LegalizerHelper::LegalizeResult
8527 LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
8528 // Expand %res = G_ABS %a into:
8529 // %v1 = G_CONSTANT 0
8530 // %v2 = G_SUB %v1, %a
8531 // %res = G_SMAX %a, %v2
8532 Register SrcReg = MI.getOperand(1).getReg();
8533 LLT Ty = MRI.getType(SrcReg);
8534 auto Zero = MIRBuilder.buildConstant(Ty, 0);
8535 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
8536 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
8537 MI.eraseFromParent();
8538 return Legalized;
8539}
8540
8541 LegalizerHelper::LegalizeResult
8542 LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
8543 Register SrcReg = MI.getOperand(1).getReg();
8544 Register DestReg = MI.getOperand(0).getReg();
8545 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
8546 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
8547 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
8548 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
8549 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
8550 MI.eraseFromParent();
8551 return Legalized;
8552}
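// Equivalent compare-and-select form produced above (illustrative names):
//   %zero = G_CONSTANT 0
//   %neg  = G_SUB %zero, %a
//   %cmp  = G_ICMP sgt %a, %zero
//   %res  = G_SELECT %cmp, %a, %neg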
8553
8554 LegalizerHelper::LegalizeResult
8555 LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
8556 Register SrcReg = MI.getOperand(1).getReg();
8557 LLT SrcTy = MRI.getType(SrcReg);
8558 LLT DstTy = MRI.getType(SrcReg);
8559
8560 // The source could be a scalar if the IR type was <1 x sN>.
8561 if (SrcTy.isScalar()) {
8562 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
8563 return UnableToLegalize; // FIXME: handle extension.
8564 // This can be just a plain copy.
8565 Observer.changingInstr(MI);
8566 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
8567 Observer.changedInstr(MI);
8568 return Legalized;
8569 }
8570 return UnableToLegalize;
8571}
8572
8573 LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
8574 MachineFunction &MF = *MI.getMF();
8575 const DataLayout &DL = MIRBuilder.getDataLayout();
8576 LLVMContext &Ctx = MF.getFunction().getContext();
8577 Register ListPtr = MI.getOperand(1).getReg();
8578 LLT PtrTy = MRI.getType(ListPtr);
8579
8580 // ListPtr is a pointer to the head of the list. Get the address
8581 // of the head of the list.
8582 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
8583 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
8584 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
8585 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
8586
8587 const Align A(MI.getOperand(2).getImm());
8588 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
8589 if (A > TLI.getMinStackArgumentAlignment()) {
8590 Register AlignAmt =
8591 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
8592 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
8593 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
8594 VAList = AndDst.getReg(0);
8595 }
8596
8597 // Increment the pointer, VAList, to the next vaarg
8598 // The list should be bumped by the size of the element in the current head
8599 // of the list.
8600 Register Dst = MI.getOperand(0).getReg();
8601 LLT LLTTy = MRI.getType(Dst);
8602 Type *Ty = getTypeForLLT(LLTTy, Ctx);
8603 auto IncAmt =
8604 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
8605 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
8606
8607 // Store the incremented VAList to the legalized pointer
8608 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
8609 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
8610 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
8611 // Load the actual argument out of the pointer VAList
8612 Align EltAlignment = DL.getABITypeAlign(Ty);
8613 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
8614 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
8615 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
8616
8617 MI.eraseFromParent();
8618 return Legalized;
8619}
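// Summary of the G_VAARG expansion above, in emission order (a sketch, not a
// target-specific description):
//   1. Load the current argument pointer from the va_list slot (ListPtr).
//   2. If the requested alignment exceeds the minimum stack argument
//      alignment, round the pointer up: (ptr + align - 1) & ~(align - 1).
//   3. Store ptr + alloc-size(result type) back to the va_list slot.
//   4. Load the argument value itself from the (possibly aligned) pointer.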
8620
8621 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
8622 // On Darwin, -Os means optimize for size without hurting performance, so
8623 // only really optimize for size when -Oz (MinSize) is used.
8624 if (MF.getTarget().getTargetTriple().isOSDarwin())
8625 return MF.getFunction().hasMinSize();
8626 return MF.getFunction().hasOptSize();
8627}
8628
8629// Returns a list of types to use for memory op lowering in MemOps. A partial
8630// port of findOptimalMemOpLowering in TargetLowering.
8631static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
8632 unsigned Limit, const MemOp &Op,
8633 unsigned DstAS, unsigned SrcAS,
8634 const AttributeList &FuncAttributes,
8635 const TargetLowering &TLI) {
8636 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
8637 return false;
8638
8639 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
8640
8641 if (Ty == LLT()) {
8642 // Use the largest scalar type whose alignment constraints are satisfied.
8643 // We only need to check DstAlign here as SrcAlign is always greater or
8644 // equal to DstAlign (or zero).
8645 Ty = LLT::scalar(64);
8646 if (Op.isFixedDstAlign())
8647 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
8648 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
8649 Ty = LLT::scalar(Ty.getSizeInBytes());
8650 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
8651 // FIXME: check for the largest legal type we can load/store to.
8652 }
8653
8654 unsigned NumMemOps = 0;
8655 uint64_t Size = Op.size();
8656 while (Size) {
8657 unsigned TySize = Ty.getSizeInBytes();
8658 while (TySize > Size) {
8659 // For now, only use non-vector loads / stores for the left-over pieces.
8660 LLT NewTy = Ty;
8661 // FIXME: check for mem op safety and legality of the types. Not all of
8662 // SDAGisms map cleanly to GISel concepts.
8663 if (NewTy.isVector())
8664 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
8665 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
8666 unsigned NewTySize = NewTy.getSizeInBytes();
8667 assert(NewTySize > 0 && "Could not find appropriate type");
8668
8669 // If the new LLT cannot cover all of the remaining bits, then consider
8670 // issuing a (or a pair of) unaligned and overlapping load / store.
8671 unsigned Fast;
8672 // Need to get a VT equivalent for allowsMisalignedMemoryAccesses().
8673 MVT VT = getMVTForLLT(Ty);
8674 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
8675 TLI.allowsMisalignedMemoryAccesses(
8676 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
8677 MachineMemOperand::MONone, &Fast) &&
8678 Fast)
8679 TySize = Size;
8680 else {
8681 Ty = NewTy;
8682 TySize = NewTySize;
8683 }
8684 }
8685
8686 if (++NumMemOps > Limit)
8687 return false;
8688
8689 MemOps.push_back(Ty);
8690 Size -= TySize;
8691 }
8692
8693 return true;
8694}
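// Hypothetical example of the breakdown above, assuming the target reports
// s64 as the optimal type and permits the resulting accesses: a 12-byte
// operation yields MemOps = {s64, s32} (8 + 4 bytes), while a 15-byte copy
// with overlapping stores allowed yields {s64, s64}, where the caller later
// shifts the second access back by one byte so it overlaps the first.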
8695
8696// Get a vectorized representation of the memset value operand, GISel edition.
8697 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
8698 MachineRegisterInfo &MRI = *MIB.getMRI();
8699 unsigned NumBits = Ty.getScalarSizeInBits();
8700 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
8701 if (!Ty.isVector() && ValVRegAndVal) {
8702 APInt Scalar = ValVRegAndVal->Value.trunc(8);
8703 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
8704 return MIB.buildConstant(Ty, SplatVal).getReg(0);
8705 }
8706
8707 // Extend the byte value to the larger type, and then multiply by a magic
8708 // value 0x010101... in order to replicate it across every byte.
8709 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
8710 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
8711 return MIB.buildConstant(Ty, 0).getReg(0);
8712 }
8713
8714 LLT ExtType = Ty.getScalarType();
8715 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
8716 if (NumBits > 8) {
8717 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
8718 auto MagicMI = MIB.buildConstant(ExtType, Magic);
8719 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
8720 }
8721
8722 // For vector types create a G_BUILD_VECTOR.
8723 if (Ty.isVector())
8724 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
8725
8726 return Val;
8727}
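// Worked example of the splat above (illustrative values): for a memset byte
// of 0x2A and a largest store type of s32, the byte is zero-extended to s32
// and multiplied by the magic constant 0x01010101, giving 0x2A2A2A2A. A
// constant byte value takes the earlier path and is splatted directly with
// APInt::getSplat; an all-zero value simply becomes a wide G_CONSTANT 0.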
8728
8729 LegalizerHelper::LegalizeResult
8730 LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
8731 uint64_t KnownLen, Align Alignment,
8732 bool IsVolatile) {
8733 auto &MF = *MI.getParent()->getParent();
8734 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8735 auto &DL = MF.getDataLayout();
8736 LLVMContext &C = MF.getFunction().getContext();
8737
8738 assert(KnownLen != 0 && "Have a zero length memset length!");
8739
8740 bool DstAlignCanChange = false;
8741 MachineFrameInfo &MFI = MF.getFrameInfo();
8742 bool OptSize = shouldLowerMemFuncForSize(MF);
8743
8744 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8745 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8746 DstAlignCanChange = true;
8747
8748 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
8749 std::vector<LLT> MemOps;
8750
8751 const auto &DstMMO = **MI.memoperands_begin();
8752 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8753
8754 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
8755 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
8756
8757 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
8758 MemOp::Set(KnownLen, DstAlignCanChange,
8759 Alignment,
8760 /*IsZeroMemset=*/IsZeroVal,
8761 /*IsVolatile=*/IsVolatile),
8762 DstPtrInfo.getAddrSpace(), ~0u,
8763 MF.getFunction().getAttributes(), TLI))
8764 return UnableToLegalize;
8765
8766 if (DstAlignCanChange) {
8767 // Get an estimate of the type from the LLT.
8768 Type *IRTy = getTypeForLLT(MemOps[0], C);
8769 Align NewAlign = DL.getABITypeAlign(IRTy);
8770 if (NewAlign > Alignment) {
8771 Alignment = NewAlign;
8772 unsigned FI = FIDef->getOperand(1).getIndex();
8773 // Give the stack frame object a larger alignment if needed.
8774 if (MFI.getObjectAlign(FI) < Alignment)
8775 MFI.setObjectAlignment(FI, Alignment);
8776 }
8777 }
8778
8779 MachineIRBuilder MIB(MI);
8780 // Find the largest store and generate the bit pattern for it.
8781 LLT LargestTy = MemOps[0];
8782 for (unsigned i = 1; i < MemOps.size(); i++)
8783 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
8784 LargestTy = MemOps[i];
8785
8786 // The memset stored value is always defined as an s8, so in order to make it
8787 // work with larger store types we need to repeat the bit pattern across the
8788 // wider type.
8789 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
8790
8791 if (!MemSetValue)
8792 return UnableToLegalize;
8793
8794 // Generate the stores. For each store type in the list, we generate the
8795 // matching store of that type to the destination address.
8796 LLT PtrTy = MRI.getType(Dst);
8797 unsigned DstOff = 0;
8798 unsigned Size = KnownLen;
8799 for (unsigned I = 0; I < MemOps.size(); I++) {
8800 LLT Ty = MemOps[I];
8801 unsigned TySize = Ty.getSizeInBytes();
8802 if (TySize > Size) {
8803 // Issuing an unaligned load / store pair that overlaps with the previous
8804 // pair. Adjust the offset accordingly.
8805 assert(I == MemOps.size() - 1 && I != 0);
8806 DstOff -= TySize - Size;
8807 }
8808
8809 // If this store is smaller than the largest store see whether we can get
8810 // the smaller value for free with a truncate.
8811 Register Value = MemSetValue;
8812 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
8813 MVT VT = getMVTForLLT(Ty);
8814 MVT LargestVT = getMVTForLLT(LargestTy);
8815 if (!LargestTy.isVector() && !Ty.isVector() &&
8816 TLI.isTruncateFree(LargestVT, VT))
8817 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
8818 else
8819 Value = getMemsetValue(Val, Ty, MIB);
8820 if (!Value)
8821 return UnableToLegalize;
8822 }
8823
8824 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
8825
8826 Register Ptr = Dst;
8827 if (DstOff != 0) {
8828 auto Offset =
8829 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
8830 Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
8831 }
8832
8833 MIB.buildStore(Value, Ptr, *StoreMMO);
8834 DstOff += Ty.getSizeInBytes();
8835 Size -= TySize;
8836 }
8837
8838 MI.eraseFromParent();
8839 return Legalized;
8840}
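// Illustrative result of the loop above: a 12-byte G_MEMSET of byte 0xAB with
// MemOps = {s64, s32} becomes roughly
//   G_STORE 0xABABABABABABABAB into %dst          (8 bytes at offset 0)
//   G_STORE 0xABABABAB         into %dst + 8      (4 bytes at offset 8)
// where the narrower value is either truncated from the wide splat (when the
// target says truncation is free) or recomputed with getMemsetValue.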
8841
8842 LegalizerHelper::LegalizeResult
8843 LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
8844 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8845
8846 auto [Dst, Src, Len] = MI.getFirst3Regs();
8847
8848 const auto *MMOIt = MI.memoperands_begin();
8849 const MachineMemOperand *MemOp = *MMOIt;
8850 bool IsVolatile = MemOp->isVolatile();
8851
8852 // See if this is a constant length copy
8853 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
8854 // FIXME: support dynamically sized G_MEMCPY_INLINE
8855 assert(LenVRegAndVal &&
8856 "inline memcpy with dynamic size is not yet supported");
8857 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
8858 if (KnownLen == 0) {
8859 MI.eraseFromParent();
8860 return Legalized;
8861 }
8862
8863 const auto &DstMMO = **MI.memoperands_begin();
8864 const auto &SrcMMO = **std::next(MI.memoperands_begin());
8865 Align DstAlign = DstMMO.getBaseAlign();
8866 Align SrcAlign = SrcMMO.getBaseAlign();
8867
8868 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
8869 IsVolatile);
8870}
8871
8872 LegalizerHelper::LegalizeResult
8873 LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
8874 uint64_t KnownLen, Align DstAlign,
8875 Align SrcAlign, bool IsVolatile) {
8876 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8877 return lowerMemcpy(MI, Dst, Src, KnownLen,
8878 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
8879 IsVolatile);
8880}
8881
8882 LegalizerHelper::LegalizeResult
8883 LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
8884 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
8885 Align SrcAlign, bool IsVolatile) {
8886 auto &MF = *MI.getParent()->getParent();
8887 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8888 auto &DL = MF.getDataLayout();
8889 LLVMContext &C = MF.getFunction().getContext();
8890
8891 assert(KnownLen != 0 && "Have a zero length memcpy length!");
8892
8893 bool DstAlignCanChange = false;
8894 MachineFrameInfo &MFI = MF.getFrameInfo();
8895 Align Alignment = std::min(DstAlign, SrcAlign);
8896
8897 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8898 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8899 DstAlignCanChange = true;
8900
8901 // FIXME: infer better src pointer alignment like SelectionDAG does here.
8902 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
8903 // if the memcpy is in a tail call position.
8904
8905 std::vector<LLT> MemOps;
8906
8907 const auto &DstMMO = **MI.memoperands_begin();
8908 const auto &SrcMMO = **std::next(MI.memoperands_begin());
8909 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8910 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
8911
8912 if (!findGISelOptimalMemOpLowering(
8913 MemOps, Limit,
8914 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
8915 IsVolatile),
8916 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
8917 MF.getFunction().getAttributes(), TLI))
8918 return UnableToLegalize;
8919
8920 if (DstAlignCanChange) {
8921 // Get an estimate of the type from the LLT.
8922 Type *IRTy = getTypeForLLT(MemOps[0], C);
8923 Align NewAlign = DL.getABITypeAlign(IRTy);
8924
8925 // Don't promote to an alignment that would require dynamic stack
8926 // realignment.
8927 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
8928 if (!TRI->hasStackRealignment(MF))
8929 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
8930 NewAlign = NewAlign.previous();
8931
8932 if (NewAlign > Alignment) {
8933 Alignment = NewAlign;
8934 unsigned FI = FIDef->getOperand(1).getIndex();
8935 // Give the stack frame object a larger alignment if needed.
8936 if (MFI.getObjectAlign(FI) < Alignment)
8937 MFI.setObjectAlignment(FI, Alignment);
8938 }
8939 }
8940
8941 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
8942
8943 MachineIRBuilder MIB(MI);
8944 // Now we need to emit a load and store pair for each of the types we've
8945 // collected. I.e. for each type, generate a load from the source pointer of
8946 // that type width, and then generate a corresponding store to the dest buffer
8947 // of that value loaded. This can result in a sequence of loads and stores
8948 // of mixed types, depending on what the target specifies as good types to use.
8949 unsigned CurrOffset = 0;
8950 unsigned Size = KnownLen;
8951 for (auto CopyTy : MemOps) {
8952 // Issuing an unaligned load / store pair that overlaps with the previous
8953 // pair. Adjust the offset accordingly.
8954 if (CopyTy.getSizeInBytes() > Size)
8955 CurrOffset -= CopyTy.getSizeInBytes() - Size;
8956
8957 // Construct MMOs for the accesses.
8958 auto *LoadMMO =
8959 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
8960 auto *StoreMMO =
8961 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
8962
8963 // Create the load.
8964 Register LoadPtr = Src;
8965 Register Offset;
8966 if (CurrOffset != 0) {
8967 LLT SrcTy = MRI.getType(Src);
8968 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
8969 .getReg(0);
8970 LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
8971 }
8972 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
8973
8974 // Create the store.
8975 Register StorePtr = Dst;
8976 if (CurrOffset != 0) {
8977 LLT DstTy = MRI.getType(Dst);
8978 StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
8979 }
8980 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
8981 CurrOffset += CopyTy.getSizeInBytes();
8982 Size -= CopyTy.getSizeInBytes();
8983 }
8984
8985 MI.eraseFromParent();
8986 return Legalized;
8987}
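// Sketch of the generated sequence: each entry in MemOps becomes a G_LOAD from
// %src + offset immediately followed by a G_STORE to %dst + offset, with a
// possible final overlapping pair when the last type is wider than the bytes
// that remain (register names here are illustrative).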
8988
8989 LegalizerHelper::LegalizeResult
8990 LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
8991 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
8992 bool IsVolatile) {
8993 auto &MF = *MI.getParent()->getParent();
8994 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8995 auto &DL = MF.getDataLayout();
8996 LLVMContext &C = MF.getFunction().getContext();
8997
8998 assert(KnownLen != 0 && "Have a zero length memmove length!");
8999
9000 bool DstAlignCanChange = false;
9001 MachineFrameInfo &MFI = MF.getFrameInfo();
9002 bool OptSize = shouldLowerMemFuncForSize(MF);
9003 Align Alignment = std::min(DstAlign, SrcAlign);
9004
9005 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
9006 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
9007 DstAlignCanChange = true;
9008
9009 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
9010 std::vector<LLT> MemOps;
9011
9012 const auto &DstMMO = **MI.memoperands_begin();
9013 const auto &SrcMMO = **std::next(MI.memoperands_begin());
9014 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
9015 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
9016
9017 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
9018 // to a bug in its findOptimalMemOpLowering implementation. For now do the
9019 // same thing here.
9020 if (!findGISelOptimalMemOpLowering(
9021 MemOps, Limit,
9022 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
9023 /*IsVolatile*/ true),
9024 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
9025 MF.getFunction().getAttributes(), TLI))
9026 return UnableToLegalize;
9027
9028 if (DstAlignCanChange) {
9029 // Get an estimate of the type from the LLT.
9030 Type *IRTy = getTypeForLLT(MemOps[0], C);
9031 Align NewAlign = DL.getABITypeAlign(IRTy);
9032
9033 // Don't promote to an alignment that would require dynamic stack
9034 // realignment.
9035 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
9036 if (!TRI->hasStackRealignment(MF))
9037 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
9038 NewAlign = NewAlign.previous();
9039
9040 if (NewAlign > Alignment) {
9041 Alignment = NewAlign;
9042 unsigned FI = FIDef->getOperand(1).getIndex();
9043 // Give the stack frame object a larger alignment if needed.
9044 if (MFI.getObjectAlign(FI) < Alignment)
9045 MFI.setObjectAlignment(FI, Alignment);
9046 }
9047 }
9048
9049 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
9050
9051 MachineIRBuilder MIB(MI);
9052 // Memmove requires that we perform the loads first before issuing the stores.
9053 // Apart from that, this loop is pretty much doing the same thing as the
9054 // memcpy codegen function.
9055 unsigned CurrOffset = 0;
9056 SmallVector<Register, 16> LoadVals;
9057 for (auto CopyTy : MemOps) {
9058 // Construct MMO for the load.
9059 auto *LoadMMO =
9060 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
9061
9062 // Create the load.
9063 Register LoadPtr = Src;
9064 if (CurrOffset != 0) {
9065 LLT SrcTy = MRI.getType(Src);
9066 auto Offset =
9067 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
9068 LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
9069 }
9070 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
9071 CurrOffset += CopyTy.getSizeInBytes();
9072 }
9073
9074 CurrOffset = 0;
9075 for (unsigned I = 0; I < MemOps.size(); ++I) {
9076 LLT CopyTy = MemOps[I];
9077 // Now store the values loaded.
9078 auto *StoreMMO =
9079 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
9080
9081 Register StorePtr = Dst;
9082 if (CurrOffset != 0) {
9083 LLT DstTy = MRI.getType(Dst);
9084 auto Offset =
9085 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
9086 StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
9087 }
9088 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
9089 CurrOffset += CopyTy.getSizeInBytes();
9090 }
9091 MI.eraseFromParent();
9092 return Legalized;
9093}
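// Note: unlike the memcpy lowering above, every G_LOAD is emitted before any
// G_STORE, so the expansion stays correct when the source and destination
// ranges overlap; offsets are otherwise computed the same way.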
9094
9095 LegalizerHelper::LegalizeResult
9096 LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
9097 const unsigned Opc = MI.getOpcode();
9098 // This combine is fairly complex so it's not written with a separate
9099 // matcher function.
9100 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
9101 Opc == TargetOpcode::G_MEMSET) &&
9102 "Expected memcpy like instruction");
9103
9104 auto MMOIt = MI.memoperands_begin();
9105 const MachineMemOperand *MemOp = *MMOIt;
9106
9107 Align DstAlign = MemOp->getBaseAlign();
9108 Align SrcAlign;
9109 auto [Dst, Src, Len] = MI.getFirst3Regs();
9110
9111 if (Opc != TargetOpcode::G_MEMSET) {
9112 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
9113 MemOp = *(++MMOIt);
9114 SrcAlign = MemOp->getBaseAlign();
9115 }
9116
9117 // See if this is a constant length copy
9118 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
9119 if (!LenVRegAndVal)
9120 return UnableToLegalize;
9121 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
9122
9123 if (KnownLen == 0) {
9124 MI.eraseFromParent();
9125 return Legalized;
9126 }
9127
9128 bool IsVolatile = MemOp->isVolatile();
9129 if (Opc == TargetOpcode::G_MEMCPY_INLINE)
9130 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
9131 IsVolatile);
9132
9133 // Don't try to optimize volatile.
9134 if (IsVolatile)
9135 return UnableToLegalize;
9136
9137 if (MaxLen && KnownLen > MaxLen)
9138 return UnableToLegalize;
9139
9140 if (Opc == TargetOpcode::G_MEMCPY) {
9141 auto &MF = *MI.getParent()->getParent();
9142 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9143 bool OptSize = shouldLowerMemFuncForSize(MF);
9144 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
9145 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
9146 IsVolatile);
9147 }
9148 if (Opc == TargetOpcode::G_MEMMOVE)
9149 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
9150 if (Opc == TargetOpcode::G_MEMSET)
9151 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
9152 return UnableToLegalize;
9153}
unsigned const MachineRegisterInfo * MRI
#define Success
static const LLT S1
static const LLT S64
static const LLT S32
static const LLT S16
amdgpu AMDGPU Register Bank Select
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition: Utils.h:73
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
std::string Name
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver)
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
#define R2(n)
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t High
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
R600 Clause Merge
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1243
APInt bitcastToAPInt() const
Definition: APFloat.h:1260
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition: APFloat.h:1044
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition: APFloat.h:1004
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:209
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1472
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:186
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1162
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:360
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:189
void negate()
Negate this APInt in place.
Definition: APInt.h:1430
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:199
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:853
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:286
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:180
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:219
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:831
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition: APInt.h:250
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
bool hasAttributes() const
Return true if the builder has IR-level attributes.
Definition: Attributes.h:1092
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
bool hasRetAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the return value.
Definition: Attributes.h:820
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:786
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:763
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:762
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:781
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:780
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:784
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:771
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:765
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:782
@ ICMP_EQ
equal
Definition: InstrTypes.h:778
@ ICMP_NE
not equal
Definition: InstrTypes.h:779
bool isSigned() const
Definition: InstrTypes.h:1007
const APFloat & getValueAPF() const
Definition: Constants.h:312
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:146
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
Definition: DataLayout.h:393
bool isBigEndian() const
Definition: DataLayout.h:239
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition: TypeSize.h:317
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:698
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:695
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:358
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:212
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isTailCall(const MachineInstr &MI) const override
bool isEquality() const
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
Definition: LowLevelType.h:214
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:64
constexpr bool isPointerVector() const
Definition: LowLevelType.h:152
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelType.h:57
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:170
constexpr bool isByteSized() const
Definition: LowLevelType.h:263
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:184
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelType.h:221
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:100
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
Definition: LowLevelType.h:230
constexpr LLT getScalarType() const
Definition: LowLevelType.h:208
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:203
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
Definition: LowLevelType.h:124
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
LegalizeResult lowerShlSat(MachineInstr &MI)
LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LegalizeResult lowerSITOFP(MachineInstr &MI)
LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LegalizeResult lowerBitCount(MachineInstr &MI)
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LegalizeResult lowerLoad(GAnyLoad &MI)
LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizeResult lowerFConstant(MachineInstr &MI)
LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerBitreverse(MachineInstr &MI)
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult lowerEXT(MachineInstr &MI)
LegalizeResult lowerStore(GStore &MI)
LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LegalizeResult lowerFPTOUI(MachineInstr &MI)
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LegalizeResult lowerBitcast(MachineInstr &MI)
LegalizeResult lowerMinMax(MachineInstr &MI)
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LegalizeResult lowerInsert(MachineInstr &MI)
LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LegalizeResult lowerExtract(MachineInstr &MI)
LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LegalizeResult lowerFPOWI(MachineInstr &MI)
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVectorReduction(MachineInstr &MI)
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LegalizeResult lowerFCopySign(MachineInstr &MI)
LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LegalizeResult lowerFunnelShift(MachineInstr &MI)
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LegalizeResult lowerFMad(MachineInstr &MI)
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFFloor(MachineInstr &MI)
LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LegalizeResult lowerFPTOSI(MachineInstr &MI)
LegalizeResult lowerUITOFP(MachineInstr &MI)
LegalizeResult lowerShuffleVector(MachineInstr &MI)
LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerMergeValues(MachineInstr &MI)
LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LegalizeResult lowerRotate(MachineInstr &MI)
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LegalizeResult lowerDIVREM(MachineInstr &MI)
LegalizeResult lowerSelect(MachineInstr &MI)
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emiting a runtime library call instead.
LegalizeResult lowerStackRestore(MachineInstr &MI)
LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerStackSave(MachineInstr &MI)
LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeResult lowerTRUNC(MachineInstr &MI)
LegalizeResult lowerBswap(MachineInstr &MI)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LegalizeResult lowerConstant(MachineInstr &MI)
void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
virtual unsigned getExtOpcodeForWideningConstant(LLT SmallTy) const
Return the opcode (SEXT/ZEXT/ANYEXT) that should be performed while widening a constant of type Small...
bool isLegalOrCustom(const LegalityQuery &Query) const
virtual bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Called for instructions with the Custom LegalizationAction.
bool isLegal(const LegalityQuery &Query) const
virtual bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition: MCInstrInfo.h:70
A single uniqued string.
Definition: Metadata.h:720
StringRef getString() const
Definition: Metadata.cpp:610
Machine Value Type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:230
iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Helper class to build MachineInstr.
MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FSUB Op0, Op1.
MachineInstrBuilder buildFPTOSI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOSI Src0.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
std::optional< MachineInstrBuilder > materializePtrAdd(Register &Res, Register Op0, const LLT ValueTy, uint64_t Value)
Materialize and insert Res = G_PTR_ADD Op0, (G_CONSTANT Value)
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_OR Op0, NegOne.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FABS Op0.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildZExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and inserts Res = G_AND Op, LowBitsSet(ImmOp) Since there is no G_ZEXT_INREG like G_SEXT_INREG,...
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FPOW Src0, Src1.
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
MachineInstrBuilder buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_INTRINSIC_TRUNC Src0.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildSExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of Res and Op.
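The size-adaptive helpers above are what make type-changing rewrites compact. A sketch of a typical widening rewrite, under the assumption that the operation is legal at WideTy; widenBinOp and its parameters are illustrative only.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;
static void widenBinOp(MachineIRBuilder &B, Register Dst, Register LHS,
                       Register RHS, LLT WideTy, unsigned Opc) {
  auto WideLHS = B.buildAnyExtOrTrunc(WideTy, LHS); // ext, trunc, or copy
  auto WideRHS = B.buildAnyExtOrTrunc(WideTy, RHS);
  auto WideRes = B.buildInstr(Opc, {WideTy}, {WideLHS, WideRHS});
  B.buildTrunc(Dst, WideRes);                       // narrow back to Dst's type
}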
MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src)
Build and insert a vector splat of a scalar Src using a G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idiom.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ_ZERO_UNDEF Src0.
MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts)
Build and insert Res = G_VSCALE MinElts.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
unsigned getBoolExtOp(bool IsVec, bool IsFP) const
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation: Zero = G_CONSTANT 0; Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Src0.
MachineInstrBuilder buildSMax(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMAX Op0, Op1.
MachineInstrBuilder buildAssertZExt(const DstOp &Res, const SrcOp &Op, unsigned Size)
Build and insert Res = G_ASSERT_ZEXT Op, Size.
MachineInstrBuilder buildStrictFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_STRICT_FADD Op0, Op1.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
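Pointer arithmetic goes through G_PTR_ADD rather than G_ADD. A sketch of addressing a fixed byte offset from a base pointer, assuming OffsetTy is the index type appropriate for the pointer's address space; addrWithOffset is an illustrative name.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;
static Register addrWithOffset(MachineIRBuilder &B, Register Base,
                               int64_t Offset, LLT OffsetTy) {
  LLT PtrTy = B.getMRI()->getType(Base);        // result keeps the pointer type
  auto Cst = B.buildConstant(OffsetTy, Offset); // byte offset as G_CONSTANT
  return B.buildPtrAdd(PtrTy, Base, Cst).getReg(0);
}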
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildCTTZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ_ZERO_UNDEF Src0.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineInstrBuilder buildPadVectorWithUndefElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x = G_UNMERGE_VALUES Op0; Res = G_BUILD_VECTOR a, b, ..., x, undef, undef, ...
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildSMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMIN Op0, Op1.
MachineInstrBuilder buildInsert(const DstOp &Res, const SrcOp &Src, const SrcOp &Op, unsigned Index)
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_FCOPYSIGN Op0, Op1.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Elt, const SrcOp &Idx)
Build and insert Res = G_INSERT_VECTOR_ELT Val, Elt, Idx.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineInstrBuilder buildDeleteTrailingVectorElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x, y, z = G_UNMERGE_VALUES Op0; Res = G_BUILD_VECTOR a, b, ..., x.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildAtomicCmpXchg(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &CmpVal, const SrcOp &NewVal, MachineMemOperand &MMO)
Build and insert OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, MMO.
MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, ArrayRef< int > Mask)
Build and insert Res = G_SHUFFLE_VECTOR Src1, Src2, Mask.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildUMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_UMIN Op0, Op1.
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_FCMP Pred, Op0, Op1.
MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FADD Op0, Op1.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
bool isReturn(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:940
bool isCopy() const
bool isDebugInstr() const
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:572
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:800
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
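When a load is narrowed, its MachineMemOperand has to be split along with the value. A sketch of splitting an s64 load into two s32 loads, assuming a little-endian target and a byte-addressable pointer; narrowLoadTo32 and its parameters are illustrative, not LegalizerHelper's actual routine.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;
static void narrowLoadTo32(MachineIRBuilder &B, Register Dst, Register Addr,
                           MachineMemOperand &MMO) {
  MachineFunction &MF = B.getMF();
  LLT S32 = LLT::scalar(32);
  LLT PtrTy = B.getMRI()->getType(Addr);
  LLT OffTy = LLT::scalar(64); // illustrative index type; real code asks the DataLayout
  // getMachineMemOperand(MMO, Offset, Ty) derives an offset, narrower MMO.
  auto Lo = B.buildLoad(S32, Addr, *MF.getMachineMemOperand(&MMO, 0, S32));
  auto HiAddr = B.buildPtrAdd(PtrTy, Addr, B.buildConstant(OffTy, 4));
  auto Hi = B.buildLoad(S32, HiAddr, *MF.getMachineMemOperand(&MMO, 4, S32));
  B.buildMergeLikeInstr(Dst, {Lo.getReg(0), Hi.getReg(0)}); // rebuild the s64
}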
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is found.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given register.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:373
TargetInstrInfo - Interface to description of machine instruction set.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR.
unsigned getMaxStoresPerMemcpy(bool OptSize) const
Get maximum # of store operations permitted for llvm.memcpy.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save and restore.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and alignment.
unsigned getMaxStoresPerMemmove(bool OptSize) const
Get maximum # of store operations permitted for llvm.memmove.
Align getMinStackArgumentAlignment() const
Return the minimum stack alignment of an argument.
unsigned getMaxStoresPerMemset(bool OptSize) const
Get maximum # of store operations permitted for llvm.memset.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the target instruction selector can accept natively.
virtual Register getRegisterByName(const char *RegName, LLT Ty, const MachineFunction &MF) const
Return the register ID of the name passed in.
const Triple & getTargetTriple() const
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command line.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
virtual const CallLowering * getCallLowering() const
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:558
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
static Type * getX86_FP80Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static Type * getVoidTy(LLVMContext &C)
static Type * getFP128Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is legal.
Definition: LegalizerInfo.h:65
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
Definition: LegalizerInfo.h:83
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
Definition: LegalizerInfo.h:57
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegalizerInfo.h:74
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
Definition: LegalizerInfo.h:52
@ Custom
The target wants to do something special with this combination of operand and type.
Definition: LegalizerInfo.h:87
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes added by doing so.
Definition: LegalizerInfo.h:71
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
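A sketch of how a conversion is mapped onto one of these runtime-library calls; the EVTs would normally be derived from the instruction's source and destination LLTs, and UNKNOWN_LIBCALL signals that no suitable call exists. The helper name pickFPTOSILibcall is illustrative.

#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>
using namespace llvm;
static RTLIB::Libcall pickFPTOSILibcall(EVT SrcVT, EVT DstVT) {
  RTLIB::Libcall LC = RTLIB::getFPTOSINT(SrcVT, DstVT);
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "unsupported conversion");
  return LC;
}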
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition: STLExtras.h:853
int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition: MathExtras.h:244
Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition: Utils.cpp:1974
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:639
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition: Utils.cpp:1516
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_BUILD_VECTOR.
Definition: Utils.cpp:1561
LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:394
LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elements or scalar bitwidth.
Definition: Utils.cpp:1165
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition: MathExtras.h:366
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
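A few worked values for the MathExtras helpers referenced here; a standalone sketch whose assertions simply document the expected results.

#include "llvm/Support/MathExtras.h"
#include <cassert>
static void mathExtrasExamples() {
  assert(llvm::isPowerOf2_32(64) && !llvm::isPowerOf2_32(48));
  assert(llvm::PowerOf2Ceil(48) == 64); // round up to the next power of two
  assert(llvm::Log2_32(64) == 6);       // floor log base 2
  assert(llvm::maxIntN(8) == 127 && llvm::minIntN(8) == -128);
}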
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which implies the number of blocks needed).
Definition: Utils.cpp:493
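A sketch of splitting a wide register and rebuilding it, assuming Src is s128 and B's insertion point is set; splitAndRejoin is an illustrative name.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
using namespace llvm;
static void splitAndRejoin(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                           Register Dst, Register Src) {
  SmallVector<Register, 4> Parts;
  extractParts(Src, LLT::scalar(32), 4, Parts, B, MRI); // four s32 pieces
  B.buildMergeLikeInstr(Dst, Parts);                    // re-form the s128
}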
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition: MathExtras.h:235
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its APInt value and def register.
Definition: Utils.cpp:426
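Two common ways of recovering a constant operand during legalization, shown as a sketch; mi_match/m_ICst and getIConstantVRegValWithLookThrough are the utilities listed above, while the wrapper getConstantRHS is illustrative.

#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include <optional>
using namespace llvm;
using namespace llvm::MIPatternMatch;
static std::optional<APInt> getConstantRHS(Register Reg,
                                           const MachineRegisterInfo &MRI) {
  APInt Cst;
  if (mi_match(Reg, MRI, m_ICst(Cst))) // direct G_CONSTANT match
    return Cst;
  if (auto ValAndVReg = getIConstantVRegValWithLookThrough(Reg, MRI))
    return ValAndVReg->Value;          // also looks through copies, truncs, etc.
  return std::nullopt;
}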
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition: Utils.h:334
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition: Alignment.h:111
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
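Worked values for the alignment helpers, again as a standalone sketch; commonAlignment answers "what alignment is still guaranteed at this byte offset from an aligned base".

#include "llvm/Support/Alignment.h"
#include <cassert>
static void alignmentExamples() {
  assert(llvm::alignTo(10, llvm::Align(8)) == 16);
  assert(llvm::Log2(llvm::Align(16)) == 4);
  assert(llvm::commonAlignment(llvm::Align(16), 8) == llvm::Align(8));
  assert(llvm::commonAlignment(llvm::Align(16), 32) == llvm::Align(16));
}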
LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition: Utils.cpp:1253
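For scalar types the LCM/GCD type helpers reduce to arithmetic on the bit widths; the sketch below documents a few expected results (vector inputs follow additional rules in Utils.cpp).

#include "llvm/CodeGen/GlobalISel/Utils.h"
#include <cassert>
static void typeCombineExamples() {
  using llvm::LLT;
  assert(llvm::getGCDType(LLT::scalar(64), LLT::scalar(32)) == LLT::scalar(32));
  assert(llvm::getLCMType(LLT::scalar(64), LLT::scalar(32)) == LLT::scalar(64));
  assert(llvm::getGCDType(LLT::scalar(48), LLT::scalar(32)) == LLT::scalar(16));
}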
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition: Utils.cpp:597
#define N
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:276
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:250
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:277
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Align previous() const
Definition: Alignment.h:88
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
The LegalityQuery object bundles together all the information that's needed to decide whether a given operation is legal or not.
LegalizeAction Action
The action to take or the final answer.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)