1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
35#include "llvm/Support/Debug.h"
39#include <numeric>
40#include <optional>
41
42#define DEBUG_TYPE "legalizer"
43
44using namespace llvm;
45using namespace LegalizeActions;
46using namespace MIPatternMatch;
47
48/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
49///
50/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
51/// with any leftover piece as type \p LeftoverTy
52///
53/// Returns -1 in the first element of the pair if the breakdown is not
54/// satisfiable.
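///
/// For example (illustrative): breaking an s64 \p OrigTy into s16 pieces gives
/// {4, 0}, while breaking s70 into s16 gives {4, 1} with \p LeftoverTy set to
/// s6.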
55static std::pair<int, int>
56getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
57 assert(!LeftoverTy.isValid() && "this is an out argument");
58
59 unsigned Size = OrigTy.getSizeInBits();
60 unsigned NarrowSize = NarrowTy.getSizeInBits();
61 unsigned NumParts = Size / NarrowSize;
62 unsigned LeftoverSize = Size - NumParts * NarrowSize;
63 assert(Size > NarrowSize);
64
65 if (LeftoverSize == 0)
66 return {NumParts, 0};
67
68 if (NarrowTy.isVector()) {
69 unsigned EltSize = OrigTy.getScalarSizeInBits();
70 if (LeftoverSize % EltSize != 0)
71 return {-1, -1};
72 LeftoverTy =
73 LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
74 OrigTy.getElementType());
75 } else {
76 LeftoverTy = LLT::scalar(LeftoverSize);
77 }
78
79 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
80 return std::make_pair(NumParts, NumLeftover);
81}
82
83static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
84
85 if (!Ty.isScalar())
86 return nullptr;
87
88 switch (Ty.getSizeInBits()) {
89 case 16:
90 return Type::getHalfTy(Ctx);
91 case 32:
92 return Type::getFloatTy(Ctx);
93 case 64:
94 return Type::getDoubleTy(Ctx);
95 case 80:
96 return Type::getX86_FP80Ty(Ctx);
97 case 128:
98 return Type::getFP128Ty(Ctx);
99 default:
100 return nullptr;
101 }
102}
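// Illustrative note: an s32 LLT maps to the IR float type and s64 to double,
// which is how the floating-point libcall signatures used below (e.g.
// fmod(double, double)) are formed.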
103
104LegalizerHelper::LegalizerHelper(MachineFunction &MF,
105 GISelChangeObserver &Observer,
106 MachineIRBuilder &Builder)
107 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
108 LI(*MF.getSubtarget().getLegalizerInfo()),
109 TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
110
111LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
112 GISelChangeObserver &Observer,
113 MachineIRBuilder &B, GISelKnownBits *KB)
114 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
115 TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
116
117LegalizerHelper::LegalizeResult
118LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
119 LostDebugLocObserver &LocObserver) {
120 LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
121
122 MIRBuilder.setInstrAndDebugLoc(MI);
123
124 if (isa<GIntrinsic>(MI))
125 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
126 auto Step = LI.getAction(MI, MRI);
127 switch (Step.Action) {
128 case Legal:
129 LLVM_DEBUG(dbgs() << ".. Already legal\n");
130 return AlreadyLegal;
131 case Libcall:
132 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
133 return libcall(MI, LocObserver);
134 case NarrowScalar:
135 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
136 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
137 case WidenScalar:
138 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
139 return widenScalar(MI, Step.TypeIdx, Step.NewType);
140 case Bitcast:
141 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
142 return bitcast(MI, Step.TypeIdx, Step.NewType);
143 case Lower:
144 LLVM_DEBUG(dbgs() << ".. Lower\n");
145 return lower(MI, Step.TypeIdx, Step.NewType);
146 case FewerElements:
147 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
148 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
149 case MoreElements:
150 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
151 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
152 case Custom:
153 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
154 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
155 : UnableToLegalize;
156 default:
157 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
158 return UnableToLegalize;
159 }
160}
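// Example (illustrative): on a 32-bit target whose LegalizerInfo marks G_MUL
// on s64 as Libcall, one step rewrites
//   %2:_(s64) = G_MUL %0:_(s64), %1:_(s64)
// into a call to __muldi3 and reports Legalized; a NarrowScalar ruling would
// instead split the multiply into s32 pieces via narrowScalar().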
161
162void LegalizerHelper::insertParts(Register DstReg,
163 LLT ResultTy, LLT PartTy,
164 ArrayRef<Register> PartRegs,
165 LLT LeftoverTy,
166 ArrayRef<Register> LeftoverRegs) {
167 if (!LeftoverTy.isValid()) {
168 assert(LeftoverRegs.empty());
169
170 if (!ResultTy.isVector()) {
171 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
172 return;
173 }
174
175 if (PartTy.isVector())
176 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
177 else
178 MIRBuilder.buildBuildVector(DstReg, PartRegs);
179 return;
180 }
181
182 // Merge sub-vectors with different number of elements and insert into DstReg.
183 if (ResultTy.isVector()) {
184 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
185 SmallVector<Register, 8> AllRegs;
186 for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
187 AllRegs.push_back(Reg);
188 return mergeMixedSubvectors(DstReg, AllRegs);
189 }
190
191 SmallVector<Register> GCDRegs;
192 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
193 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
194 extractGCDType(GCDRegs, GCDTy, PartReg);
195 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
196 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
197}
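// Illustrative example: for a scalar s96 destination built from one s64 part
// and one s32 leftover, the GCD type is s32, so the s64 part is unmerged into
// two s32 pieces and all three s32 values are merged straight into the s96
// result.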
198
199void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
200 Register Reg) {
201 LLT Ty = MRI.getType(Reg);
202 SmallVector<Register, 0> RegElts;
203 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
204 MIRBuilder, MRI);
205 Elts.append(RegElts);
206}
207
208/// Merge \p PartRegs with different types into \p DstReg.
209void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
210 ArrayRef<Register> PartRegs) {
211 SmallVector<Register, 0> AllElts;
212 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
213 appendVectorElts(AllElts, PartRegs[i]);
214
215 Register Leftover = PartRegs[PartRegs.size() - 1];
216 if (!MRI.getType(Leftover).isVector())
217 AllElts.push_back(Leftover);
218 else
219 appendVectorElts(AllElts, Leftover);
220
221 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
222}
223
224/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
225static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
226 const MachineInstr &MI) {
227 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
228
229 const int StartIdx = Regs.size();
230 const int NumResults = MI.getNumOperands() - 1;
231 Regs.resize(Regs.size() + NumResults);
232 for (int I = 0; I != NumResults; ++I)
233 Regs[StartIdx + I] = MI.getOperand(I).getReg();
234}
235
236void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
237 LLT GCDTy, Register SrcReg) {
238 LLT SrcTy = MRI.getType(SrcReg);
239 if (SrcTy == GCDTy) {
240 // If the source already evenly divides the result type, we don't need to do
241 // anything.
242 Parts.push_back(SrcReg);
243 } else {
244 // Need to split into common type sized pieces.
245 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
246 getUnmergeResults(Parts, *Unmerge);
247 }
248}
249
250LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
251 LLT NarrowTy, Register SrcReg) {
252 LLT SrcTy = MRI.getType(SrcReg);
253 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
254 extractGCDType(Parts, GCDTy, SrcReg);
255 return GCDTy;
256}
257
258LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
259 SmallVectorImpl<Register> &VRegs,
260 unsigned PadStrategy) {
261 LLT LCMTy = getLCMType(DstTy, NarrowTy);
262
263 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
264 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
265 int NumOrigSrc = VRegs.size();
266
267 Register PadReg;
268
269 // Get a value we can use to pad the source value if the sources won't evenly
270 // cover the result type.
271 if (NumOrigSrc < NumParts * NumSubParts) {
272 if (PadStrategy == TargetOpcode::G_ZEXT)
273 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
274 else if (PadStrategy == TargetOpcode::G_ANYEXT)
275 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
276 else {
277 assert(PadStrategy == TargetOpcode::G_SEXT);
278
279 // Shift the sign bit of the low register through the high register.
280 auto ShiftAmt =
281 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
282 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
283 }
284 }
285
286 // Registers for the final merge to be produced.
287 SmallVector<Register, 4> Remerge(NumParts);
288
289 // Registers needed for intermediate merges, which will be merged into a
290 // source for Remerge.
291 SmallVector<Register, 4> SubMerge(NumSubParts);
292
293 // Once we've fully read off the end of the original source bits, we can reuse
294 // the same high bits for remaining padding elements.
295 Register AllPadReg;
296
297 // Build merges to the LCM type to cover the original result type.
298 for (int I = 0; I != NumParts; ++I) {
299 bool AllMergePartsArePadding = true;
300
301 // Build the requested merges to the requested type.
302 for (int J = 0; J != NumSubParts; ++J) {
303 int Idx = I * NumSubParts + J;
304 if (Idx >= NumOrigSrc) {
305 SubMerge[J] = PadReg;
306 continue;
307 }
308
309 SubMerge[J] = VRegs[Idx];
310
311 // There are meaningful bits here we can't reuse later.
312 AllMergePartsArePadding = false;
313 }
314
315 // If we've filled up a complete piece with padding bits, we can directly
316 // emit the natural sized constant if applicable, rather than a merge of
317 // smaller constants.
318 if (AllMergePartsArePadding && !AllPadReg) {
319 if (PadStrategy == TargetOpcode::G_ANYEXT)
320 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
321 else if (PadStrategy == TargetOpcode::G_ZEXT)
322 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
323
324 // If this is a sign extension, we can't materialize a trivial constant
325 // with the right type and have to produce a merge.
326 }
327
328 if (AllPadReg) {
329 // Avoid creating additional instructions if we're just adding additional
330 // copies of padding bits.
331 Remerge[I] = AllPadReg;
332 continue;
333 }
334
335 if (NumSubParts == 1)
336 Remerge[I] = SubMerge[0];
337 else
338 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
339
340 // In the sign extend padding case, re-use the first all-signbit merge.
341 if (AllMergePartsArePadding && !AllPadReg)
342 AllPadReg = Remerge[I];
343 }
344
345 VRegs = std::move(Remerge);
346 return LCMTy;
347}
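// Illustrative example: merging three s32 sources toward an s128 LCM type
// needs four pieces, so one pad element is appended. G_ZEXT padding uses a
// zero constant, G_ANYEXT uses G_IMPLICIT_DEF, and G_SEXT re-uses an
// arithmetic shift of the last source so its sign bit fills the padding.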
348
349void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
350 ArrayRef<Register> RemergeRegs) {
351 LLT DstTy = MRI.getType(DstReg);
352
353 // Create the merge to the widened source, and extract the relevant bits into
354 // the result.
355
356 if (DstTy == LCMTy) {
357 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
358 return;
359 }
360
361 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
362 if (DstTy.isScalar() && LCMTy.isScalar()) {
363 MIRBuilder.buildTrunc(DstReg, Remerge);
364 return;
365 }
366
367 if (LCMTy.isVector()) {
368 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
369 SmallVector<Register, 8> UnmergeDefs(NumDefs);
370 UnmergeDefs[0] = DstReg;
371 for (unsigned I = 1; I != NumDefs; ++I)
372 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
373
374 MIRBuilder.buildUnmerge(UnmergeDefs,
375 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
376 return;
377 }
378
379 llvm_unreachable("unhandled case");
380}
381
382static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
383#define RTLIBCASE_INT(LibcallPrefix) \
384 do { \
385 switch (Size) { \
386 case 32: \
387 return RTLIB::LibcallPrefix##32; \
388 case 64: \
389 return RTLIB::LibcallPrefix##64; \
390 case 128: \
391 return RTLIB::LibcallPrefix##128; \
392 default: \
393 llvm_unreachable("unexpected size"); \
394 } \
395 } while (0)
396
397#define RTLIBCASE(LibcallPrefix) \
398 do { \
399 switch (Size) { \
400 case 32: \
401 return RTLIB::LibcallPrefix##32; \
402 case 64: \
403 return RTLIB::LibcallPrefix##64; \
404 case 80: \
405 return RTLIB::LibcallPrefix##80; \
406 case 128: \
407 return RTLIB::LibcallPrefix##128; \
408 default: \
409 llvm_unreachable("unexpected size"); \
410 } \
411 } while (0)
412
413 switch (Opcode) {
414 case TargetOpcode::G_MUL:
415 RTLIBCASE_INT(MUL_I);
416 case TargetOpcode::G_SDIV:
417 RTLIBCASE_INT(SDIV_I);
418 case TargetOpcode::G_UDIV:
419 RTLIBCASE_INT(UDIV_I);
420 case TargetOpcode::G_SREM:
421 RTLIBCASE_INT(SREM_I);
422 case TargetOpcode::G_UREM:
423 RTLIBCASE_INT(UREM_I);
424 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
425 RTLIBCASE_INT(CTLZ_I);
426 case TargetOpcode::G_FADD:
427 RTLIBCASE(ADD_F);
428 case TargetOpcode::G_FSUB:
429 RTLIBCASE(SUB_F);
430 case TargetOpcode::G_FMUL:
431 RTLIBCASE(MUL_F);
432 case TargetOpcode::G_FDIV:
433 RTLIBCASE(DIV_F);
434 case TargetOpcode::G_FEXP:
435 RTLIBCASE(EXP_F);
436 case TargetOpcode::G_FEXP2:
437 RTLIBCASE(EXP2_F);
438 case TargetOpcode::G_FEXP10:
439 RTLIBCASE(EXP10_F);
440 case TargetOpcode::G_FREM:
441 RTLIBCASE(REM_F);
442 case TargetOpcode::G_FPOW:
443 RTLIBCASE(POW_F);
444 case TargetOpcode::G_FPOWI:
445 RTLIBCASE(POWI_F);
446 case TargetOpcode::G_FMA:
447 RTLIBCASE(FMA_F);
448 case TargetOpcode::G_FSIN:
449 RTLIBCASE(SIN_F);
450 case TargetOpcode::G_FCOS:
451 RTLIBCASE(COS_F);
452 case TargetOpcode::G_FTAN:
453 RTLIBCASE(TAN_F);
454 case TargetOpcode::G_FASIN:
455 RTLIBCASE(ASIN_F);
456 case TargetOpcode::G_FACOS:
457 RTLIBCASE(ACOS_F);
458 case TargetOpcode::G_FATAN:
459 RTLIBCASE(ATAN_F);
460 case TargetOpcode::G_FSINH:
461 RTLIBCASE(SINH_F);
462 case TargetOpcode::G_FCOSH:
463 RTLIBCASE(COSH_F);
464 case TargetOpcode::G_FTANH:
465 RTLIBCASE(TANH_F);
466 case TargetOpcode::G_FLOG10:
467 RTLIBCASE(LOG10_F);
468 case TargetOpcode::G_FLOG:
469 RTLIBCASE(LOG_F);
470 case TargetOpcode::G_FLOG2:
471 RTLIBCASE(LOG2_F);
472 case TargetOpcode::G_FLDEXP:
473 RTLIBCASE(LDEXP_F);
474 case TargetOpcode::G_FCEIL:
475 RTLIBCASE(CEIL_F);
476 case TargetOpcode::G_FFLOOR:
477 RTLIBCASE(FLOOR_F);
478 case TargetOpcode::G_FMINNUM:
479 RTLIBCASE(FMIN_F);
480 case TargetOpcode::G_FMAXNUM:
481 RTLIBCASE(FMAX_F);
482 case TargetOpcode::G_FSQRT:
483 RTLIBCASE(SQRT_F);
484 case TargetOpcode::G_FRINT:
485 RTLIBCASE(RINT_F);
486 case TargetOpcode::G_FNEARBYINT:
487 RTLIBCASE(NEARBYINT_F);
488 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
489 RTLIBCASE(ROUNDEVEN_F);
490 case TargetOpcode::G_INTRINSIC_LRINT:
491 RTLIBCASE(LRINT_F);
492 case TargetOpcode::G_INTRINSIC_LLRINT:
493 RTLIBCASE(LLRINT_F);
494 }
495 llvm_unreachable("Unknown libcall function");
496}
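// Example (illustrative): getRTLibDesc(TargetOpcode::G_FREM, 64) resolves to
// RTLIB::REM_F64 (the fmod libcall), and G_MUL at 128 bits resolves to
// RTLIB::MUL_I128.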
497
498/// True if an instruction is in tail position in its caller. Intended for
499/// legalizing libcalls as tail calls when possible.
500static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
501 MachineInstr &MI,
502 const TargetInstrInfo &TII,
503 MachineRegisterInfo &MRI) {
504 MachineBasicBlock &MBB = *MI.getParent();
505 const Function &F = MBB.getParent()->getFunction();
506
507 // Conservatively require the attributes of the call to match those of
508 // the return. Ignore NoAlias and NonNull because they don't affect the
509 // call sequence.
510 AttributeList CallerAttrs = F.getAttributes();
511 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
512 .removeAttribute(Attribute::NoAlias)
513 .removeAttribute(Attribute::NonNull)
514 .hasAttributes())
515 return false;
516
517 // It's not safe to eliminate the sign / zero extension of the return value.
518 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
519 CallerAttrs.hasRetAttr(Attribute::SExt))
520 return false;
521
522 // Only tail call if the following instruction is a standard return or if we
523 // have a `thisreturn` callee, and a sequence like:
524 //
525 // G_MEMCPY %0, %1, %2
526 // $x0 = COPY %0
527 // RET_ReallyLR implicit $x0
528 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
529 if (Next != MBB.instr_end() && Next->isCopy()) {
530 if (MI.getOpcode() == TargetOpcode::G_BZERO)
531 return false;
532
533 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
534 // memcpy/etc routines return the same parameter. For others it will be the
535 // returned value.
536 Register VReg = MI.getOperand(0).getReg();
537 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
538 return false;
539
540 Register PReg = Next->getOperand(0).getReg();
541 if (!PReg.isPhysical())
542 return false;
543
544 auto Ret = next_nodbg(Next, MBB.instr_end());
545 if (Ret == MBB.instr_end() || !Ret->isReturn())
546 return false;
547
548 if (Ret->getNumImplicitOperands() != 1)
549 return false;
550
551 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
552 return false;
553
554 // Skip over the COPY that we just validated.
555 Next = Ret;
556 }
557
558 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
559 return false;
560
561 return true;
562}
563
564LegalizerHelper::LegalizeResult
565llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
566 const CallLowering::ArgInfo &Result,
567 ArrayRef<CallLowering::ArgInfo> Args,
568 const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
569 MachineInstr *MI) {
570 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
571
572 CallLowering::CallLoweringInfo Info;
573 Info.CallConv = CC;
574 Info.Callee = MachineOperand::CreateES(Name);
575 Info.OrigRet = Result;
576 if (MI)
577 Info.IsTailCall =
578 (Result.Ty->isVoidTy() ||
579 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
580 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
581 *MIRBuilder.getMRI());
582
583 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
584 if (!CLI.lowerCall(MIRBuilder, Info))
585 return LegalizerHelper::UnableToLegalize;
586
587 if (MI && Info.LoweredTailCall) {
588 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
589
590 // Check debug locations before removing the return.
591 LocObserver.checkpoint(true);
592
593 // We must have a return following the call (or debug insts) to get past
594 // isLibCallInTailPosition.
595 do {
596 MachineInstr *Next = MI->getNextNode();
597 assert(Next &&
598 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
599 "Expected instr following MI to be return or debug inst?");
600 // We lowered a tail call, so the call is now the return from the block.
601 // Delete the old return.
602 Next->eraseFromParent();
603 } while (MI->getNextNode());
604
605 // We expect to lose the debug location from the return.
606 LocObserver.checkpoint(false);
607 }
608 return LegalizerHelper::Legalized;
609}
610
611LegalizerHelper::LegalizeResult
612llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
613 const CallLowering::ArgInfo &Result,
614 ArrayRef<CallLowering::ArgInfo> Args,
615 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
616 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
617 const char *Name = TLI.getLibcallName(Libcall);
618 if (!Name)
619 return LegalizerHelper::UnableToLegalize;
620 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
621 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
622}
623
624// Useful for libcalls where all operands have the same type.
625static LegalizerHelper::LegalizeResult
626simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
627 Type *OpType, LostDebugLocObserver &LocObserver) {
628 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
629
630 // FIXME: What does the original arg index mean here?
631 SmallVector<CallLowering::ArgInfo, 3> Args;
632 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
633 Args.push_back({MO.getReg(), OpType, 0});
634 return createLibcall(MIRBuilder, Libcall,
635 {MI.getOperand(0).getReg(), OpType, 0}, Args,
636 LocObserver, &MI);
637}
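// Illustrative call chain: libcall() below passes Size == 64 and OpType ==
// double for a G_FSIN on s64, so simpleLibcall emits a call to sin(double)
// whose result is written to the original destination register.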
638
639LegalizerHelper::LegalizeResult
640llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
641 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
642 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
643
644 SmallVector<CallLowering::ArgInfo, 3> Args;
645 // Add all the args, except for the last which is an imm denoting 'tail'.
646 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
647 Register Reg = MI.getOperand(i).getReg();
648
649 // Need to derive an IR type for call lowering.
650 LLT OpLLT = MRI.getType(Reg);
651 Type *OpTy = nullptr;
652 if (OpLLT.isPointer())
653 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
654 else
655 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
656 Args.push_back({Reg, OpTy, 0});
657 }
658
659 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
660 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
661 RTLIB::Libcall RTLibcall;
662 unsigned Opc = MI.getOpcode();
663 switch (Opc) {
664 case TargetOpcode::G_BZERO:
665 RTLibcall = RTLIB::BZERO;
666 break;
667 case TargetOpcode::G_MEMCPY:
668 RTLibcall = RTLIB::MEMCPY;
669 Args[0].Flags[0].setReturned();
670 break;
671 case TargetOpcode::G_MEMMOVE:
672 RTLibcall = RTLIB::MEMMOVE;
673 Args[0].Flags[0].setReturned();
674 break;
675 case TargetOpcode::G_MEMSET:
676 RTLibcall = RTLIB::MEMSET;
677 Args[0].Flags[0].setReturned();
678 break;
679 default:
680 llvm_unreachable("unsupported opcode");
681 }
682 const char *Name = TLI.getLibcallName(RTLibcall);
683
684 // Unsupported libcall on the target.
685 if (!Name) {
686 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
687 << MIRBuilder.getTII().getName(Opc) << "\n");
688 return LegalizerHelper::UnableToLegalize;
689 }
690
691 CallLowering::CallLoweringInfo Info;
692 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
693 Info.Callee = MachineOperand::CreateES(Name);
694 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
695 Info.IsTailCall =
696 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
697 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
698
699 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
700 if (!CLI.lowerCall(MIRBuilder, Info))
701 return LegalizerHelper::UnableToLegalize;
702
703 if (Info.LoweredTailCall) {
704 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
705
706 // Check debug locations before removing the return.
707 LocObserver.checkpoint(true);
708
709 // We must have a return following the call (or debug insts) to get past
710 // isLibCallInTailPosition.
711 do {
712 MachineInstr *Next = MI.getNextNode();
713 assert(Next &&
714 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
715 "Expected instr following MI to be return or debug inst?");
716 // We lowered a tail call, so the call is now the return from the block.
717 // Delete the old return.
718 Next->eraseFromParent();
719 } while (MI.getNextNode());
720
721 // We expect to lose the debug location from the return.
722 LocObserver.checkpoint(false);
723 }
724
725 return LegalizerHelper::Legalized;
726}
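// Example (illustrative): a G_MEMCPY whose trailing immediate marks it as a
// tail call becomes a memcpy() libcall; if the following COPY/RET sequence
// passes isLibCallInTailPosition, the call is emitted as a tail call and the
// now-redundant return is erased.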
727
728static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
729 unsigned Opc = MI.getOpcode();
730 auto &AtomicMI = cast<GMemOperation>(MI);
731 auto &MMO = AtomicMI.getMMO();
732 auto Ordering = MMO.getMergedOrdering();
733 LLT MemType = MMO.getMemoryType();
734 uint64_t MemSize = MemType.getSizeInBytes();
735 if (MemType.isVector())
736 return RTLIB::UNKNOWN_LIBCALL;
737
738#define LCALLS(A, B) \
739 { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
740#define LCALL5(A) \
741 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
742 switch (Opc) {
743 case TargetOpcode::G_ATOMIC_CMPXCHG:
744 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
745 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
746 return getOutlineAtomicHelper(LC, Ordering, MemSize);
747 }
748 case TargetOpcode::G_ATOMICRMW_XCHG: {
749 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
750 return getOutlineAtomicHelper(LC, Ordering, MemSize);
751 }
752 case TargetOpcode::G_ATOMICRMW_ADD:
753 case TargetOpcode::G_ATOMICRMW_SUB: {
754 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
755 return getOutlineAtomicHelper(LC, Ordering, MemSize);
756 }
757 case TargetOpcode::G_ATOMICRMW_AND: {
758 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
759 return getOutlineAtomicHelper(LC, Ordering, MemSize);
760 }
761 case TargetOpcode::G_ATOMICRMW_OR: {
762 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
763 return getOutlineAtomicHelper(LC, Ordering, MemSize);
764 }
765 case TargetOpcode::G_ATOMICRMW_XOR: {
766 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
767 return getOutlineAtomicHelper(LC, Ordering, MemSize);
768 }
769 default:
770 return RTLIB::UNKNOWN_LIBCALL;
771 }
772#undef LCALLS
773#undef LCALL5
774}
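// Example (illustrative, assuming an AArch64-style outline-atomics setup): a
// 4-byte G_ATOMICRMW_ADD with acquire ordering maps to
// RTLIB::OUTLINE_ATOMIC_LDADD4_ACQ (the __aarch64_ldadd4_acq helper), while
// any vector memory type falls back to UNKNOWN_LIBCALL.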
775
776LegalizerHelper::LegalizeResult
777LegalizerHelper::createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
778 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
779
780 Type *RetTy;
781 SmallVector<Register> RetRegs;
782 SmallVector<CallLowering::ArgInfo, 3> Args;
783 unsigned Opc = MI.getOpcode();
784 switch (Opc) {
785 case TargetOpcode::G_ATOMIC_CMPXCHG:
786 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
787 Register Success;
788 LLT SuccessLLT;
789 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
790 MI.getFirst4RegLLTs();
791 RetRegs.push_back(Ret);
792 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
793 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
794 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
795 NewLLT) = MI.getFirst5RegLLTs();
796 RetRegs.push_back(Success);
797 RetTy = StructType::get(
798 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
799 }
800 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
801 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
802 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
803 break;
804 }
805 case TargetOpcode::G_ATOMICRMW_XCHG:
806 case TargetOpcode::G_ATOMICRMW_ADD:
807 case TargetOpcode::G_ATOMICRMW_SUB:
808 case TargetOpcode::G_ATOMICRMW_AND:
809 case TargetOpcode::G_ATOMICRMW_OR:
810 case TargetOpcode::G_ATOMICRMW_XOR: {
811 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
812 RetRegs.push_back(Ret);
813 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
814 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
815 Val =
816 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
817 .getReg(0);
818 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
819 Val =
820 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
821 .getReg(0);
822 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
823 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
824 break;
825 }
826 default:
827 llvm_unreachable("unsupported opcode");
828 }
829
830 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
831 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
832 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
833 const char *Name = TLI.getLibcallName(RTLibcall);
834
835 // Unsupported libcall on the target.
836 if (!Name) {
837 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
838 << MIRBuilder.getTII().getName(Opc) << "\n");
839 return UnableToLegalize;
840 }
841
842 CallLowering::CallLoweringInfo Info;
843 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
844 Info.Callee = MachineOperand::CreateES(Name);
845 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
846
847 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
848 if (!CLI.lowerCall(MIRBuilder, Info))
849 return UnableToLegalize;
850
851 return Legalized;
852}
853
854static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
855 Type *FromType) {
856 auto ToMVT = MVT::getVT(ToType);
857 auto FromMVT = MVT::getVT(FromType);
858
859 switch (Opcode) {
860 case TargetOpcode::G_FPEXT:
861 return RTLIB::getFPEXT(FromMVT, ToMVT);
862 case TargetOpcode::G_FPTRUNC:
863 return RTLIB::getFPROUND(FromMVT, ToMVT);
864 case TargetOpcode::G_FPTOSI:
865 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
866 case TargetOpcode::G_FPTOUI:
867 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
868 case TargetOpcode::G_SITOFP:
869 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
870 case TargetOpcode::G_UITOFP:
871 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
872 }
873 llvm_unreachable("Unsupported libcall function");
874}
875
876static LegalizerHelper::LegalizeResult
877conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
878 Type *FromType, LostDebugLocObserver &LocObserver) {
879 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
880 return createLibcall(
881 MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType, 0},
882 {{MI.getOperand(1).getReg(), FromType, 0}}, LocObserver, &MI);
883}
884
885static RTLIB::Libcall
886getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
887 RTLIB::Libcall RTLibcall;
888 switch (MI.getOpcode()) {
889 case TargetOpcode::G_GET_FPENV:
890 RTLibcall = RTLIB::FEGETENV;
891 break;
892 case TargetOpcode::G_SET_FPENV:
893 case TargetOpcode::G_RESET_FPENV:
894 RTLibcall = RTLIB::FESETENV;
895 break;
896 case TargetOpcode::G_GET_FPMODE:
897 RTLibcall = RTLIB::FEGETMODE;
898 break;
899 case TargetOpcode::G_SET_FPMODE:
900 case TargetOpcode::G_RESET_FPMODE:
901 RTLibcall = RTLIB::FESETMODE;
902 break;
903 default:
904 llvm_unreachable("Unexpected opcode");
905 }
906 return RTLibcall;
907}
908
909// Some library functions that read FP state (fegetmode, fegetenv) write the
910// state into a region in memory. IR intrinsics that do the same operations
911// (get_fpmode, get_fpenv) return the state as an integer value. To implement
912// these intrinsics via the library functions, we need to use a temporary
913// variable, for example:
914//
915// %0:_(s32) = G_GET_FPMODE
916//
917// is transformed to:
918//
919// %1:_(p0) = G_FRAME_INDEX %stack.0
920//   BL &fegetmode
921//   %0:_(s32) = G_LOAD %1
922//
923LegalizerHelper::LegalizeResult
924LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
925 MachineInstr &MI,
926 LostDebugLocObserver &LocObserver) {
927 const DataLayout &DL = MIRBuilder.getDataLayout();
928 auto &MF = MIRBuilder.getMF();
929 auto &MRI = *MIRBuilder.getMRI();
930 auto &Ctx = MF.getFunction().getContext();
931
932 // Create temporary, where library function will put the read state.
933 Register Dst = MI.getOperand(0).getReg();
934 LLT StateTy = MRI.getType(Dst);
935 TypeSize StateSize = StateTy.getSizeInBytes();
936 Align TempAlign = getStackTemporaryAlignment(StateTy);
937 MachinePointerInfo TempPtrInfo;
938 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
939
940 // Create a call to library function, with the temporary as an argument.
941 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
942 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
943 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
944 auto Res =
945 createLibcall(MIRBuilder, RTLibcall,
946 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
947 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
948 LocObserver, nullptr);
949 if (Res != LegalizerHelper::Legalized)
950 return Res;
951
952 // Create a load from the temporary.
953 MachineMemOperand *MMO = MF.getMachineMemOperand(
954 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
955 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
956
957 return Legalized;
958}
959
960// Similar to `createGetStateLibcall`, this function calls a library function
961// using transient space on the stack. In this case the library function reads
962// the content of the memory region.
963LegalizerHelper::LegalizeResult
964LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
965 MachineInstr &MI,
966 LostDebugLocObserver &LocObserver) {
967 const DataLayout &DL = MIRBuilder.getDataLayout();
968 auto &MF = MIRBuilder.getMF();
969 auto &MRI = *MIRBuilder.getMRI();
970 auto &Ctx = MF.getFunction().getContext();
971
972 // Create temporary, where library function will get the new state.
973 Register Src = MI.getOperand(0).getReg();
974 LLT StateTy = MRI.getType(Src);
975 TypeSize StateSize = StateTy.getSizeInBytes();
976 Align TempAlign = getStackTemporaryAlignment(StateTy);
977 MachinePointerInfo TempPtrInfo;
978 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
979
980 // Put the new state into the temporary.
981 MachineMemOperand *MMO = MF.getMachineMemOperand(
982 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
983 MIRBuilder.buildStore(Src, Temp, *MMO);
984
985 // Create a call to library function, with the temporary as an argument.
986 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
987 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
988 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
989 return createLibcall(MIRBuilder, RTLibcall,
990 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
991 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
992 LocObserver, nullptr);
993}
994
995// This function is used to legalize operations that set the default
996// environment state. In the C library, a call like `fesetmode(FE_DFL_MODE)`
997// is used for that. On most targets supported by glibc, FE_DFL_MODE is
998// defined as `((const femode_t *) -1)`; that assumption is used here. If it
999// does not hold for some target, the target must provide custom lowering.
1000LegalizerHelper::LegalizeResult
1001LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
1002 MachineInstr &MI,
1003 LostDebugLocObserver &LocObserver) {
1004 const DataLayout &DL = MIRBuilder.getDataLayout();
1005 auto &MF = MIRBuilder.getMF();
1006 auto &Ctx = MF.getFunction().getContext();
1007
1008 // Create an argument for the library function.
1009 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1010 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1011 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1012 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1013 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1014 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1015 MIRBuilder.buildIntToPtr(Dest, DefValue);
1016
1017 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1018 return createLibcall(MIRBuilder, RTLibcall,
1019 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1020 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1021 LocObserver, &MI);
1022}
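// Illustrative MIR produced for G_RESET_FPMODE on a 64-bit target (assuming
// fesetmode is available as a libcall):
//   %1:_(s64) = G_CONSTANT i64 -1
//   %2:_(p0) = G_INTTOPTR %1(s64)
//   BL &fesetmode            ; %2 passed as the femode_t* argument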
1023
1024LegalizerHelper::LegalizeResult
1025LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1026 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1027
1028 switch (MI.getOpcode()) {
1029 default:
1030 return UnableToLegalize;
1031 case TargetOpcode::G_MUL:
1032 case TargetOpcode::G_SDIV:
1033 case TargetOpcode::G_UDIV:
1034 case TargetOpcode::G_SREM:
1035 case TargetOpcode::G_UREM:
1036 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1037 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1038 unsigned Size = LLTy.getSizeInBits();
1039 Type *HLTy = IntegerType::get(Ctx, Size);
1040 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1041 if (Status != Legalized)
1042 return Status;
1043 break;
1044 }
1045 case TargetOpcode::G_FADD:
1046 case TargetOpcode::G_FSUB:
1047 case TargetOpcode::G_FMUL:
1048 case TargetOpcode::G_FDIV:
1049 case TargetOpcode::G_FMA:
1050 case TargetOpcode::G_FPOW:
1051 case TargetOpcode::G_FREM:
1052 case TargetOpcode::G_FCOS:
1053 case TargetOpcode::G_FSIN:
1054 case TargetOpcode::G_FTAN:
1055 case TargetOpcode::G_FACOS:
1056 case TargetOpcode::G_FASIN:
1057 case TargetOpcode::G_FATAN:
1058 case TargetOpcode::G_FCOSH:
1059 case TargetOpcode::G_FSINH:
1060 case TargetOpcode::G_FTANH:
1061 case TargetOpcode::G_FLOG10:
1062 case TargetOpcode::G_FLOG:
1063 case TargetOpcode::G_FLOG2:
1064 case TargetOpcode::G_FLDEXP:
1065 case TargetOpcode::G_FEXP:
1066 case TargetOpcode::G_FEXP2:
1067 case TargetOpcode::G_FEXP10:
1068 case TargetOpcode::G_FCEIL:
1069 case TargetOpcode::G_FFLOOR:
1070 case TargetOpcode::G_FMINNUM:
1071 case TargetOpcode::G_FMAXNUM:
1072 case TargetOpcode::G_FSQRT:
1073 case TargetOpcode::G_FRINT:
1074 case TargetOpcode::G_FNEARBYINT:
1075 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1076 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1077 unsigned Size = LLTy.getSizeInBits();
1078 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1079 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1080 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1081 return UnableToLegalize;
1082 }
1083 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1084 if (Status != Legalized)
1085 return Status;
1086 break;
1087 }
1088 case TargetOpcode::G_INTRINSIC_LRINT:
1089 case TargetOpcode::G_INTRINSIC_LLRINT: {
1090 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1091 unsigned Size = LLTy.getSizeInBits();
1092 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1093 Type *ITy = IntegerType::get(
1094 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1095 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1096 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1097 return UnableToLegalize;
1098 }
1099 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1100 LegalizeResult Status =
1101 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1102 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1103 if (Status != Legalized)
1104 return Status;
1105 MI.eraseFromParent();
1106 return Legalized;
1107 }
1108 case TargetOpcode::G_FPOWI: {
1109 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1110 unsigned Size = LLTy.getSizeInBits();
1111 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1112 Type *ITy = IntegerType::get(
1113 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1114 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1115 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1116 return UnableToLegalize;
1117 }
1118 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1119 std::initializer_list<CallLowering::ArgInfo> Args = {
1120 {MI.getOperand(1).getReg(), HLTy, 0},
1121 {MI.getOperand(2).getReg(), ITy, 1}};
1122 LegalizeResult Status =
1123 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1124 Args, LocObserver, &MI);
1125 if (Status != Legalized)
1126 return Status;
1127 break;
1128 }
1129 case TargetOpcode::G_FPEXT:
1130 case TargetOpcode::G_FPTRUNC: {
1131 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1132 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1133 if (!FromTy || !ToTy)
1134 return UnableToLegalize;
1135 LegalizeResult Status =
1136 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver);
1137 if (Status != Legalized)
1138 return Status;
1139 break;
1140 }
1141 case TargetOpcode::G_FPTOSI:
1142 case TargetOpcode::G_FPTOUI: {
1143 // FIXME: Support other types
1144 Type *FromTy =
1145 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1146 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1147 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1148 return UnableToLegalize;
1149 LegalizeResult Status = conversionLibcall(
1150 MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver);
1151 if (Status != Legalized)
1152 return Status;
1153 break;
1154 }
1155 case TargetOpcode::G_SITOFP:
1156 case TargetOpcode::G_UITOFP: {
1157 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1158 Type *ToTy =
1159 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1160 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1161 return UnableToLegalize;
1162 LegalizeResult Status = conversionLibcall(
1163 MI, MIRBuilder, ToTy, Type::getIntNTy(Ctx, FromSize), LocObserver);
1164 if (Status != Legalized)
1165 return Status;
1166 break;
1167 }
1168 case TargetOpcode::G_ATOMICRMW_XCHG:
1169 case TargetOpcode::G_ATOMICRMW_ADD:
1170 case TargetOpcode::G_ATOMICRMW_SUB:
1171 case TargetOpcode::G_ATOMICRMW_AND:
1172 case TargetOpcode::G_ATOMICRMW_OR:
1173 case TargetOpcode::G_ATOMICRMW_XOR:
1174 case TargetOpcode::G_ATOMIC_CMPXCHG:
1175 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1176 auto Status = createAtomicLibcall(MIRBuilder, MI);
1177 if (Status != Legalized)
1178 return Status;
1179 break;
1180 }
1181 case TargetOpcode::G_BZERO:
1182 case TargetOpcode::G_MEMCPY:
1183 case TargetOpcode::G_MEMMOVE:
1184 case TargetOpcode::G_MEMSET: {
1185 LegalizeResult Result =
1186 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1187 if (Result != Legalized)
1188 return Result;
1189 MI.eraseFromParent();
1190 return Result;
1191 }
1192 case TargetOpcode::G_GET_FPENV:
1193 case TargetOpcode::G_GET_FPMODE: {
1194 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1195 if (Result != Legalized)
1196 return Result;
1197 break;
1198 }
1199 case TargetOpcode::G_SET_FPENV:
1200 case TargetOpcode::G_SET_FPMODE: {
1201 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1202 if (Result != Legalized)
1203 return Result;
1204 break;
1205 }
1206 case TargetOpcode::G_RESET_FPENV:
1207 case TargetOpcode::G_RESET_FPMODE: {
1208 LegalizeResult Result =
1209 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1210 if (Result != Legalized)
1211 return Result;
1212 break;
1213 }
1214 }
1215
1216 MI.eraseFromParent();
1217 return Legalized;
1218}
1219
1220LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1221 unsigned TypeIdx,
1222 LLT NarrowTy) {
1223 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1224 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1225
1226 switch (MI.getOpcode()) {
1227 default:
1228 return UnableToLegalize;
1229 case TargetOpcode::G_IMPLICIT_DEF: {
1230 Register DstReg = MI.getOperand(0).getReg();
1231 LLT DstTy = MRI.getType(DstReg);
1232
1233 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1234 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1235 // FIXME: Although this would also be legal for the general case, it causes
1236 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1237 // combines not being hit). This seems to be a problem related to the
1238 // artifact combiner.
1239 if (SizeOp0 % NarrowSize != 0) {
1240 LLT ImplicitTy = NarrowTy;
1241 if (DstTy.isVector())
1242 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
1243
1244 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1245 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1246
1247 MI.eraseFromParent();
1248 return Legalized;
1249 }
1250
1251 int NumParts = SizeOp0 / NarrowSize;
1252
1253 SmallVector<Register, 2> DstRegs;
1254 for (int i = 0; i < NumParts; ++i)
1255 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1256
1257 if (DstTy.isVector())
1258 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1259 else
1260 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1261 MI.eraseFromParent();
1262 return Legalized;
1263 }
1264 case TargetOpcode::G_CONSTANT: {
1265 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1266 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1267 unsigned TotalSize = Ty.getSizeInBits();
1268 unsigned NarrowSize = NarrowTy.getSizeInBits();
1269 int NumParts = TotalSize / NarrowSize;
1270
1271 SmallVector<Register, 4> PartRegs;
1272 for (int I = 0; I != NumParts; ++I) {
1273 unsigned Offset = I * NarrowSize;
1274 auto K = MIRBuilder.buildConstant(NarrowTy,
1275 Val.lshr(Offset).trunc(NarrowSize));
1276 PartRegs.push_back(K.getReg(0));
1277 }
1278
1279 LLT LeftoverTy;
1280 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1281 SmallVector<Register, 1> LeftoverRegs;
1282 if (LeftoverBits != 0) {
1283 LeftoverTy = LLT::scalar(LeftoverBits);
1284 auto K = MIRBuilder.buildConstant(
1285 LeftoverTy,
1286 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1287 LeftoverRegs.push_back(K.getReg(0));
1288 }
1289
1290 insertParts(MI.getOperand(0).getReg(),
1291 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1292
1293 MI.eraseFromParent();
1294 return Legalized;
1295 }
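  // Example (illustrative): narrowing an s70 G_CONSTANT with NarrowTy s32
  // produces two s32 constants for bits [0, 64) plus an s6 leftover constant
  // for bits [64, 70), which insertParts() then recombines into the s70
  // destination.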
1296 case TargetOpcode::G_SEXT:
1297 case TargetOpcode::G_ZEXT:
1298 case TargetOpcode::G_ANYEXT:
1299 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1300 case TargetOpcode::G_TRUNC: {
1301 if (TypeIdx != 1)
1302 return UnableToLegalize;
1303
1304 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1305 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1306 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1307 return UnableToLegalize;
1308 }
1309
1310 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1311 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1312 MI.eraseFromParent();
1313 return Legalized;
1314 }
1315 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1316 case TargetOpcode::G_FREEZE: {
1317 if (TypeIdx != 0)
1318 return UnableToLegalize;
1319
1320 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1321 // Should widen scalar first
1322 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1323 return UnableToLegalize;
1324
1325 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1326 SmallVector<Register, 8> Parts;
1327 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1328 Parts.push_back(
1329 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1330 .getReg(0));
1331 }
1332
1333 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1334 MI.eraseFromParent();
1335 return Legalized;
1336 }
1337 case TargetOpcode::G_ADD:
1338 case TargetOpcode::G_SUB:
1339 case TargetOpcode::G_SADDO:
1340 case TargetOpcode::G_SSUBO:
1341 case TargetOpcode::G_SADDE:
1342 case TargetOpcode::G_SSUBE:
1343 case TargetOpcode::G_UADDO:
1344 case TargetOpcode::G_USUBO:
1345 case TargetOpcode::G_UADDE:
1346 case TargetOpcode::G_USUBE:
1347 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1348 case TargetOpcode::G_MUL:
1349 case TargetOpcode::G_UMULH:
1350 return narrowScalarMul(MI, NarrowTy);
1351 case TargetOpcode::G_EXTRACT:
1352 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1353 case TargetOpcode::G_INSERT:
1354 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1355 case TargetOpcode::G_LOAD: {
1356 auto &LoadMI = cast<GLoad>(MI);
1357 Register DstReg = LoadMI.getDstReg();
1358 LLT DstTy = MRI.getType(DstReg);
1359 if (DstTy.isVector())
1360 return UnableToLegalize;
1361
1362 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1363 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1364 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1365 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1366 LoadMI.eraseFromParent();
1367 return Legalized;
1368 }
1369
1370 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1371 }
1372 case TargetOpcode::G_ZEXTLOAD:
1373 case TargetOpcode::G_SEXTLOAD: {
1374 auto &LoadMI = cast<GExtLoad>(MI);
1375 Register DstReg = LoadMI.getDstReg();
1376 Register PtrReg = LoadMI.getPointerReg();
1377
1378 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1379 auto &MMO = LoadMI.getMMO();
1380 unsigned MemSize = MMO.getSizeInBits().getValue();
1381
1382 if (MemSize == NarrowSize) {
1383 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1384 } else if (MemSize < NarrowSize) {
1385 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1386 } else if (MemSize > NarrowSize) {
1387 // FIXME: Need to split the load.
1388 return UnableToLegalize;
1389 }
1390
1391 if (isa<GZExtLoad>(LoadMI))
1392 MIRBuilder.buildZExt(DstReg, TmpReg);
1393 else
1394 MIRBuilder.buildSExt(DstReg, TmpReg);
1395
1396 LoadMI.eraseFromParent();
1397 return Legalized;
1398 }
1399 case TargetOpcode::G_STORE: {
1400 auto &StoreMI = cast<GStore>(MI);
1401
1402 Register SrcReg = StoreMI.getValueReg();
1403 LLT SrcTy = MRI.getType(SrcReg);
1404 if (SrcTy.isVector())
1405 return UnableToLegalize;
1406
1407 int NumParts = SizeOp0 / NarrowSize;
1408 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1409 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1410 if (SrcTy.isVector() && LeftoverBits != 0)
1411 return UnableToLegalize;
1412
1413 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1414 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1415 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1416 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1417 StoreMI.eraseFromParent();
1418 return Legalized;
1419 }
1420
1421 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1422 }
1423 case TargetOpcode::G_SELECT:
1424 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1425 case TargetOpcode::G_AND:
1426 case TargetOpcode::G_OR:
1427 case TargetOpcode::G_XOR: {
1428 // Legalize bitwise operation:
1429 // A = BinOp<Ty> B, C
1430 // into:
1431 // B1, ..., BN = G_UNMERGE_VALUES B
1432 // C1, ..., CN = G_UNMERGE_VALUES C
1433 // A1 = BinOp<Ty/N> B1, C1
1434 // ...
1435 // AN = BinOp<Ty/N> BN, CN
1436 // A = G_MERGE_VALUES A1, ..., AN
1437 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1438 }
1439 case TargetOpcode::G_SHL:
1440 case TargetOpcode::G_LSHR:
1441 case TargetOpcode::G_ASHR:
1442 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1443 case TargetOpcode::G_CTLZ:
1444 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1445 case TargetOpcode::G_CTTZ:
1446 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1447 case TargetOpcode::G_CTPOP:
1448 if (TypeIdx == 1)
1449 switch (MI.getOpcode()) {
1450 case TargetOpcode::G_CTLZ:
1451 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1452 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1453 case TargetOpcode::G_CTTZ:
1454 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1455 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1456 case TargetOpcode::G_CTPOP:
1457 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1458 default:
1459 return UnableToLegalize;
1460 }
1461
1462 Observer.changingInstr(MI);
1463 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1464 Observer.changedInstr(MI);
1465 return Legalized;
1466 case TargetOpcode::G_INTTOPTR:
1467 if (TypeIdx != 1)
1468 return UnableToLegalize;
1469
1470 Observer.changingInstr(MI);
1471 narrowScalarSrc(MI, NarrowTy, 1);
1472 Observer.changedInstr(MI);
1473 return Legalized;
1474 case TargetOpcode::G_PTRTOINT:
1475 if (TypeIdx != 0)
1476 return UnableToLegalize;
1477
1478 Observer.changingInstr(MI);
1479 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1480 Observer.changedInstr(MI);
1481 return Legalized;
1482 case TargetOpcode::G_PHI: {
1483 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1484 // NarrowSize.
1485 if (SizeOp0 % NarrowSize != 0)
1486 return UnableToLegalize;
1487
1488 unsigned NumParts = SizeOp0 / NarrowSize;
1489 SmallVector<Register, 2> DstRegs(NumParts);
1490 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1491 Observer.changingInstr(MI);
1492 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1493 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1494 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1495 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1496 SrcRegs[i / 2], MIRBuilder, MRI);
1497 }
1498 MachineBasicBlock &MBB = *MI.getParent();
1499 MIRBuilder.setInsertPt(MBB, MI);
1500 for (unsigned i = 0; i < NumParts; ++i) {
1501 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1502 MachineInstrBuilder MIB =
1503 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1504 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1505 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1506 }
1507 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1508 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1509 Observer.changedInstr(MI);
1510 MI.eraseFromParent();
1511 return Legalized;
1512 }
1513 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1514 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1515 if (TypeIdx != 2)
1516 return UnableToLegalize;
1517
1518 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1519 Observer.changingInstr(MI);
1520 narrowScalarSrc(MI, NarrowTy, OpIdx);
1521 Observer.changedInstr(MI);
1522 return Legalized;
1523 }
1524 case TargetOpcode::G_ICMP: {
1525 Register LHS = MI.getOperand(2).getReg();
1526 LLT SrcTy = MRI.getType(LHS);
1527 uint64_t SrcSize = SrcTy.getSizeInBits();
1528 CmpInst::Predicate Pred =
1529 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1530
1531 // TODO: Handle the non-equality case for weird sizes.
1532 if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
1533 return UnableToLegalize;
1534
1535 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1536 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1537 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1538 LHSLeftoverRegs, MIRBuilder, MRI))
1539 return UnableToLegalize;
1540
1541 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1542 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1543 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1544 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1545 return UnableToLegalize;
1546
1547 // We now have the LHS and RHS of the compare split into narrow-type
1548 // registers, plus potentially some leftover type.
1549 Register Dst = MI.getOperand(0).getReg();
1550 LLT ResTy = MRI.getType(Dst);
1551 if (ICmpInst::isEquality(Pred)) {
1552 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1553 // them together. For each equal part, the result should be all 0s. For
1554 // each non-equal part, we'll get at least one 1.
1555 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1556 SmallVector<Register, 4> Xors;
1557 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1558 auto LHS = std::get<0>(LHSAndRHS);
1559 auto RHS = std::get<1>(LHSAndRHS);
1560 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1561 Xors.push_back(Xor);
1562 }
1563
1564 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1565 // to the desired narrow type so that we can OR them together later.
1566 SmallVector<Register, 4> WidenedXors;
1567 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1568 auto LHS = std::get<0>(LHSAndRHS);
1569 auto RHS = std::get<1>(LHSAndRHS);
1570 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1571 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1572 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1573 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1574 Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
1575 }
1576
1577 // Now, for each part we broke up, we know if they are equal/not equal
1578 // based off the G_XOR. We can OR these all together and compare against
1579 // 0 to get the result.
1580 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1581 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1582 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1583 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1584 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1585 } else {
1586 // TODO: Handle non-power-of-two types.
1587 assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
1588 assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
1589 Register LHSL = LHSPartRegs[0];
1590 Register LHSH = LHSPartRegs[1];
1591 Register RHSL = RHSPartRegs[0];
1592 Register RHSH = RHSPartRegs[1];
1593 MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1594 MachineInstrBuilder CmpHEQ =
1595 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
1596 MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
1597 ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1598 MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
1599 }
1600 MI.eraseFromParent();
1601 return Legalized;
1602 }
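  // Illustrative output for an s64 equality compare narrowed to s32:
  //   %xlo:_(s32) = G_XOR %lhs_lo, %rhs_lo
  //   %xhi:_(s32) = G_XOR %lhs_hi, %rhs_hi
  //   %or:_(s32)  = G_OR %xlo, %xhi
  //   %dst:_(s1)  = G_ICMP intpred(eq), %or(s32), %zero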
1603 case TargetOpcode::G_FCMP:
1604 if (TypeIdx != 0)
1605 return UnableToLegalize;
1606
1607 Observer.changingInstr(MI);
1608 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1609 Observer.changedInstr(MI);
1610 return Legalized;
1611
1612 case TargetOpcode::G_SEXT_INREG: {
1613 if (TypeIdx != 0)
1614 return UnableToLegalize;
1615
1616 int64_t SizeInBits = MI.getOperand(2).getImm();
1617
1618 // So long as the new type has more bits than the bits we're extending we
1619 // don't need to break it apart.
1620 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1621 Observer.changingInstr(MI);
1622 // We don't lose any non-extension bits by truncating the src and
1623 // sign-extending the dst.
1624 MachineOperand &MO1 = MI.getOperand(1);
1625 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1626 MO1.setReg(TruncMIB.getReg(0));
1627
1628 MachineOperand &MO2 = MI.getOperand(0);
1629 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1630 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1631 MIRBuilder.buildSExt(MO2, DstExt);
1632 MO2.setReg(DstExt);
1633 Observer.changedInstr(MI);
1634 return Legalized;
1635 }
1636
1637 // Break it apart. Components below the extension point are unmodified. The
1638 // component containing the extension point becomes a narrower SEXT_INREG.
1639 // Components above it are ashr'd from the component containing the
1640 // extension point.
1641 if (SizeOp0 % NarrowSize != 0)
1642 return UnableToLegalize;
1643 int NumParts = SizeOp0 / NarrowSize;
1644
1645 // List the registers where the destination will be scattered.
1646 SmallVector<Register, 2> DstRegs;
1647 // List the registers where the source will be split.
1648 SmallVector<Register, 2> SrcRegs;
1649
1650 // Create all the temporary registers.
1651 for (int i = 0; i < NumParts; ++i) {
1652 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1653
1654 SrcRegs.push_back(SrcReg);
1655 }
1656
1657 // Explode the big arguments into smaller chunks.
1658 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1659
1660 Register AshrCstReg =
1661 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1662 .getReg(0);
1663 Register FullExtensionReg;
1664 Register PartialExtensionReg;
1665
1666 // Do the operation on each small part.
1667 for (int i = 0; i < NumParts; ++i) {
1668 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
1669 DstRegs.push_back(SrcRegs[i]);
1670 PartialExtensionReg = DstRegs.back();
1671 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1672 assert(PartialExtensionReg &&
1673 "Expected to visit partial extension before full");
1674 if (FullExtensionReg) {
1675 DstRegs.push_back(FullExtensionReg);
1676 continue;
1677 }
1678 DstRegs.push_back(
1679 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1680 .getReg(0));
1681 FullExtensionReg = DstRegs.back();
1682 } else {
1683 DstRegs.push_back(
1684 MIRBuilder
1685 .buildInstr(
1686 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1687 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1688 .getReg(0));
1689 PartialExtensionReg = DstRegs.back();
1690 }
1691 }
1692
1693 // Gather the destination registers into the final destination.
1694 Register DstReg = MI.getOperand(0).getReg();
1695 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1696 MI.eraseFromParent();
1697 return Legalized;
1698 }
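  // Illustrative result for G_SEXT_INREG %x(s96), 40 with NarrowTy s32: part 0
  // passes through unchanged, part 1 becomes G_SEXT_INREG %p1, 8, and part 2
  // is a G_ASHR of that partial extension by 31, replicating the sign bit.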
1699 case TargetOpcode::G_BSWAP:
1700 case TargetOpcode::G_BITREVERSE: {
1701 if (SizeOp0 % NarrowSize != 0)
1702 return UnableToLegalize;
1703
1704 Observer.changingInstr(MI);
1705 SmallVector<Register, 2> SrcRegs, DstRegs;
1706 unsigned NumParts = SizeOp0 / NarrowSize;
1707 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
1708 MIRBuilder, MRI);
1709
1710 for (unsigned i = 0; i < NumParts; ++i) {
1711 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1712 {SrcRegs[NumParts - 1 - i]});
1713 DstRegs.push_back(DstPart.getReg(0));
1714 }
1715
1716 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1717
1718 Observer.changedInstr(MI);
1719 MI.eraseFromParent();
1720 return Legalized;
1721 }
1722 case TargetOpcode::G_PTR_ADD:
1723 case TargetOpcode::G_PTRMASK: {
1724 if (TypeIdx != 1)
1725 return UnableToLegalize;
1726 Observer.changingInstr(MI);
1727 narrowScalarSrc(MI, NarrowTy, 2);
1728 Observer.changedInstr(MI);
1729 return Legalized;
1730 }
1731 case TargetOpcode::G_FPTOUI:
1732 case TargetOpcode::G_FPTOSI:
1733 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1734 case TargetOpcode::G_FPEXT:
1735 if (TypeIdx != 0)
1736 return UnableToLegalize;
1738 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1740 return Legalized;
1741 case TargetOpcode::G_FLDEXP:
1742 case TargetOpcode::G_STRICT_FLDEXP:
1743 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
1744 case TargetOpcode::G_VSCALE: {
1745 Register Dst = MI.getOperand(0).getReg();
1746 LLT Ty = MRI.getType(Dst);
1747
1748 // Assume VSCALE(1) fits into a legal integer
1749 const APInt One(NarrowTy.getSizeInBits(), 1);
1750 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
1751 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
1752 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
1753 MIRBuilder.buildMul(Dst, ZExt, C);
1754
1755 MI.eraseFromParent();
1756 return Legalized;
1757 }
1758 }
1759}
1760
1762 LLT Ty = MRI.getType(Val);
1763 if (Ty.isScalar())
1764 return Val;
1765
1767 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1768 if (Ty.isPointer()) {
1769 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1770 return Register();
1771 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1772 }
1773
1774 Register NewVal = Val;
1775
1776 assert(Ty.isVector());
1777 if (Ty.isPointerVector())
1778 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1779 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1780}
1781
1782void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1783 unsigned OpIdx, unsigned ExtOpcode) {
1784 MachineOperand &MO = MI.getOperand(OpIdx);
1785 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1786 MO.setReg(ExtB.getReg(0));
1787}
1788
1789void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1790 unsigned OpIdx) {
1791 MachineOperand &MO = MI.getOperand(OpIdx);
1792 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1793 MO.setReg(ExtB.getReg(0));
1794}
1795
1796void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1797 unsigned OpIdx, unsigned TruncOpcode) {
1798 MachineOperand &MO = MI.getOperand(OpIdx);
1799 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1801 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1802 MO.setReg(DstExt);
1803}
1804
1805void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1806 unsigned OpIdx, unsigned ExtOpcode) {
1807 MachineOperand &MO = MI.getOperand(OpIdx);
1808 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1810 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1811 MO.setReg(DstTrunc);
1812}
1813
1815 unsigned OpIdx) {
1816 MachineOperand &MO = MI.getOperand(OpIdx);
1818 Register Dst = MO.getReg();
1819 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1820 MO.setReg(DstExt);
1822}
1823
1825 unsigned OpIdx) {
1826 MachineOperand &MO = MI.getOperand(OpIdx);
1829}
1830
1831void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1832 MachineOperand &Op = MI.getOperand(OpIdx);
1833 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1834}
1835
1836void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1837 MachineOperand &MO = MI.getOperand(OpIdx);
1838 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1840 MIRBuilder.buildBitcast(MO, CastDst);
1841 MO.setReg(CastDst);
1842}
1843
1845LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1846 LLT WideTy) {
1847 if (TypeIdx != 1)
1848 return UnableToLegalize;
1849
1850 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
1851 if (DstTy.isVector())
1852 return UnableToLegalize;
1853
1854 LLT SrcTy = MRI.getType(Src1Reg);
1855 const int DstSize = DstTy.getSizeInBits();
1856 const int SrcSize = SrcTy.getSizeInBits();
1857 const int WideSize = WideTy.getSizeInBits();
1858 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1859
1860 unsigned NumOps = MI.getNumOperands();
1861 unsigned NumSrc = MI.getNumOperands() - 1;
1862 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1863
1864 if (WideSize >= DstSize) {
1865 // Directly pack the bits in the target type.
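// For example (an illustrative sketch), widening
//   %d:_(s32) = G_MERGE_VALUES %a:_(s8), %b:_(s8), %c:_(s8), %e:_(s8)
// with WideTy = s32 zero-extends each piece, shifts it into position, and
// ORs it into the accumulated result:
//   %r0:_(s32) = G_ZEXT %a
//   %r1:_(s32) = G_OR %r0, (G_SHL (G_ZEXT %b), 8)
//   %r2:_(s32) = G_OR %r1, (G_SHL (G_ZEXT %c), 16)
//   %d:_(s32)  = G_OR %r2, (G_SHL (G_ZEXT %e), 24)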
1866 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
1867
1868 for (unsigned I = 2; I != NumOps; ++I) {
1869 const unsigned Offset = (I - 1) * PartSize;
1870
1871 Register SrcReg = MI.getOperand(I).getReg();
1872 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1873
1874 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1875
1876 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1877 MRI.createGenericVirtualRegister(WideTy);
1878
1879 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1880 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1881 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1882 ResultReg = NextResult;
1883 }
1884
1885 if (WideSize > DstSize)
1886 MIRBuilder.buildTrunc(DstReg, ResultReg);
1887 else if (DstTy.isPointer())
1888 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1889
1890 MI.eraseFromParent();
1891 return Legalized;
1892 }
1893
1894 // Unmerge the original values to the GCD type, and recombine to the next
1895 // multiple greater than the original type.
1896 //
1897 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1898 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1899 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1900 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1901 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1902 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1903 // %12:_(s12) = G_MERGE_VALUES %10, %11
1904 //
1905 // Padding with undef if necessary:
1906 //
1907 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1908 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1909 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1910 // %7:_(s2) = G_IMPLICIT_DEF
1911 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1912 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1913 // %10:_(s12) = G_MERGE_VALUES %8, %9
1914
1915 const int GCD = std::gcd(SrcSize, WideSize);
1916 LLT GCDTy = LLT::scalar(GCD);
1917
1919 SmallVector<Register, 8> NewMergeRegs;
1920 SmallVector<Register, 8> Unmerges;
1921 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1922
1923 // Decompose the original operands if they don't evenly divide.
1924 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
1925 Register SrcReg = MO.getReg();
1926 if (GCD == SrcSize) {
1927 Unmerges.push_back(SrcReg);
1928 } else {
1929 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1930 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1931 Unmerges.push_back(Unmerge.getReg(J));
1932 }
1933 }
1934
1935 // Pad with undef to the next size that is a multiple of the requested size.
1936 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1937 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1938 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1939 Unmerges.push_back(UndefReg);
1940 }
1941
1942 const int PartsPerGCD = WideSize / GCD;
1943
1944 // Build merges of each piece.
1945 ArrayRef<Register> Slicer(Unmerges);
1946 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1947 auto Merge =
1948 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
1949 NewMergeRegs.push_back(Merge.getReg(0));
1950 }
1951
1952 // A truncate may be necessary if the requested type doesn't evenly divide the
1953 // original result type.
1954 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1955 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
1956 } else {
1957 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
1958 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1959 }
1960
1961 MI.eraseFromParent();
1962 return Legalized;
1963}
1964
1966LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1967 LLT WideTy) {
1968 if (TypeIdx != 0)
1969 return UnableToLegalize;
1970
1971 int NumDst = MI.getNumOperands() - 1;
1972 Register SrcReg = MI.getOperand(NumDst).getReg();
1973 LLT SrcTy = MRI.getType(SrcReg);
1974 if (SrcTy.isVector())
1975 return UnableToLegalize;
1976
1977 Register Dst0Reg = MI.getOperand(0).getReg();
1978 LLT DstTy = MRI.getType(Dst0Reg);
1979 if (!DstTy.isScalar())
1980 return UnableToLegalize;
1981
1982 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
1983 if (SrcTy.isPointer()) {
1985 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
1986 LLVM_DEBUG(
1987 dbgs() << "Not casting non-integral address space integer\n");
1988 return UnableToLegalize;
1989 }
1990
1991 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
1992 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
1993 }
1994
1995 // Widen SrcTy to WideTy. This does not affect the result, but since the
1996 // user requested this size, it is probably better handled than SrcTy and
1997 // should reduce the total number of legalization artifacts.
1998 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1999 SrcTy = WideTy;
2000 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2001 }
2002
2003 // There's no unmerge type to target. Directly extract the bits from the
2004 // source type.
2005 unsigned DstSize = DstTy.getSizeInBits();
2006
2007 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2008 for (int I = 1; I != NumDst; ++I) {
2009 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2010 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2011 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2012 }
2013
2014 MI.eraseFromParent();
2015 return Legalized;
2016 }
2017
2018 // Extend the source to a wider type.
2019 LLT LCMTy = getLCMType(SrcTy, WideTy);
2020
2021 Register WideSrc = SrcReg;
2022 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2023 // TODO: If this is an integral address space, cast to integer and anyext.
2024 if (SrcTy.isPointer()) {
2025 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2026 return UnableToLegalize;
2027 }
2028
2029 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2030 }
2031
2032 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2033
2034 // Create a sequence of unmerges and merges to the original results. Since we
2035 // may have widened the source, we will need to pad the results with dead defs
2036 // to cover the source register.
2037 // e.g. widen s48 to s64:
2038 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2039 //
2040 // =>
2041 // %4:_(s192) = G_ANYEXT %0:_(s96)
2042 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2043 // ; unpack to GCD type, with extra dead defs
2044 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2045 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2046 // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
2047 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2048 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2049 const LLT GCDTy = getGCDType(WideTy, DstTy);
2050 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2051 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2052
2053 // Directly unmerge to the destination without going through a GCD type
2054 // if possible
2055 if (PartsPerRemerge == 1) {
2056 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2057
2058 for (int I = 0; I != NumUnmerge; ++I) {
2059 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2060
2061 for (int J = 0; J != PartsPerUnmerge; ++J) {
2062 int Idx = I * PartsPerUnmerge + J;
2063 if (Idx < NumDst)
2064 MIB.addDef(MI.getOperand(Idx).getReg());
2065 else {
2066 // Create dead def for excess components.
2067 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2068 }
2069 }
2070
2071 MIB.addUse(Unmerge.getReg(I));
2072 }
2073 } else {
2074 SmallVector<Register, 16> Parts;
2075 for (int J = 0; J != NumUnmerge; ++J)
2076 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2077
2078 SmallVector<Register, 8> RemergeParts;
2079 for (int I = 0; I != NumDst; ++I) {
2080 for (int J = 0; J < PartsPerRemerge; ++J) {
2081 const int Idx = I * PartsPerRemerge + J;
2082 RemergeParts.emplace_back(Parts[Idx]);
2083 }
2084
2085 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2086 RemergeParts.clear();
2087 }
2088 }
2089
2090 MI.eraseFromParent();
2091 return Legalized;
2092}
2093
2095LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2096 LLT WideTy) {
2097 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2098 unsigned Offset = MI.getOperand(2).getImm();
2099
2100 if (TypeIdx == 0) {
2101 if (SrcTy.isVector() || DstTy.isVector())
2102 return UnableToLegalize;
2103
2104 SrcOp Src(SrcReg);
2105 if (SrcTy.isPointer()) {
2106 // Extracts from pointers can be handled only if they are really just
2107 // simple integers.
2109 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2110 return UnableToLegalize;
2111
2112 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2113 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2114 SrcTy = SrcAsIntTy;
2115 }
2116
2117 if (DstTy.isPointer())
2118 return UnableToLegalize;
2119
2120 if (Offset == 0) {
2121 // Avoid a shift in the degenerate case.
2122 MIRBuilder.buildTrunc(DstReg,
2123 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2124 MI.eraseFromParent();
2125 return Legalized;
2126 }
2127
2128 // Do a shift in the source type.
2129 LLT ShiftTy = SrcTy;
2130 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2131 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2132 ShiftTy = WideTy;
2133 }
2134
2135 auto LShr = MIRBuilder.buildLShr(
2136 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2137 MIRBuilder.buildTrunc(DstReg, LShr);
2138 MI.eraseFromParent();
2139 return Legalized;
2140 }
2141
2142 if (SrcTy.isScalar()) {
2144 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2146 return Legalized;
2147 }
2148
2149 if (!SrcTy.isVector())
2150 return UnableToLegalize;
2151
2152 if (DstTy != SrcTy.getElementType())
2153 return UnableToLegalize;
2154
2155 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2156 return UnableToLegalize;
2157
2159 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2160
2161 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2162 Offset);
2163 widenScalarDst(MI, WideTy.getScalarType(), 0);
2165 return Legalized;
2166}
2167
2169LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2170 LLT WideTy) {
2171 if (TypeIdx != 0 || WideTy.isVector())
2172 return UnableToLegalize;
2174 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2175 widenScalarDst(MI, WideTy);
2177 return Legalized;
2178}
2179
2181LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2182 LLT WideTy) {
2183 unsigned Opcode;
2184 unsigned ExtOpcode;
2185 std::optional<Register> CarryIn;
2186 switch (MI.getOpcode()) {
2187 default:
2188 llvm_unreachable("Unexpected opcode!");
2189 case TargetOpcode::G_SADDO:
2190 Opcode = TargetOpcode::G_ADD;
2191 ExtOpcode = TargetOpcode::G_SEXT;
2192 break;
2193 case TargetOpcode::G_SSUBO:
2194 Opcode = TargetOpcode::G_SUB;
2195 ExtOpcode = TargetOpcode::G_SEXT;
2196 break;
2197 case TargetOpcode::G_UADDO:
2198 Opcode = TargetOpcode::G_ADD;
2199 ExtOpcode = TargetOpcode::G_ZEXT;
2200 break;
2201 case TargetOpcode::G_USUBO:
2202 Opcode = TargetOpcode::G_SUB;
2203 ExtOpcode = TargetOpcode::G_ZEXT;
2204 break;
2205 case TargetOpcode::G_SADDE:
2206 Opcode = TargetOpcode::G_UADDE;
2207 ExtOpcode = TargetOpcode::G_SEXT;
2208 CarryIn = MI.getOperand(4).getReg();
2209 break;
2210 case TargetOpcode::G_SSUBE:
2211 Opcode = TargetOpcode::G_USUBE;
2212 ExtOpcode = TargetOpcode::G_SEXT;
2213 CarryIn = MI.getOperand(4).getReg();
2214 break;
2215 case TargetOpcode::G_UADDE:
2216 Opcode = TargetOpcode::G_UADDE;
2217 ExtOpcode = TargetOpcode::G_ZEXT;
2218 CarryIn = MI.getOperand(4).getReg();
2219 break;
2220 case TargetOpcode::G_USUBE:
2221 Opcode = TargetOpcode::G_USUBE;
2222 ExtOpcode = TargetOpcode::G_ZEXT;
2223 CarryIn = MI.getOperand(4).getReg();
2224 break;
2225 }
2226
2227 if (TypeIdx == 1) {
2228 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2229
2231 if (CarryIn)
2232 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2233 widenScalarDst(MI, WideTy, 1);
2234
2236 return Legalized;
2237 }
2238
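// As an illustrative sketch, %res:_(s8), %ovf:_(s1) = G_SADDO %x:_(s8), %y:_(s8)
// widened to s32 becomes:
//   %wx:_(s32) = G_SEXT %x
//   %wy:_(s32) = G_SEXT %y
//   %wadd:_(s32) = G_ADD %wx, %wy
//   %rt:_(s32) = G_SEXT (G_TRUNC %wadd to s8)
//   %ovf:_(s1) = G_ICMP intpred(ne), %wadd, %rt
//   %res:_(s8) = G_TRUNC %wadd
// Overflow happened exactly when the wide result does not survive the
// truncate/re-extend round trip.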
2239 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2240 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2241 // Do the arithmetic in the larger type.
2242 Register NewOp;
2243 if (CarryIn) {
2244 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2245 NewOp = MIRBuilder
2246 .buildInstr(Opcode, {WideTy, CarryOutTy},
2247 {LHSExt, RHSExt, *CarryIn})
2248 .getReg(0);
2249 } else {
2250 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2251 }
2252 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2253 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2254 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2255 // There is no overflow if the ExtOp is the same as NewOp.
2256 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2257 // Now trunc the NewOp to the original result.
2258 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2259 MI.eraseFromParent();
2260 return Legalized;
2261}
2262
2264LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2265 LLT WideTy) {
2266 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2267 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2268 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2269 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2270 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2271 // We can convert this to:
2272 // 1. Any extend iN to iM
2273 // 2. SHL by M-N
2274 // 3. [US][ADD|SUB|SHL]SAT
2275 // 4. L/ASHR by M-N
2276 //
2277 // It may be more efficient to lower this to a min and a max operation in
2278 // the higher precision arithmetic if the promoted operation isn't legal,
2279 // but this decision is up to the target's lowering request.
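//
// As an illustrative sketch, %d:_(s8) = G_SADDSAT %x:_(s8), %y:_(s8) widened
// to s32 becomes:
//   %wx:_(s32) = G_SHL (G_ANYEXT %x), 24
//   %wy:_(s32) = G_SHL (G_ANYEXT %y), 24
//   %ws:_(s32) = G_SADDSAT %wx, %wy
//   %d:_(s8)   = G_TRUNC (G_ASHR %ws, 24)
// Operating in the high bits makes the wide saturation limits line up with
// the narrow ones.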
2280 Register DstReg = MI.getOperand(0).getReg();
2281
2282 unsigned NewBits = WideTy.getScalarSizeInBits();
2283 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2284
2285 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2286 // must not left shift the RHS to preserve the shift amount.
2287 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2288 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2289 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2290 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2291 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2292 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2293
2294 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2295 {ShiftL, ShiftR}, MI.getFlags());
2296
2297 // Use a shift that will preserve the number of sign bits when the trunc is
2298 // folded away.
2299 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2300 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2301
2302 MIRBuilder.buildTrunc(DstReg, Result);
2303 MI.eraseFromParent();
2304 return Legalized;
2305}
2306
2308LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2309 LLT WideTy) {
2310 if (TypeIdx == 1) {
2312 widenScalarDst(MI, WideTy, 1);
2314 return Legalized;
2315 }
2316
2317 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2318 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2319 LLT SrcTy = MRI.getType(LHS);
2320 LLT OverflowTy = MRI.getType(OriginalOverflow);
2321 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2322
2323 // To determine if the result overflowed in the larger type, we extend the
2324 // input to the larger type, do the multiply (checking if it overflows),
2325 // then also check the high bits of the result to see if overflow happened
2326 // there.
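//
// As an illustrative example, %r:_(s8), %o:_(s1) = G_UMULO %x:_(s8), %y:_(s8)
// widened to s16 with %x = 200 and %y = 2: the wide product is 400 (0x0190),
// whose high byte is nonzero, so the zero-extension check below sets %o. The
// wide multiply itself cannot overflow because 16 >= 2 * 8.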
2327 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2328 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2329 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2330
2331 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2332 // so we don't need to check the overflow result of larger type Mulo.
2333 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2334
2335 unsigned MulOpc =
2336 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2337
2338 MachineInstrBuilder Mulo;
2339 if (WideMulCanOverflow)
2340 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2341 {LeftOperand, RightOperand});
2342 else
2343 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2344
2345 auto Mul = Mulo->getOperand(0);
2346 MIRBuilder.buildTrunc(Result, Mul);
2347
2348 MachineInstrBuilder ExtResult;
2349 // Overflow occurred if it occurred in the larger type, or if the high part
2350 // of the result does not zero/sign-extend the low part. Check this second
2351 // possibility first.
2352 if (IsSigned) {
2353 // For signed, overflow occurred when the high part does not sign-extend
2354 // the low part.
2355 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2356 } else {
2357 // Unsigned overflow occurred when the high part does not zero-extend the
2358 // low part.
2359 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2360 }
2361
2362 if (WideMulCanOverflow) {
2363 auto Overflow =
2364 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2365 // Finally check if the multiplication in the larger type itself overflowed.
2366 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2367 } else {
2368 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2369 }
2370 MI.eraseFromParent();
2371 return Legalized;
2372}
2373
2374LegalizerHelper::LegalizeResult
2375LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2376 switch (MI.getOpcode()) {
2377 default:
2378 return UnableToLegalize;
2379 case TargetOpcode::G_ATOMICRMW_XCHG:
2380 case TargetOpcode::G_ATOMICRMW_ADD:
2381 case TargetOpcode::G_ATOMICRMW_SUB:
2382 case TargetOpcode::G_ATOMICRMW_AND:
2383 case TargetOpcode::G_ATOMICRMW_OR:
2384 case TargetOpcode::G_ATOMICRMW_XOR:
2385 case TargetOpcode::G_ATOMICRMW_MIN:
2386 case TargetOpcode::G_ATOMICRMW_MAX:
2387 case TargetOpcode::G_ATOMICRMW_UMIN:
2388 case TargetOpcode::G_ATOMICRMW_UMAX:
2389 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2391 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2392 widenScalarDst(MI, WideTy, 0);
2394 return Legalized;
2395 case TargetOpcode::G_ATOMIC_CMPXCHG:
2396 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2398 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2399 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2400 widenScalarDst(MI, WideTy, 0);
2402 return Legalized;
2403 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2404 if (TypeIdx == 0) {
2406 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2407 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2408 widenScalarDst(MI, WideTy, 0);
2410 return Legalized;
2411 }
2412 assert(TypeIdx == 1 &&
2413 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2415 widenScalarDst(MI, WideTy, 1);
2417 return Legalized;
2418 case TargetOpcode::G_EXTRACT:
2419 return widenScalarExtract(MI, TypeIdx, WideTy);
2420 case TargetOpcode::G_INSERT:
2421 return widenScalarInsert(MI, TypeIdx, WideTy);
2422 case TargetOpcode::G_MERGE_VALUES:
2423 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2424 case TargetOpcode::G_UNMERGE_VALUES:
2425 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2426 case TargetOpcode::G_SADDO:
2427 case TargetOpcode::G_SSUBO:
2428 case TargetOpcode::G_UADDO:
2429 case TargetOpcode::G_USUBO:
2430 case TargetOpcode::G_SADDE:
2431 case TargetOpcode::G_SSUBE:
2432 case TargetOpcode::G_UADDE:
2433 case TargetOpcode::G_USUBE:
2434 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2435 case TargetOpcode::G_UMULO:
2436 case TargetOpcode::G_SMULO:
2437 return widenScalarMulo(MI, TypeIdx, WideTy);
2438 case TargetOpcode::G_SADDSAT:
2439 case TargetOpcode::G_SSUBSAT:
2440 case TargetOpcode::G_SSHLSAT:
2441 case TargetOpcode::G_UADDSAT:
2442 case TargetOpcode::G_USUBSAT:
2443 case TargetOpcode::G_USHLSAT:
2444 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2445 case TargetOpcode::G_CTTZ:
2446 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2447 case TargetOpcode::G_CTLZ:
2448 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2449 case TargetOpcode::G_CTPOP: {
2450 if (TypeIdx == 0) {
2452 widenScalarDst(MI, WideTy, 0);
2454 return Legalized;
2455 }
2456
2457 Register SrcReg = MI.getOperand(1).getReg();
2458
2459 // First extend the input.
2460 unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
2461 MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
2462 ? TargetOpcode::G_ANYEXT
2463 : TargetOpcode::G_ZEXT;
2464 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2465 LLT CurTy = MRI.getType(SrcReg);
2466 unsigned NewOpc = MI.getOpcode();
2467 if (NewOpc == TargetOpcode::G_CTTZ) {
2468 // The count is the same in the larger type except if the original
2469 // value was zero. This can be handled by setting the bit just off
2470 // the top of the original type.
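// For example (illustrative), for a G_CTTZ on s8 widened to s32, OR-ing the
// extended source with 0x100 leaves bits 0-7 untouched but guarantees a set
// bit at position 8, so a zero input still yields the correct count of 8.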
2471 auto TopBit =
2472 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2473 MIBSrc = MIRBuilder.buildOr(
2474 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2475 // Now we know the operand is non-zero, use the more relaxed opcode.
2476 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2477 }
2478
2479 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2480
2481 if (MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2482 // An optimization where the result is the CTLZ after the left shift by
2483 // the difference in width between WideTy and CurTy, that is,
2484 // MIBSrc = MIBSrc << (SizeInBits(WideTy) - SizeInBits(CurTy))
2485 // Result = ctlz MIBSrc
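// For example (illustrative), counting the leading zeros of an s8 value in
// s32: shifting left by 24 moves bit 7 to bit 31, so the s32 CTLZ of the
// shifted value equals the s8 CTLZ and no post-adjustment is needed.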
2486 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2487 MIRBuilder.buildConstant(WideTy, SizeDiff));
2488 }
2489
2490 // Perform the operation at the larger size.
2491 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2492 // This is already the correct result for CTPOP and CTTZs
2493 if (MI.getOpcode() == TargetOpcode::G_CTLZ) {
2494 // The correct result is NewOp - (difference in width between WideTy and CurTy).
2495 MIBNewOp = MIRBuilder.buildSub(
2496 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2497 }
2498
2499 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2500 MI.eraseFromParent();
2501 return Legalized;
2502 }
2503 case TargetOpcode::G_BSWAP: {
2505 Register DstReg = MI.getOperand(0).getReg();
2506
2507 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2508 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2509 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2510 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2511
2512 MI.getOperand(0).setReg(DstExt);
2513
2515
2516 LLT Ty = MRI.getType(DstReg);
2517 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2518 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2519 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2520
2521 MIRBuilder.buildTrunc(DstReg, ShrReg);
2523 return Legalized;
2524 }
2525 case TargetOpcode::G_BITREVERSE: {
2527
2528 Register DstReg = MI.getOperand(0).getReg();
2529 LLT Ty = MRI.getType(DstReg);
2530 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2531
2532 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2533 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2534 MI.getOperand(0).setReg(DstExt);
2536
2537 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2538 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2539 MIRBuilder.buildTrunc(DstReg, Shift);
2541 return Legalized;
2542 }
2543 case TargetOpcode::G_FREEZE:
2544 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2546 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2547 widenScalarDst(MI, WideTy);
2549 return Legalized;
2550
2551 case TargetOpcode::G_ABS:
2553 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2554 widenScalarDst(MI, WideTy);
2556 return Legalized;
2557
2558 case TargetOpcode::G_ADD:
2559 case TargetOpcode::G_AND:
2560 case TargetOpcode::G_MUL:
2561 case TargetOpcode::G_OR:
2562 case TargetOpcode::G_XOR:
2563 case TargetOpcode::G_SUB:
2564 case TargetOpcode::G_SHUFFLE_VECTOR:
2565 // Perform operation at larger width (any extension is fine here, high bits
2566 // don't affect the result) and then truncate the result back to the
2567 // original type.
2569 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2570 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2571 widenScalarDst(MI, WideTy);
2573 return Legalized;
2574
2575 case TargetOpcode::G_SBFX:
2576 case TargetOpcode::G_UBFX:
2578
2579 if (TypeIdx == 0) {
2580 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2581 widenScalarDst(MI, WideTy);
2582 } else {
2583 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2584 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2585 }
2586
2588 return Legalized;
2589
2590 case TargetOpcode::G_SHL:
2592
2593 if (TypeIdx == 0) {
2594 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2595 widenScalarDst(MI, WideTy);
2596 } else {
2597 assert(TypeIdx == 1);
2598 // The "number of bits to shift" operand must preserve its value as an
2599 // unsigned integer:
2600 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2601 }
2602
2604 return Legalized;
2605
2606 case TargetOpcode::G_ROTR:
2607 case TargetOpcode::G_ROTL:
2608 if (TypeIdx != 1)
2609 return UnableToLegalize;
2610
2612 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2614 return Legalized;
2615
2616 case TargetOpcode::G_SDIV:
2617 case TargetOpcode::G_SREM:
2618 case TargetOpcode::G_SMIN:
2619 case TargetOpcode::G_SMAX:
2621 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2622 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2623 widenScalarDst(MI, WideTy);
2625 return Legalized;
2626
2627 case TargetOpcode::G_SDIVREM:
2629 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2630 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2631 widenScalarDst(MI, WideTy);
2632 widenScalarDst(MI, WideTy, 1);
2634 return Legalized;
2635
2636 case TargetOpcode::G_ASHR:
2637 case TargetOpcode::G_LSHR:
2639
2640 if (TypeIdx == 0) {
2641 unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
2642 TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2643
2644 widenScalarSrc(MI, WideTy, 1, CvtOp);
2645 widenScalarDst(MI, WideTy);
2646 } else {
2647 assert(TypeIdx == 1);
2648 // The "number of bits to shift" operand must preserve its value as an
2649 // unsigned integer:
2650 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2651 }
2652
2654 return Legalized;
2655 case TargetOpcode::G_UDIV:
2656 case TargetOpcode::G_UREM:
2657 case TargetOpcode::G_UMIN:
2658 case TargetOpcode::G_UMAX:
2660 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2661 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2662 widenScalarDst(MI, WideTy);
2664 return Legalized;
2665
2666 case TargetOpcode::G_UDIVREM:
2668 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2669 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2670 widenScalarDst(MI, WideTy);
2671 widenScalarDst(MI, WideTy, 1);
2673 return Legalized;
2674
2675 case TargetOpcode::G_SELECT:
2677 if (TypeIdx == 0) {
2678 // Perform operation at larger width (any extension is fine here, high
2679 // bits don't affect the result) and then truncate the result back to the
2680 // original type.
2681 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2682 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2683 widenScalarDst(MI, WideTy);
2684 } else {
2685 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
2686 // Explicit extension is required here since high bits affect the result.
2687 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
2688 }
2690 return Legalized;
2691
2692 case TargetOpcode::G_FPTOSI:
2693 case TargetOpcode::G_FPTOUI:
2694 case TargetOpcode::G_INTRINSIC_LRINT:
2695 case TargetOpcode::G_INTRINSIC_LLRINT:
2696 case TargetOpcode::G_IS_FPCLASS:
2698
2699 if (TypeIdx == 0)
2700 widenScalarDst(MI, WideTy);
2701 else
2702 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2703
2705 return Legalized;
2706 case TargetOpcode::G_SITOFP:
2708
2709 if (TypeIdx == 0)
2710 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2711 else
2712 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2713
2715 return Legalized;
2716 case TargetOpcode::G_UITOFP:
2718
2719 if (TypeIdx == 0)
2720 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2721 else
2722 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2723
2725 return Legalized;
2726 case TargetOpcode::G_LOAD:
2727 case TargetOpcode::G_SEXTLOAD:
2728 case TargetOpcode::G_ZEXTLOAD:
2730 widenScalarDst(MI, WideTy);
2732 return Legalized;
2733
2734 case TargetOpcode::G_STORE: {
2735 if (TypeIdx != 0)
2736 return UnableToLegalize;
2737
2738 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2739 if (!Ty.isScalar())
2740 return UnableToLegalize;
2741
2743
2744 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
2745 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2746 widenScalarSrc(MI, WideTy, 0, ExtType);
2747
2749 return Legalized;
2750 }
2751 case TargetOpcode::G_CONSTANT: {
2752 MachineOperand &SrcMO = MI.getOperand(1);
2754 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
2755 MRI.getType(MI.getOperand(0).getReg()));
2756 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2757 ExtOpc == TargetOpcode::G_ANYEXT) &&
2758 "Illegal Extend");
2759 const APInt &SrcVal = SrcMO.getCImm()->getValue();
2760 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2761 ? SrcVal.sext(WideTy.getSizeInBits())
2762 : SrcVal.zext(WideTy.getSizeInBits());
2764 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
2765
2766 widenScalarDst(MI, WideTy);
2768 return Legalized;
2769 }
2770 case TargetOpcode::G_FCONSTANT: {
2771 // To avoid changing the bits of the constant due to extension to a larger
2772 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
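// For example (illustrative), widening %d:_(s32) = G_FCONSTANT float 1.0 to
// s64 produces %c:_(s64) = G_CONSTANT 0x3F800000 followed by
// %d:_(s32) = G_TRUNC %c, so the original bit pattern is preserved exactly.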
2773 MachineOperand &SrcMO = MI.getOperand(1);
2774 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
2776 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
2777 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
2778 MI.eraseFromParent();
2779 return Legalized;
2780 }
2781 case TargetOpcode::G_IMPLICIT_DEF: {
2783 widenScalarDst(MI, WideTy);
2785 return Legalized;
2786 }
2787 case TargetOpcode::G_BRCOND:
2789 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
2791 return Legalized;
2792
2793 case TargetOpcode::G_FCMP:
2795 if (TypeIdx == 0)
2796 widenScalarDst(MI, WideTy);
2797 else {
2798 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2799 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
2800 }
2802 return Legalized;
2803
2804 case TargetOpcode::G_ICMP:
2806 if (TypeIdx == 0)
2807 widenScalarDst(MI, WideTy);
2808 else {
2809 unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
2810 MI.getOperand(1).getPredicate()))
2811 ? TargetOpcode::G_SEXT
2812 : TargetOpcode::G_ZEXT;
2813 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
2814 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
2815 }
2817 return Legalized;
2818
2819 case TargetOpcode::G_PTR_ADD:
2820 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
2822 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2824 return Legalized;
2825
2826 case TargetOpcode::G_PHI: {
2827 assert(TypeIdx == 0 && "Expecting only Idx 0");
2828
2830 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
2831 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2833 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
2834 }
2835
2836 MachineBasicBlock &MBB = *MI.getParent();
2838 widenScalarDst(MI, WideTy);
2840 return Legalized;
2841 }
2842 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2843 if (TypeIdx == 0) {
2844 Register VecReg = MI.getOperand(1).getReg();
2845 LLT VecTy = MRI.getType(VecReg);
2847
2848 widenScalarSrc(
2849 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
2850 TargetOpcode::G_ANYEXT);
2851
2852 widenScalarDst(MI, WideTy, 0);
2854 return Legalized;
2855 }
2856
2857 if (TypeIdx != 2)
2858 return UnableToLegalize;
2860 // TODO: Probably should be zext
2861 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2863 return Legalized;
2864 }
2865 case TargetOpcode::G_INSERT_VECTOR_ELT: {
2866 if (TypeIdx == 0) {
2868 const LLT WideEltTy = WideTy.getElementType();
2869
2870 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2871 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
2872 widenScalarDst(MI, WideTy, 0);
2874 return Legalized;
2875 }
2876
2877 if (TypeIdx == 1) {
2879
2880 Register VecReg = MI.getOperand(1).getReg();
2881 LLT VecTy = MRI.getType(VecReg);
2882 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
2883
2884 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
2885 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2886 widenScalarDst(MI, WideVecTy, 0);
2888 return Legalized;
2889 }
2890
2891 if (TypeIdx == 2) {
2893 // TODO: Probably should be zext
2894 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2896 return Legalized;
2897 }
2898
2899 return UnableToLegalize;
2900 }
2901 case TargetOpcode::G_FADD:
2902 case TargetOpcode::G_FMUL:
2903 case TargetOpcode::G_FSUB:
2904 case TargetOpcode::G_FMA:
2905 case TargetOpcode::G_FMAD:
2906 case TargetOpcode::G_FNEG:
2907 case TargetOpcode::G_FABS:
2908 case TargetOpcode::G_FCANONICALIZE:
2909 case TargetOpcode::G_FMINNUM:
2910 case TargetOpcode::G_FMAXNUM:
2911 case TargetOpcode::G_FMINNUM_IEEE:
2912 case TargetOpcode::G_FMAXNUM_IEEE:
2913 case TargetOpcode::G_FMINIMUM:
2914 case TargetOpcode::G_FMAXIMUM:
2915 case TargetOpcode::G_FDIV:
2916 case TargetOpcode::G_FREM:
2917 case TargetOpcode::G_FCEIL:
2918 case TargetOpcode::G_FFLOOR:
2919 case TargetOpcode::G_FCOS:
2920 case TargetOpcode::G_FSIN:
2921 case TargetOpcode::G_FTAN:
2922 case TargetOpcode::G_FACOS:
2923 case TargetOpcode::G_FASIN:
2924 case TargetOpcode::G_FATAN:
2925 case TargetOpcode::G_FCOSH:
2926 case TargetOpcode::G_FSINH:
2927 case TargetOpcode::G_FTANH:
2928 case TargetOpcode::G_FLOG10:
2929 case TargetOpcode::G_FLOG:
2930 case TargetOpcode::G_FLOG2:
2931 case TargetOpcode::G_FRINT:
2932 case TargetOpcode::G_FNEARBYINT:
2933 case TargetOpcode::G_FSQRT:
2934 case TargetOpcode::G_FEXP:
2935 case TargetOpcode::G_FEXP2:
2936 case TargetOpcode::G_FEXP10:
2937 case TargetOpcode::G_FPOW:
2938 case TargetOpcode::G_INTRINSIC_TRUNC:
2939 case TargetOpcode::G_INTRINSIC_ROUND:
2940 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2941 assert(TypeIdx == 0);
2943
2944 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
2945 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
2946
2947 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2949 return Legalized;
2950 case TargetOpcode::G_FPOWI:
2951 case TargetOpcode::G_FLDEXP:
2952 case TargetOpcode::G_STRICT_FLDEXP: {
2953 if (TypeIdx == 0) {
2954 if (MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP)
2955 return UnableToLegalize;
2956
2958 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2959 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2961 return Legalized;
2962 }
2963
2964 if (TypeIdx == 1) {
2965 // For some reason SelectionDAG tries to promote to a libcall without
2966 // actually changing the integer type for promotion.
2968 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2970 return Legalized;
2971 }
2972
2973 return UnableToLegalize;
2974 }
2975 case TargetOpcode::G_FFREXP: {
2977
2978 if (TypeIdx == 0) {
2979 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2980 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2981 } else {
2982 widenScalarDst(MI, WideTy, 1);
2983 }
2984
2986 return Legalized;
2987 }
2988 case TargetOpcode::G_INTTOPTR:
2989 if (TypeIdx != 1)
2990 return UnableToLegalize;
2991
2993 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2995 return Legalized;
2996 case TargetOpcode::G_PTRTOINT:
2997 if (TypeIdx != 0)
2998 return UnableToLegalize;
2999
3001 widenScalarDst(MI, WideTy, 0);
3003 return Legalized;
3004 case TargetOpcode::G_BUILD_VECTOR: {
3006
3007 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3008 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3009 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3010
3011 // Avoid changing the result vector type if the source element type was
3012 // requested.
3013 if (TypeIdx == 1) {
3014 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3015 } else {
3016 widenScalarDst(MI, WideTy, 0);
3017 }
3018
3020 return Legalized;
3021 }
3022 case TargetOpcode::G_SEXT_INREG:
3023 if (TypeIdx != 0)
3024 return UnableToLegalize;
3025
3027 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3028 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3030 return Legalized;
3031 case TargetOpcode::G_PTRMASK: {
3032 if (TypeIdx != 1)
3033 return UnableToLegalize;
3035 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3037 return Legalized;
3038 }
3039 case TargetOpcode::G_VECREDUCE_FADD:
3040 case TargetOpcode::G_VECREDUCE_FMUL:
3041 case TargetOpcode::G_VECREDUCE_FMIN:
3042 case TargetOpcode::G_VECREDUCE_FMAX:
3043 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3044 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3045 if (TypeIdx != 0)
3046 return UnableToLegalize;
3048 Register VecReg = MI.getOperand(1).getReg();
3049 LLT VecTy = MRI.getType(VecReg);
3050 LLT WideVecTy = VecTy.isVector()
3051 ? LLT::vector(VecTy.getElementCount(), WideTy)
3052 : WideTy;
3053 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3054 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3056 return Legalized;
3057 }
3058 case TargetOpcode::G_VSCALE: {
3059 MachineOperand &SrcMO = MI.getOperand(1);
3061 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3062 // The CImm is always a signed value
3063 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3065 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3066 widenScalarDst(MI, WideTy);
3068 return Legalized;
3069 }
3070 case TargetOpcode::G_SPLAT_VECTOR: {
3071 if (TypeIdx != 1)
3072 return UnableToLegalize;
3073
3075 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3077 return Legalized;
3078 }
3079 }
3080}
3081
3082static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3083 MachineIRBuilder &B, Register Src, LLT Ty) {
3084 auto Unmerge = B.buildUnmerge(Ty, Src);
3085 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3086 Pieces.push_back(Unmerge.getReg(I));
3087}
3088
3089static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3090 MachineIRBuilder &MIRBuilder) {
3091 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3092 MachineFunction &MF = MIRBuilder.getMF();
3093 const DataLayout &DL = MIRBuilder.getDataLayout();
3094 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3095 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3096 LLT DstLLT = MRI.getType(DstReg);
3097
3098 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3099
3100 auto Addr = MIRBuilder.buildConstantPool(
3101 AddrPtrTy,
3102 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3103
3104 MachineMemOperand *MMO =
3106 MachineMemOperand::MOLoad, DstLLT, Alignment);
3107
3108 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3109}
3110
3113 const MachineOperand &ConstOperand = MI.getOperand(1);
3114 const Constant *ConstantVal = ConstOperand.getCImm();
3115
3116 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3117 MI.eraseFromParent();
3118
3119 return Legalized;
3120}
3121
3124 const MachineOperand &ConstOperand = MI.getOperand(1);
3125 const Constant *ConstantVal = ConstOperand.getFPImm();
3126
3127 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3128 MI.eraseFromParent();
3129
3130 return Legalized;
3131}
3132
3135 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3136 if (SrcTy.isVector()) {
3137 LLT SrcEltTy = SrcTy.getElementType();
3138 SmallVector<Register, 8> SrcRegs;
3139
3140 if (DstTy.isVector()) {
3141 int NumDstElt = DstTy.getNumElements();
3142 int NumSrcElt = SrcTy.getNumElements();
3143
3144 LLT DstEltTy = DstTy.getElementType();
3145 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3146 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3147
3148 // If there's an element size mismatch, insert intermediate casts to match
3149 // the result element type.
3150 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3151 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3152 //
3153 // =>
3154 //
3155 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3156 // %4:_(<2 x s8>) = G_BITCAST %2
3157 // %5:_(<2 x s8>) = G_BITCAST %3
3158 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3159 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
3160 SrcPartTy = SrcEltTy;
3161 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3162 //
3163 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3164 //
3165 // =>
3166 //
3167 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3168 // %4:_(s16) = G_BITCAST %2
3169 // %5:_(s16) = G_BITCAST %3
3170 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3171 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
3172 DstCastTy = DstEltTy;
3173 }
3174
3175 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3176 for (Register &SrcReg : SrcRegs)
3177 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3178 } else
3179 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3180
3181 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3182 MI.eraseFromParent();
3183 return Legalized;
3184 }
3185
3186 if (DstTy.isVector()) {
3187 SmallVector<Register, 8> SrcRegs;
3188 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3189 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3190 MI.eraseFromParent();
3191 return Legalized;
3192 }
3193
3194 return UnableToLegalize;
3195}
3196
3197/// Figure out the bit offset into a register when coercing a vector index for
3198/// the wide element type. This is only for the case when promoting vector to
3199/// one with larger elements.
3200///
3201///
3202/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3203/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
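/// For example (illustrative), with OldEltSize = 8, NewEltSize = 32 and
/// %idx = 5: the element ratio is 4, so %offset_idx = 5 & 3 = 1 and
/// %offset_bits = 1 << 3 = 8, i.e. the target element lives in the second
/// byte of the wide element.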
3204static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3205 Register Idx,
3206 unsigned NewEltSize,
3207 unsigned OldEltSize) {
3208 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3209 LLT IdxTy = B.getMRI()->getType(Idx);
3210
3211 // Now figure out the amount we need to shift to get the target bits.
3212 auto OffsetMask = B.buildConstant(
3213 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3214 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3215 return B.buildShl(IdxTy, OffsetIdx,
3216 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3217}
3218
3219/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3220/// is casting to a vector with a smaller element size, perform multiple element
3221/// extracts and merge the results. If this is coercing to a vector with larger
3222/// elements, index the bitcasted vector and extract the target element with bit
3223/// operations. This is intended to force the indexing in the native register
3224/// size for architectures that can dynamically index the register file.
3227 LLT CastTy) {
3228 if (TypeIdx != 1)
3229 return UnableToLegalize;
3230
3231 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3232
3233 LLT SrcEltTy = SrcVecTy.getElementType();
3234 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3235 unsigned OldNumElts = SrcVecTy.getNumElements();
3236
3237 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3238 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3239
3240 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3241 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3242 if (NewNumElts > OldNumElts) {
3243 // Decreasing the vector element size
3244 //
3245 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3246 // =>
3247 // v4i32:castx = bitcast x:v2i64
3248 //
3249 // i64 = bitcast
3250 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3251 // (i32 (extract_vector_elt castx, (2 * y + 1)))
3252 //
3253 if (NewNumElts % OldNumElts != 0)
3254 return UnableToLegalize;
3255
3256 // Type of the intermediate result vector.
3257 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3258 LLT MidTy =
3259 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3260
3261 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3262
3263 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3264 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3265
3266 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3267 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3268 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3269 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3270 NewOps[I] = Elt.getReg(0);
3271 }
3272
3273 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3274 MIRBuilder.buildBitcast(Dst, NewVec);
3275 MI.eraseFromParent();
3276 return Legalized;
3277 }
3278
3279 if (NewNumElts < OldNumElts) {
3280 if (NewEltSize % OldEltSize != 0)
3281 return UnableToLegalize;
3282
3283 // This only depends on powers of 2 because we use bit tricks to figure out
3284 // the bit offset we need to shift to get the target element. A general
3285 // expansion could emit division/multiply.
3286 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3287 return UnableToLegalize;
3288
3289 // Increasing the vector element size.
3290 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3291 //
3292 // =>
3293 //
3294 // %cast = G_BITCAST %vec
3295 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3296 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3297 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3298 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3299 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3300 // %elt = G_TRUNC %elt_bits
3301
3302 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3303 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3304
3305 // Divide to get the index in the wider element type.
3306 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3307
3308 Register WideElt = CastVec;
3309 if (CastTy.isVector()) {
3310 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3311 ScaledIdx).getReg(0);
3312 }
3313
3314 // Compute the bit offset into the register of the target element.
3315 Register OffsetBits = getBitcastWiderVectorElementOffset(
3316 MIRBuilder, Idx, NewEltSize, OldEltSize);
3317
3318 // Shift the wide element to get the target element.
3319 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3320 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3321 MI.eraseFromParent();
3322 return Legalized;
3323 }
3324
3325 return UnableToLegalize;
3326}
3327
3328/// Emit code to insert \p InsertReg into \p TargetRet at \p OffsetBits in \p
3329/// TargetReg, while preserving other bits in \p TargetReg.
3330///
3331/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
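/// For example (illustrative), inserting an s8 value at OffsetBits = 8 into an
/// s32 target uses Mask = 0xFF << 8 and computes
/// (TargetReg & ~Mask) | ((zext InsertReg) << 8).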
3332static Register buildBitFieldInsert(MachineIRBuilder &B,
3333 Register TargetReg, Register InsertReg,
3334 Register OffsetBits) {
3335 LLT TargetTy = B.getMRI()->getType(TargetReg);
3336 LLT InsertTy = B.getMRI()->getType(InsertReg);
3337 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3338 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3339
3340 // Produce a bitmask of the value to insert
3341 auto EltMask = B.buildConstant(
3342 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3343 InsertTy.getSizeInBits()));
3344 // Shift it into position
3345 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3346 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3347
3348 // Clear out the bits in the wide element
3349 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3350
3351 // The value to insert has all zeros already, so stick it into the masked
3352 // wide element.
3353 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3354}
3355
3356/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3357/// is increasing the element size, perform the indexing in the target element
3358/// type, and use bit operations to insert at the element position. This is
3359/// intended for architectures that can dynamically index the register file and
3360/// want to force indexing in the native register size.
3363 LLT CastTy) {
3364 if (TypeIdx != 0)
3365 return UnableToLegalize;
3366
3367 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3368 MI.getFirst4RegLLTs();
3369 LLT VecTy = DstTy;
3370
3371 LLT VecEltTy = VecTy.getElementType();
3372 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3373 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3374 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3375
3376 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3377 unsigned OldNumElts = VecTy.getNumElements();
3378
3379 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3380 if (NewNumElts < OldNumElts) {
3381 if (NewEltSize % OldEltSize != 0)
3382 return UnableToLegalize;
3383
3384 // This only depends on powers of 2 because we use bit tricks to figure out
3385 // the bit offset we need to shift to get the target element. A general
3386 // expansion could emit division/multiply.
3387 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3388 return UnableToLegalize;
3389
3390 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3391 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3392
3393 // Divide to get the index in the wider element type.
3394 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3395
3396 Register ExtractedElt = CastVec;
3397 if (CastTy.isVector()) {
3398 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3399 ScaledIdx).getReg(0);
3400 }
3401
3402 // Compute the bit offset into the register of the target element.
3403 Register OffsetBits = getBitcastWiderVectorElementOffset(
3404 MIRBuilder, Idx, NewEltSize, OldEltSize);
3405
3406 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3407 Val, OffsetBits);
3408 if (CastTy.isVector()) {
3409 InsertedElt = MIRBuilder.buildInsertVectorElement(
3410 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3411 }
3412
3413 MIRBuilder.buildBitcast(Dst, InsertedElt);
3414 MI.eraseFromParent();
3415 return Legalized;
3416 }
3417
3418 return UnableToLegalize;
3419}
3420
3421// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3422// those that have smaller than legal operands.
3423//
3424// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3425//
3426// ===>
3427//
3428// s32 = G_BITCAST <4 x s8>
3429// s32 = G_BITCAST <4 x s8>
3430// s32 = G_BITCAST <4 x s8>
3431// s32 = G_BITCAST <4 x s8>
3432// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3433// <16 x s8> = G_BITCAST <4 x s32>
3436 LLT CastTy) {
3437 // Convert it to CONCAT instruction
3438 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3439 if (!ConcatMI) {
3440 return UnableToLegalize;
3441 }
3442
3443 // Check if bitcast is Legal
3444 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3445 LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
3446
3447 // Check if the build vector is Legal
3448 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3449 return UnableToLegalize;
3450 }
3451
3452 // Bitcast the sources
3453 SmallVector<Register> BitcastRegs;
3454 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3455 BitcastRegs.push_back(
3456 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3457 .getReg(0));
3458 }
3459
3460 // Build the scalar values into a vector
3461 Register BuildReg =
3462 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
3463 MIRBuilder.buildBitcast(DstReg, BuildReg);
3464
3465 MI.eraseFromParent();
3466 return Legalized;
3467}
3468
3469LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
3470 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
3471 Register DstReg = LoadMI.getDstReg();
3472 Register PtrReg = LoadMI.getPointerReg();
3473 LLT DstTy = MRI.getType(DstReg);
3474 MachineMemOperand &MMO = LoadMI.getMMO();
3475 LLT MemTy = MMO.getMemoryType();
3476 MachineFunction &MF = MIRBuilder.getMF();
3477
3478 unsigned MemSizeInBits = MemTy.getSizeInBits();
3479 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
3480
3481 if (MemSizeInBits != MemStoreSizeInBits) {
3482 if (MemTy.isVector())
3483 return UnableToLegalize;
3484
3485 // Promote to a byte-sized load if not loading an integral number of
3486 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
3487 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
3488 MachineMemOperand *NewMMO =
3489 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
3490
3491 Register LoadReg = DstReg;
3492 LLT LoadTy = DstTy;
3493
3494 // If this wasn't already an extending load, we need to widen the result
3495 // register to avoid creating a load with a narrower result than the source.
3496 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
3497 LoadTy = WideMemTy;
3498 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
3499 }
3500
3501 if (isa<GSExtLoad>(LoadMI)) {
3502 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3503 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
3504 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
3505 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3506 // The extra bits are guaranteed to be zero, since we stored them that
3507 // way. A zext load from Wide thus automatically gives zext from MemVT.
3508 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
3509 } else {
3510 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
3511 }
3512
3513 if (DstTy != LoadTy)
3514 MIRBuilder.buildTrunc(DstReg, LoadReg);
3515
3516 LoadMI.eraseFromParent();
3517 return Legalized;
3518 }
3519
3520 // Big endian lowering not implemented.
3521 if (MIRBuilder.getDataLayout().isBigEndian())
3522 return UnableToLegalize;
3523
3524 // This load needs splitting into power of 2 sized loads.
3525 //
3526 // Our strategy here is to generate anyextending loads for the smaller
3527 // types up to next power-2 result type, and then combine the two larger
3528 // result values together, before truncating back down to the non-pow-2
3529 // type.
3530 // E.g. v1 = i24 load =>
3531 // v2 = i32 zextload (2 byte)
3532 // v3 = i32 load (1 byte)
3533 // v4 = i32 shl v3, 16
3534 // v5 = i32 or v4, v2
3535 // v1 = i24 trunc v5
3536 // By doing this we generate the correct truncate which should get
3537 // combined away as an artifact with a matching extend.
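 // The byte offsets and the left shift below assume a little-endian layout;
 // the big-endian case already bailed out above.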
3538
3539 uint64_t LargeSplitSize, SmallSplitSize;
3540
3541 if (!isPowerOf2_32(MemSizeInBits)) {
3542 // This load needs splitting into power of 2 sized loads.
3543 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
3544 SmallSplitSize = MemSizeInBits - LargeSplitSize;
3545 } else {
3546 // This is already a power of 2, but we still need to split this in half.
3547 //
3548 // Assume we're being asked to decompose an unaligned load.
3549 // TODO: If this requires multiple splits, handle them all at once.
3550 auto &Ctx = MF.getFunction().getContext();
3551 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3552 return UnableToLegalize;
3553
3554 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3555 }
3556
3557 if (MemTy.isVector()) {
3558 // TODO: Handle vector extloads
3559 if (MemTy != DstTy)
3560 return UnableToLegalize;
3561
3562 // TODO: We can do better than scalarizing the vector and at least split it
3563 // in half.
3564 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
3565 }
3566
3567 MachineMemOperand *LargeMMO =
3568 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3569 MachineMemOperand *SmallMMO =
3570 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3571
3572 LLT PtrTy = MRI.getType(PtrReg);
3573 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
3574 LLT AnyExtTy = LLT::scalar(AnyExtSize);
3575 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
3576 PtrReg, *LargeMMO);
3577
3578 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
3579 LargeSplitSize / 8);
3580 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
3581 auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
3582 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
3583 SmallPtr, *SmallMMO);
3584
3585 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
3586 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
3587
3588 if (AnyExtTy == DstTy)
3589 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
3590 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
3591 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3592 MIRBuilder.buildTrunc(DstReg, {Or});
3593 } else {
3594 assert(DstTy.isPointer() && "expected pointer");
3595 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3596
3597 // FIXME: We currently consider this to be illegal for non-integral address
3598 // spaces, but we still need a way to reinterpret the bits.
3599 MIRBuilder.buildIntToPtr(DstReg, Or);
3600 }
3601
3602 LoadMI.eraseFromParent();
3603 return Legalized;
3604}
3605
3606LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
3607 // Lower a non-power of 2 store into multiple pow-2 stores.
3608 // E.g. split an i24 store into an i16 store + i8 store.
3609 // We do this by first extending the stored value to the next largest power
3610 // of 2 type, and then using truncating stores to store the components.
3611 // By doing this, likewise with G_LOAD, generate an extend that can be
3612 // artifact-combined away instead of leaving behind extracts.
3613 Register SrcReg = StoreMI.getValueReg();
3614 Register PtrReg = StoreMI.getPointerReg();
3615 LLT SrcTy = MRI.getType(SrcReg);
3616 MachineFunction &MF = MIRBuilder.getMF();
3617 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
3618 LLT MemTy = MMO.getMemoryType();
3619
3620 unsigned StoreWidth = MemTy.getSizeInBits();
3621 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
3622
3623 if (StoreWidth != StoreSizeInBits) {
3624 if (SrcTy.isVector())
3625 return UnableToLegalize;
3626
3627 // Promote to a byte-sized store with upper bits zero if not
3628 // storing an integral number of bytes. For example, promote
3629 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
3630 LLT WideTy = LLT::scalar(StoreSizeInBits);
3631
3632 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
3633 // Avoid creating a store with a narrower source than result.
3634 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
3635 SrcTy = WideTy;
3636 }
3637
3638 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
3639
3640 MachineMemOperand *NewMMO =
3641 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
3642 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
3643 StoreMI.eraseFromParent();
3644 return Legalized;
3645 }
3646
3647 if (MemTy.isVector()) {
3648 // TODO: Handle vector trunc stores
3649 if (MemTy != SrcTy)
3650 return UnableToLegalize;
3651
3652 // TODO: We can do better than scalarizing the vector and at least split it
3653 // in half.
3654 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
3655 }
3656
3657 unsigned MemSizeInBits = MemTy.getSizeInBits();
3658 uint64_t LargeSplitSize, SmallSplitSize;
3659
3660 if (!isPowerOf2_32(MemSizeInBits)) {
3661 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
3662 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
3663 } else {
3664 auto &Ctx = MF.getFunction().getContext();
3665 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3666 return UnableToLegalize; // Don't know what we're being asked to do.
3667
3668 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3669 }
3670
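 // e.g. for an s24 store this emits: any-extend the value to s32, store the
 // low 16 bits at the base pointer, and store (value >> 16) as 8 bits at byte
 // offset 2, so the low bits end up at the lower address.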
3671 // Extend to the next pow-2. If this store was itself the result of lowering,
3672 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
3673 // that's wider than the stored size.
3674 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
3675 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
3676
3677 if (SrcTy.isPointer()) {
3678 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
3679 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
3680 }
3681
3682 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
3683
3684 // Obtain the smaller value by shifting away the larger value.
3685 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
3686 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
3687
3688 // Generate the PtrAdd and truncating stores.
3689 LLT PtrTy = MRI.getType(PtrReg);
3690 auto OffsetCst = MIRBuilder.buildConstant(
3691 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
3692 auto SmallPtr =
3693 MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
3694
3695 MachineMemOperand *LargeMMO =
3696 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3697 MachineMemOperand *SmallMMO =
3698 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3699 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
3700 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
3701 StoreMI.eraseFromParent();
3702 return Legalized;
3703}
3704
3705LegalizerHelper::LegalizeResult
3706LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
3707 switch (MI.getOpcode()) {
3708 case TargetOpcode::G_LOAD: {
3709 if (TypeIdx != 0)
3710 return UnableToLegalize;
3711 MachineMemOperand &MMO = **MI.memoperands_begin();
3712
3713 // Not sure how to interpret a bitcast of an extending load.
3714 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3715 return UnableToLegalize;
3716
3717 Observer.changingInstr(MI);
3718 bitcastDst(MI, CastTy, 0);
3719 MMO.setType(CastTy);
3720 // The range metadata is no longer valid when reinterpreted as a different
3721 // type.
3722 MMO.clearRanges();
3723 Observer.changedInstr(MI);
3724 return Legalized;
3725 }
3726 case TargetOpcode::G_STORE: {
3727 if (TypeIdx != 0)
3728 return UnableToLegalize;
3729
3730 MachineMemOperand &MMO = **MI.memoperands_begin();
3731
3732 // Not sure how to interpret a bitcast of a truncating store.
3733 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3734 return UnableToLegalize;
3735
3736 Observer.changingInstr(MI);
3737 bitcastSrc(MI, CastTy, 0);
3738 MMO.setType(CastTy);
3739 Observer.changedInstr(MI);
3740 return Legalized;
3741 }
3742 case TargetOpcode::G_SELECT: {
3743 if (TypeIdx != 0)
3744 return UnableToLegalize;
3745
3746 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
3747 LLVM_DEBUG(
3748 dbgs() << "bitcast action not implemented for vector select\n");
3749 return UnableToLegalize;
3750 }
3751
3752 Observer.changingInstr(MI);
3753 bitcastSrc(MI, CastTy, 2);
3754 bitcastSrc(MI, CastTy, 3);
3755 bitcastDst(MI, CastTy, 0);
3756 Observer.changedInstr(MI);
3757 return Legalized;
3758 }
3759 case TargetOpcode::G_AND:
3760 case TargetOpcode::G_OR:
3761 case TargetOpcode::G_XOR: {
3762 Observer.changingInstr(MI);
3763 bitcastSrc(MI, CastTy, 1);
3764 bitcastSrc(MI, CastTy, 2);
3765 bitcastDst(MI, CastTy, 0);
3766 Observer.changedInstr(MI);
3767 return Legalized;
3768 }
3769 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3770 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
3771 case TargetOpcode::G_INSERT_VECTOR_ELT:
3772 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
3773 case TargetOpcode::G_CONCAT_VECTORS:
3774 return bitcastConcatVector(MI, TypeIdx, CastTy);
3775 default:
3776 return UnableToLegalize;
3777 }
3778}
3779
3780// Legalize an instruction by changing the opcode in place.
3781void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
3782 Observer.changingInstr(MI);
3783 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
3784 Observer.changedInstr(MI);
3785}
3786
3787LegalizerHelper::LegalizeResult
3788LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
3789 using namespace TargetOpcode;
3790
3791 switch(MI.getOpcode()) {
3792 default:
3793 return UnableToLegalize;
3794 case TargetOpcode::G_FCONSTANT:
3795 return lowerFConstant(MI);
3796 case TargetOpcode::G_BITCAST:
3797 return lowerBitcast(MI);
3798 case TargetOpcode::G_SREM:
3799 case TargetOpcode::G_UREM: {
3800 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3801 auto Quot =
3802 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
3803 {MI.getOperand(1), MI.getOperand(2)});
3804
3805 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
3806 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
3807 MI.eraseFromParent();
3808 return Legalized;
3809 }
3810 case TargetOpcode::G_SADDO:
3811 case TargetOpcode::G_SSUBO:
3812 return lowerSADDO_SSUBO(MI);
3813 case TargetOpcode::G_UMULH:
3814 case TargetOpcode::G_SMULH:
3815 return lowerSMULH_UMULH(MI);
3816 case TargetOpcode::G_SMULO:
3817 case TargetOpcode::G_UMULO: {
3818 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
3819 // result.
3820 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
3821 LLT Ty = MRI.getType(Res);
3822
3823 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
3824 ? TargetOpcode::G_SMULH
3825 : TargetOpcode::G_UMULH;
3826
3827 Observer.changingInstr(MI);
3828 const auto &TII = MIRBuilder.getTII();
3829 MI.setDesc(TII.get(TargetOpcode::G_MUL));
3830 MI.removeOperand(1);
3831 Observer.changedInstr(MI);
3832
3833 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
3834 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3835
3836 // Move insert point forward so we can use the Res register if needed.
3837 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3838
3839 // For *signed* multiply, overflow is detected by checking:
3840 // (hi != (lo >> bitwidth-1))
3841 if (Opcode == TargetOpcode::G_SMULH) {
3842 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
3843 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
3844 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
3845 } else {
3846 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
3847 }
3848 return Legalized;
3849 }
3850 case TargetOpcode::G_FNEG: {
3851 auto [Res, SubByReg] = MI.getFirst2Regs();
3852 LLT Ty = MRI.getType(Res);
3853
3854 // TODO: Handle vector types once we are able to
3855 // represent them.
3856 if (Ty.isVector())
3857 return UnableToLegalize;
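 // Negating an IEEE float only flips the sign bit, so build a mask with just
 // the top bit set and XOR it into the source.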
3858 auto SignMask =
3859 MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
3860 MIRBuilder.buildXor(Res, SubByReg, SignMask);
3861 MI.eraseFromParent();
3862 return Legalized;
3863 }
3864 case TargetOpcode::G_FSUB:
3865 case TargetOpcode::G_STRICT_FSUB: {
3866 auto [Res, LHS, RHS] = MI.getFirst3Regs();
3867 LLT Ty = MRI.getType(Res);
3868
3869 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
3870 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
3871
3872 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
3873 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
3874 else
3875 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3876
3877 MI.eraseFromParent();
3878 return Legalized;
3879 }
3880 case TargetOpcode::G_FMAD:
3881 return lowerFMad(MI);
3882 case TargetOpcode::G_FFLOOR:
3883 return lowerFFloor(MI);
3884 case TargetOpcode::G_LROUND:
3885 case TargetOpcode::G_LLROUND: {
3886 Register DstReg = MI.getOperand(0).getReg();
3887 Register SrcReg = MI.getOperand(1).getReg();
3888 LLT SrcTy = MRI.getType(SrcReg);
3889 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
3890 {SrcReg});
3891 MIRBuilder.buildFPTOSI(DstReg, Round);
3892 MI.eraseFromParent();
3893 return Legalized;
3894 }
3895 case TargetOpcode::G_INTRINSIC_ROUND:
3896 return lowerIntrinsicRound(MI);
3897 case TargetOpcode::G_FRINT: {
3898 // Since round even is the assumed rounding mode for unconstrained FP
3899 // operations, rint and roundeven are the same operation.
3900 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
3901 return Legalized;
3902 }
3903 case TargetOpcode::G_INTRINSIC_LRINT:
3904 case TargetOpcode::G_INTRINSIC_LLRINT: {
3905 Register DstReg = MI.getOperand(0).getReg();
3906 Register SrcReg = MI.getOperand(1).getReg();
3907 LLT SrcTy = MRI.getType(SrcReg);
3908 auto Round =
3909 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
3910 MIRBuilder.buildFPTOSI(DstReg, Round);
3911 MI.eraseFromParent();
3912 return Legalized;
3913 }
3914 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
3915 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
3916 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
3917 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
3918 **MI.memoperands_begin());
3919 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
3920 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
3921 MI.eraseFromParent();
3922 return Legalized;
3923 }
3924 case TargetOpcode::G_LOAD:
3925 case TargetOpcode::G_SEXTLOAD:
3926 case TargetOpcode::G_ZEXTLOAD:
3927 return lowerLoad(cast<GAnyLoad>(MI));
3928 case TargetOpcode::G_STORE:
3929 return lowerStore(cast<GStore>(MI));
3930 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3931 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3932 case TargetOpcode::G_CTLZ:
3933 case TargetOpcode::G_CTTZ:
3934 case TargetOpcode::G_CTPOP:
3935 return lowerBitCount(MI);
3936 case G_UADDO: {
3937 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
3938
3939 Register NewRes = MRI.cloneVirtualRegister(Res);
3940
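 // The unsigned sum wraps exactly when it ends up smaller than either operand,
 // so a single unsigned compare of the result against RHS recovers the carry.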
3941 MIRBuilder.buildAdd(NewRes, LHS, RHS);
3942 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
3943
3944 MIRBuilder.buildCopy(Res, NewRes);
3945
3946 MI.eraseFromParent();
3947 return Legalized;
3948 }
3949 case G_UADDE: {
3950 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
3951 const LLT CondTy = MRI.getType(CarryOut);
3952 const LLT Ty = MRI.getType(Res);
3953
3954 Register NewRes = MRI.cloneVirtualRegister(Res);
3955
3956 // Initial add of the two operands.
3957 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
3958
3959 // Initial check for carry.
3960 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
3961
3962 // Add the sum and the carry.
3963 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
3964 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
3965
3966 // Second check for carry. We can only carry if the initial sum is all 1s
3967 // and the carry is set, resulting in a new sum of 0.
3968 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3969 auto ResEqZero =
3970 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
3971 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
3972 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
3973
3974 MIRBuilder.buildCopy(Res, NewRes);
3975
3976 MI.eraseFromParent();
3977 return Legalized;
3978 }
3979 case G_USUBO: {
3980 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
3981
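 // An unsigned subtract borrows exactly when LHS is smaller than RHS, so the
 // borrow bit is just an unsigned compare of the two operands.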
3982 MIRBuilder.buildSub(Res, LHS, RHS);
3983 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
3984
3985 MI.eraseFromParent();
3986 return Legalized;
3987 }
3988 case G_USUBE: {
3989 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
3990 const LLT CondTy = MRI.getType(BorrowOut);
3991 const LLT Ty = MRI.getType(Res);
3992
3993 // Initial subtract of the two operands.
3994 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3995
3996 // Initial check for borrow.
3997 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
3998
3999 // Subtract the borrow from the first subtract.
4000 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
4001 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4002
4003 // Second check for borrow. We can only borrow if the initial difference is
4004 // 0 and the borrow is set, resulting in a new difference of all 1s.
4005 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4006 auto TmpResEqZero =
4007 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4008 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4009 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4010
4011 MI.eraseFromParent();
4012 return Legalized;
4013 }
4014 case G_UITOFP:
4015 return lowerUITOFP(MI);
4016 case G_SITOFP:
4017 return lowerSITOFP(MI);
4018 case G_FPTOUI:
4019 return lowerFPTOUI(MI);
4020 case G_FPTOSI:
4021 return lowerFPTOSI(MI);
4022 case G_FPTRUNC:
4023 return lowerFPTRUNC(MI);
4024 case G_FPOWI:
4025 return lowerFPOWI(MI);
4026 case G_SMIN:
4027 case G_SMAX:
4028 case G_UMIN:
4029 case G_UMAX:
4030 return lowerMinMax(MI);
4031 case G_SCMP:
4032 case G_UCMP:
4033 return lowerThreewayCompare(MI);
4034 case G_FCOPYSIGN:
4035 return lowerFCopySign(MI);
4036 case G_FMINNUM:
4037 case G_FMAXNUM:
4038 return lowerFMinNumMaxNum(MI);
4039 case G_MERGE_VALUES:
4040 return lowerMergeValues(MI);
4041 case G_UNMERGE_VALUES:
4042 return lowerUnmergeValues(MI);
4043 case TargetOpcode::G_SEXT_INREG: {
4044 assert(MI.getOperand(2).isImm() && "Expected immediate");
4045 int64_t SizeInBits = MI.getOperand(2).getImm();
4046
4047 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4048 LLT DstTy = MRI.getType(DstReg);
4049 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4050
4051 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4052 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4053 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4054 MI.eraseFromParent();
4055 return Legalized;
4056 }
4057 case G_EXTRACT_VECTOR_ELT:
4058 case G_INSERT_VECTOR_ELT:
4059 return lowerExtractInsertVectorElt(MI);
4060 case G_SHUFFLE_VECTOR:
4061 return lowerShuffleVector(MI);
4062 case G_VECTOR_COMPRESS:
4063 return lowerVECTOR_COMPRESS(MI);
4064 case G_DYN_STACKALLOC:
4065 return lowerDynStackAlloc(MI);
4066 case G_STACKSAVE:
4067 return lowerStackSave(MI);
4068 case G_STACKRESTORE:
4069 return lowerStackRestore(MI);
4070 case G_EXTRACT:
4071 return lowerExtract(MI);
4072 case G_INSERT:
4073 return lowerInsert(MI);
4074 case G_BSWAP:
4075 return lowerBswap(MI);
4076 case G_BITREVERSE:
4077 return lowerBitreverse(MI);
4078 case G_READ_REGISTER:
4079 case G_WRITE_REGISTER:
4080 return lowerReadWriteRegister(MI);
4081 case G_UADDSAT:
4082 case G_USUBSAT: {
4083 // Try to make a reasonable guess about which lowering strategy to use. The
4084 // target can override this with custom lowering and calling the
4085 // implementation functions.
4086 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4087 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4088 return lowerAddSubSatToMinMax(MI);
4089 return lowerAddSubSatToAddoSubo(MI);
4090 }
4091 case G_SADDSAT:
4092 case G_SSUBSAT: {
4093 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4094
4095 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4096 // since it's a shorter expansion. However, we would need to figure out the
4097 // preferred boolean type for the carry out for the query.
4098 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4099 return lowerAddSubSatToMinMax(MI);
4100 return lowerAddSubSatToAddoSubo(MI);
4101 }
4102 case G_SSHLSAT:
4103 case G_USHLSAT:
4104 return lowerShlSat(MI);
4105 case G_ABS:
4106 return lowerAbsToAddXor(MI);
4107 case G_SELECT:
4108 return lowerSelect(MI);
4109 case G_IS_FPCLASS:
4110 return lowerISFPCLASS(MI);
4111 case G_SDIVREM:
4112 case G_UDIVREM:
4113 return lowerDIVREM(MI);
4114 case G_FSHL:
4115 case G_FSHR:
4116 return lowerFunnelShift(MI);
4117 case G_ROTL:
4118 case G_ROTR:
4119 return lowerRotate(MI);
4120 case G_MEMSET:
4121 case G_MEMCPY:
4122 case G_MEMMOVE:
4123 return lowerMemCpyFamily(MI);
4124 case G_MEMCPY_INLINE:
4125 return lowerMemcpyInline(MI);
4126 case G_ZEXT:
4127 case G_SEXT:
4128 case G_ANYEXT:
4129 return lowerEXT(MI);
4130 case G_TRUNC:
4131 return lowerTRUNC(MI);
4132 GET_VECREDUCE_CASES_NONSEQ
4133 return lowerVectorReduction(MI);
4134 case G_VAARG:
4135 return lowerVAArg(MI);
4136 }
4137}
4138
4139Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4140 Align MinAlign) const {
4141 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4142 // datalayout for the preferred alignment. Also there should be a target hook
4143 // for this to allow targets to reduce the alignment and ignore the
4144 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4145 // the type.
4146 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4147}
4148
4149MachineInstrBuilder
4150LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4151 MachinePointerInfo &PtrInfo) {
4152 MachineFunction &MF = MIRBuilder.getMF();
4153 const DataLayout &DL = MIRBuilder.getDataLayout();
4154 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4155
4156 unsigned AddrSpace = DL.getAllocaAddrSpace();
4157 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4158
4159 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4160 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4161}
4162
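// Clamp a (possibly variable) vector index so it stays in bounds: power-of-2
// element counts only need an AND with NElts - 1 (e.g. idx & 3 for a 4 element
// vector), anything else falls back to umin(idx, NElts - 1).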
4163static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
4164 LLT VecTy) {
4165 LLT IdxTy = B.getMRI()->getType(IdxReg);
4166 unsigned NElts = VecTy.getNumElements();
4167
4168 int64_t IdxVal;
4169 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4170 if (IdxVal < VecTy.getNumElements())
4171 return IdxReg;
4172 // If a constant index would be out of bounds, clamp it as well.
4173 }
4174
4175 if (isPowerOf2_32(NElts)) {
4176 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4177 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4178 }
4179
4180 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4181 .getReg(0);
4182}
4183
4184Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4185 Register Index) {
4186 LLT EltTy = VecTy.getElementType();
4187
4188 // Calculate the element offset and add it to the pointer.
4189 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4190 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4191 "Converting bits to bytes lost precision");
4192
4193 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4194
4195 // Convert index to the correct size for the address space.
4196 const DataLayout &DL = MIRBuilder.getDataLayout();
4197 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4198 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4199 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4200 if (IdxTy != MRI.getType(Index))
4201 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4202
4203 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4204 MIRBuilder.buildConstant(IdxTy, EltSize));
4205
4206 LLT PtrTy = MRI.getType(VecPtr);
4207 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4208}
4209
4210#ifndef NDEBUG
4211/// Check that all vector operands have the same number of elements. Other
4212/// operands should be listed in NonVecOpIndices.
4213static bool hasSameNumEltsOnAllVectorOperands(
4214 GenericMachineInstr &MI, MachineRegisterInfo &MRI,
4215 std::initializer_list<unsigned> NonVecOpIndices) {
4216 if (MI.getNumMemOperands() != 0)
4217 return false;
4218
4219 LLT VecTy = MRI.getType(MI.getReg(0));
4220 if (!VecTy.isVector())
4221 return false;
4222 unsigned NumElts = VecTy.getNumElements();
4223
4224 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4225 MachineOperand &Op = MI.getOperand(OpIdx);
4226 if (!Op.isReg()) {
4227 if (!is_contained(NonVecOpIndices, OpIdx))
4228 return false;
4229 continue;
4230 }
4231
4232 LLT Ty = MRI.getType(Op.getReg());
4233 if (!Ty.isVector()) {
4234 if (!is_contained(NonVecOpIndices, OpIdx))
4235 return false;
4236 continue;
4237 }
4238
4239 if (Ty.getNumElements() != NumElts)
4240 return false;
4241 }
4242
4243 return true;
4244}
4245#endif
4246
4247/// Fill \p DstOps with DstOps that together have the same number of elements
4248/// as \p Ty. These DstOps have either scalar type when \p NumElts = 1 or are
4249/// vectors with \p NumElts elements. When Ty.getNumElements() is not a
4250/// multiple of \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
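/// e.g. Ty = <7 x s16> with NumElts = 4 produces DstOps = { <4 x s16>, <3 x s16> }.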
4251static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
4252 unsigned NumElts) {
4253 LLT LeftoverTy;
4254 assert(Ty.isVector() && "Expected vector type");
4255 LLT EltTy = Ty.getElementType();
4256 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
4257 int NumParts, NumLeftover;
4258 std::tie(NumParts, NumLeftover) =
4259 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
4260
4261 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
4262 for (int i = 0; i < NumParts; ++i) {
4263 DstOps.push_back(NarrowTy);
4264 }
4265
4266 if (LeftoverTy.isValid()) {
4267 assert(NumLeftover == 1 && "expected exactly one leftover");
4268 DstOps.push_back(LeftoverTy);
4269 }
4270}
4271
4272/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
4273/// made from \p Op depending on operand type.
4274static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
4275 MachineOperand &Op) {
4276 for (unsigned i = 0; i < N; ++i) {
4277 if (Op.isReg())
4278 Ops.push_back(Op.getReg());
4279 else if (Op.isImm())
4280 Ops.push_back(Op.getImm());
4281 else if (Op.isPredicate())
4282 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
4283 else
4284 llvm_unreachable("Unsupported type");
4285 }
4286}
4287
4288// Handle splitting vector operations which need to have the same number of
4289// elements in each type index, but each type index may have a different element
4290// type.
4291//
4292// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
4293// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4294// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4295//
4296// Also handles some irregular breakdown cases, e.g.
4297// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
4298// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4299// s64 = G_SHL s64, s32
4300LegalizerHelper::LegalizeResult
4301LegalizerHelper::fewerElementsVectorMultiEltType(
4302 GenericMachineInstr &MI, unsigned NumElts,
4303 std::initializer_list<unsigned> NonVecOpIndices) {
4304 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
4305 "Non-compatible opcode or not specified non-vector operands");
4306 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4307
4308 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4309 unsigned NumDefs = MI.getNumDefs();
4310
4311 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
4312 // Build instructions with DstOps to use instruction found by CSE directly.
4313 // CSE copies found instruction into given vreg when building with vreg dest.
4314 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
4315 // Output registers will be taken from created instructions.
4316 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
4317 for (unsigned i = 0; i < NumDefs; ++i) {
4318 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
4319 }
4320
4321 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
4322 // Operands listed in NonVecOpIndices will be used as is without splitting;
4323 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
4324 // scalar condition (op 1), immediate in sext_inreg (op 2).
4325 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
4326 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4327 ++UseIdx, ++UseNo) {
4328 if (is_contained(NonVecOpIndices, UseIdx)) {
4329 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
4330 MI.getOperand(UseIdx));
4331 } else {
4332 SmallVector<Register, 8> SplitPieces;
4333 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
4334 MRI);
4335 for (auto Reg : SplitPieces)
4336 InputOpsPieces[UseNo].push_back(Reg);
4337 }
4338 }
4339
4340 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4341
4342 // Take i-th piece of each input operand split and build sub-vector/scalar
4343 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
4344 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4345 SmallVector<DstOp, 2> Defs;
4346 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4347 Defs.push_back(OutputOpsPieces[DstNo][i]);
4348
4349 SmallVector<SrcOp, 3> Uses;
4350 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
4351 Uses.push_back(InputOpsPieces[InputNo][i]);
4352
4353 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
4354 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4355 OutputRegs[DstNo].push_back(I.getReg(DstNo));
4356 }
4357
4358 // Merge small outputs into MI's output for each def operand.
4359 if (NumLeftovers) {
4360 for (unsigned i = 0; i < NumDefs; ++i)
4361 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
4362 } else {
4363 for (unsigned i = 0; i < NumDefs; ++i)
4364 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
4365 }
4366
4367 MI.eraseFromParent();
4368 return Legalized;
4369}
4370
4371LegalizerHelper::LegalizeResult
4372LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
4373 unsigned NumElts) {
4374 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4375
4376 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4377 unsigned NumDefs = MI.getNumDefs();
4378
4379 SmallVector<DstOp, 8> OutputOpsPieces;
4380 SmallVector<Register, 8> OutputRegs;
4381 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
4382
4383 // Instructions that perform register split will be inserted in basic block
4384 // where register is defined (basic block is in the next operand).
4385 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
4386 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4387 UseIdx += 2, ++UseNo) {
4388 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
4389 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
4390 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
4391 MIRBuilder, MRI);
4392 }
4393
4394 // Build PHIs with fewer elements.
4395 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4396 MIRBuilder.setInsertPt(*MI.getParent(), MI);
4397 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4398 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
4399 Phi.addDef(
4400 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
4401 OutputRegs.push_back(Phi.getReg(0));
4402
4403 for (unsigned j = 0; j < NumInputs / 2; ++j) {
4404 Phi.addUse(InputOpsPieces[j][i]);
4405 Phi.add(MI.getOperand(1 + j * 2 + 1));
4406 }
4407 }
4408
4409 // Set the insert point after the existing PHIs
4410 MachineBasicBlock &MBB = *MI.getParent();
4411 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
4412
4413 // Merge small outputs into MI's def.
4414 if (NumLeftovers) {
4415 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
4416 } else {
4417 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
4418 }
4419
4420 MI.eraseFromParent();
4421 return Legalized;
4422}
4423
4424LegalizerHelper::LegalizeResult
4425LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
4426 unsigned TypeIdx,
4427 LLT NarrowTy) {
4428 const int NumDst = MI.getNumOperands() - 1;
4429 const Register SrcReg = MI.getOperand(NumDst).getReg();
4430 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4431 LLT SrcTy = MRI.getType(SrcReg);
4432
4433 if (TypeIdx != 1 || NarrowTy == DstTy)
4434 return UnableToLegalize;
4435
4436 // Requires compatible types. Otherwise SrcReg should have been defined by
4437 // merge-like instruction that would get artifact combined. Most likely
4438 // instruction that defines SrcReg has to perform more/fewer elements
4439 // legalization compatible with NarrowTy.
4440 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
4441 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4442
4443 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
4444 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
4445 return UnableToLegalize;
4446
4447 // This is most likely DstTy (smaller than register size) packed in SrcTy
4448 // (larger than register size) and since unmerge was not combined it will be
4449 // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
4450 // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.
4451
4452 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
4453 //
4454 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
4455 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
4456 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
4457 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
4458 const int NumUnmerge = Unmerge->getNumOperands() - 1;
4459 const int PartsPerUnmerge = NumDst / NumUnmerge;
4460
4461 for (int I = 0; I != NumUnmerge; ++I) {
4462 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
4463
4464 for (int J = 0; J != PartsPerUnmerge; ++J)
4465 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
4466 MIB.addUse(Unmerge.getReg(I));
4467 }
4468
4469 MI.eraseFromParent();
4470 return Legalized;
4471}
4472
4473LegalizerHelper::LegalizeResult
4474LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
4475 LLT NarrowTy) {
4476 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
4477 // Requires compatible types. Otherwise user of DstReg did not perform unmerge
4478 // that should have been artifact combined. Most likely instruction that uses
4479 // DstReg has to do more/fewer elements legalization compatible with NarrowTy.
4480 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
4481 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4482 if (NarrowTy == SrcTy)
4483 return UnableToLegalize;
4484
4485 // This attempts to lower part of an LCMTy merge/unmerge sequence. The
4486 // intended use is for old MIR tests. Since the change to more/fewer-elements
4487 // legalization it should no longer be possible to generate MIR like this when
4488 // starting from llvm-ir, because the LCMTy approach was replaced with merge/unmerge to vector elements.
4489 if (TypeIdx == 1) {
4490 assert(SrcTy.isVector() && "Expected vector types");
4491 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4492 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
4493 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
4494 return UnableToLegalize;
4495 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
4496 //
4497 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
4498 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
4499 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
4500 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
4501 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
4502 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
4503
4504 SmallVector<Register, 8> Elts;
4505 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
4506 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
4507 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
4508 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
4509 Elts.push_back(Unmerge.getReg(j));
4510 }
4511
4512 SmallVector<Register, 8> NarrowTyElts;
4513 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
4514 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
4515 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
4516 ++i, Offset += NumNarrowTyElts) {
4517 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
4518 NarrowTyElts.push_back(
4519 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
4520 }
4521
4522 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
4523 MI.eraseFromParent();
4524 return Legalized;
4525 }
4526
4527 assert(TypeIdx == 0 && "Bad type index");
4528 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
4529 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
4530 return UnableToLegalize;
4531
4532 // This is most likely SrcTy (smaller than register size) packed in DstTy
4533 // (larger than register size) and since merge was not combined it will be
4534 // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy
4535 // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy.
4536
4537 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
4538 //
4539 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
4540 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
4541 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
4542 SmallVector<Register, 8> NarrowTyElts;
4543 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
4544 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
4545 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
4546 for (unsigned i = 0; i < NumParts; ++i) {
4547 SmallVector<Register, 8> Sources;
4548 for (unsigned j = 0; j < NumElts; ++j)
4549 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
4550 NarrowTyElts.push_back(
4551 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
4552 }
4553
4554 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
4555 MI.eraseFromParent();
4556 return Legalized;
4557}
4558
4559LegalizerHelper::LegalizeResult
4560LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
4561 unsigned TypeIdx,
4562 LLT NarrowVecTy) {
4563 auto [DstReg, SrcVec] = MI.getFirst2Regs();
4564 Register InsertVal;
4565 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
4566
4567 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
4568 if (IsInsert)
4569 InsertVal = MI.getOperand(2).getReg();
4570
4571 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
4572
4573 // TODO: Handle total scalarization case.
4574 if (!NarrowVecTy.isVector())
4575 return UnableToLegalize;
4576
4577 LLT VecTy = MRI.getType(SrcVec);
4578
4579 // If the index is a constant, we can really break this down as you would
4580 // expect, and index into the target size pieces.
4581 int64_t IdxVal;
4582 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
4583 if (MaybeCst) {
4584 IdxVal = MaybeCst->Value.getSExtValue();
4585 // Avoid out of bounds indexing the pieces.
4586 if (IdxVal >= VecTy.getNumElements()) {
4587 MIRBuilder.buildUndef(DstReg);
4588 MI.eraseFromParent();
4589 return Legalized;
4590 }
4591
4592 SmallVector<Register, 8> VecParts;
4593 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
4594
4595 // Build a sequence of NarrowTy pieces in VecParts for this operand.
4596 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
4597 TargetOpcode::G_ANYEXT);
4598
4599 unsigned NewNumElts = NarrowVecTy.getNumElements();
4600
4601 LLT IdxTy = MRI.getType(Idx);
4602 int64_t PartIdx = IdxVal / NewNumElts;
4603 auto NewIdx =
4604 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
4605
4606 if (IsInsert) {
4607 LLT PartTy = MRI.getType(VecParts[PartIdx]);
4608
4609 // Use the adjusted index to insert into one of the subvectors.
4610 auto InsertPart = MIRBuilder.buildInsertVectorElement(
4611 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
4612 VecParts[PartIdx] = InsertPart.getReg(0);
4613
4614 // Recombine the inserted subvector with the others to reform the result
4615 // vector.
4616 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
4617 } else {
4618 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
4619 }
4620
4621 MI.eraseFromParent();
4622 return Legalized;
4623 }
4624
4625 // With a variable index, we can't perform the operation in a smaller type, so
4626 // we're forced to expand this.
4627 //
4628 // TODO: We could emit a chain of compare/select to figure out which piece to
4629 // index.
4630 return UnableToLegalize;
4631}
4632
4633LegalizerHelper::LegalizeResult
4634LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
4635 LLT NarrowTy) {
4636 // FIXME: Don't know how to handle secondary types yet.
4637 if (TypeIdx != 0)
4638 return UnableToLegalize;
4639
4640 // This implementation doesn't work for atomics. Give up instead of doing
4641 // something invalid.
4642 if (LdStMI.isAtomic())
4643 return UnableToLegalize;
4644
4645 bool IsLoad = isa<GLoad>(LdStMI);
4646 Register ValReg = LdStMI.getReg(0);
4647 Register AddrReg = LdStMI.getPointerReg();
4648 LLT ValTy = MRI.getType(ValReg);
4649
4650 // FIXME: Do we need a distinct NarrowMemory legalize action?
4651 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
4652 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
4653 return UnableToLegalize;
4654 }
4655
4656 int NumParts = -1;
4657 int NumLeftover = -1;
4658 LLT LeftoverTy;
4659 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
4660 if (IsLoad) {
4661 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
4662 } else {
4663 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
4664 NarrowLeftoverRegs, MIRBuilder, MRI)) {
4665 NumParts = NarrowRegs.size();
4666 NumLeftover = NarrowLeftoverRegs.size();
4667 }
4668 }
4669
4670 if (NumParts == -1)
4671 return UnableToLegalize;
4672
4673 LLT PtrTy = MRI.getType(AddrReg);
4674 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
4675
4676 unsigned TotalSize = ValTy.getSizeInBits();
4677
4678 // Split the load/store into PartTy sized pieces starting at Offset. If this
4679 // is a load, return the new registers in ValRegs. For a store, each element
4680 // of ValRegs should be PartTy. Returns the next offset that needs to be
4681 // handled.
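 // e.g. narrowing a <4 x s32> load to <2 x s32> pieces emits two <2 x s32>
 // loads at byte offsets 0 and 8 (the offsets run downward for big-endian).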
4682 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
4683 auto MMO = LdStMI.getMMO();
4684 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
4685 unsigned NumParts, unsigned Offset) -> unsigned {
4686 MachineFunction &MF = MIRBuilder.getMF();
4687 unsigned PartSize = PartTy.getSizeInBits();
4688 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
4689 ++Idx) {
4690 unsigned ByteOffset = Offset / 8;
4691 Register NewAddrReg;
4692
4693 MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
4694
4695 MachineMemOperand *NewMMO =
4696 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
4697
4698 if (IsLoad) {
4699 Register Dst = MRI.createGenericVirtualRegister(PartTy);
4700 ValRegs.push_back(Dst);
4701 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
4702 } else {
4703 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
4704 }
4705 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
4706 }
4707
4708 return Offset;
4709 };
4710
4711 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
4712 unsigned HandledOffset =
4713 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
4714
4715 // Handle the rest of the register if this isn't an even type breakdown.
4716 if (LeftoverTy.isValid())
4717 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
4718
4719 if (IsLoad) {
4720 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
4721 LeftoverTy, NarrowLeftoverRegs);
4722 }
4723
4724 LdStMI.eraseFromParent();
4725 return Legalized;
4726}
4727
4728LegalizerHelper::LegalizeResult
4729LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
4730 LLT NarrowTy) {
4731 using namespace TargetOpcode;
4732 GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
4733 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
4734
4735 switch (MI.getOpcode()) {
4736 case G_IMPLICIT_DEF:
4737 case G_TRUNC:
4738 case G_AND:
4739 case G_OR:
4740 case G_XOR:
4741 case G_ADD:
4742 case G_SUB:
4743 case G_MUL:
4744 case G_PTR_ADD:
4745 case G_SMULH:
4746 case G_UMULH:
4747 case G_FADD:
4748 case G_FMUL:
4749 case G_FSUB:
4750 case G_FNEG:
4751 case G_FABS:
4752 case G_FCANONICALIZE:
4753 case G_FDIV:
4754 case G_FREM:
4755 case G_FMA:
4756 case G_FMAD:
4757 case G_FPOW:
4758 case G_FEXP:
4759 case G_FEXP2:
4760 case G_FEXP10:
4761 case G_FLOG:
4762 case G_FLOG2:
4763 case G_FLOG10:
4764 case G_FLDEXP:
4765 case G_FNEARBYINT:
4766 case G_FCEIL:
4767 case G_FFLOOR:
4768 case G_FRINT:
4769 case G_INTRINSIC_LRINT:
4770 case G_INTRINSIC_LLRINT:
4771 case G_INTRINSIC_ROUND:
4772 case G_INTRINSIC_ROUNDEVEN:
4773 case G_INTRINSIC_TRUNC:
4774 case G_FCOS:
4775 case G_FSIN:
4776 case G_FTAN:
4777 case G_FACOS:
4778 case G_FASIN:
4779 case G_FATAN:
4780 case G_FCOSH:
4781 case G_FSINH:
4782 case G_FTANH:
4783 case G_FSQRT:
4784 case G_BSWAP:
4785 case G_BITREVERSE:
4786 case G_SDIV:
4787 case G_UDIV:
4788 case G_SREM:
4789 case G_UREM:
4790 case G_SDIVREM:
4791 case G_UDIVREM:
4792 case G_SMIN:
4793 case G_SMAX:
4794 case G_UMIN:
4795 case G_UMAX:
4796 case G_ABS:
4797 case G_FMINNUM:
4798 case G_FMAXNUM:
4799 case G_FMINNUM_IEEE:
4800 case G_FMAXNUM_IEEE:
4801 case G_FMINIMUM:
4802 case G_FMAXIMUM:
4803 case G_FSHL:
4804 case G_FSHR:
4805 case G_ROTL:
4806 case G_ROTR:
4807 case G_FREEZE:
4808 case G_SADDSAT:
4809 case G_SSUBSAT:
4810 case G_UADDSAT:
4811 case G_USUBSAT:
4812 case G_UMULO:
4813 case G_SMULO:
4814 case G_SHL:
4815 case G_LSHR:
4816 case G_ASHR:
4817 case G_SSHLSAT:
4818 case G_USHLSAT:
4819 case G_CTLZ:
4820 case G_CTLZ_ZERO_UNDEF:
4821 case G_CTTZ:
4822 case G_CTTZ_ZERO_UNDEF:
4823 case G_CTPOP:
4824 case G_FCOPYSIGN:
4825 case G_ZEXT:
4826 case G_SEXT:
4827 case G_ANYEXT:
4828 case G_FPEXT:
4829 case G_FPTRUNC:
4830 case G_SITOFP:
4831 case G_UITOFP:
4832 case G_FPTOSI:
4833 case G_FPTOUI:
4834 case G_INTTOPTR:
4835 case G_PTRTOINT:
4836 case G_ADDRSPACE_CAST:
4837 case G_UADDO:
4838 case G_USUBO:
4839 case G_UADDE:
4840 case G_USUBE:
4841 case G_SADDO:
4842 case G_SSUBO:
4843 case G_SADDE:
4844 case G_SSUBE:
4845 case G_STRICT_FADD:
4846 case G_STRICT_FSUB:
4847 case G_STRICT_FMUL:
4848 case G_STRICT_FMA:
4849 case G_STRICT_FLDEXP:
4850 case G_FFREXP:
4851 return fewerElementsVectorMultiEltType(GMI, NumElts);
4852 case G_ICMP:
4853 case G_FCMP:
4854 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
4855 case G_IS_FPCLASS:
4856 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
4857 case G_SELECT:
4858 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
4859 return fewerElementsVectorMultiEltType(GMI, NumElts);
4860 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
4861 case G_PHI:
4862 return fewerElementsVectorPhi(GMI, NumElts);
4863 case G_UNMERGE_VALUES:
4864 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
4865 case G_BUILD_VECTOR:
4866 assert(TypeIdx == 0 && "not a vector type index");
4867 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4868 case G_CONCAT_VECTORS:
4869 if (TypeIdx != 1) // TODO: This probably does work as expected already.
4870 return UnableToLegalize;
4871 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4872 case G_EXTRACT_VECTOR_ELT:
4873 case G_INSERT_VECTOR_ELT:
4874 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
4875 case G_LOAD:
4876 case G_STORE:
4877 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
4878 case G_SEXT_INREG:
4879 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
4880 GET_VECREDUCE_CASES_NONSEQ
4881 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
4882 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
4883 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
4884 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
4885 case G_SHUFFLE_VECTOR:
4886 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
4887 case G_FPOWI:
4888 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
4889 case G_BITCAST:
4890 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
4891 case G_INTRINSIC_FPTRUNC_ROUND:
4892 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
4893 default:
4894 return UnableToLegalize;
4895 }
4896}
4897
4898LegalizerHelper::LegalizeResult
4899LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx,
4900 LLT NarrowTy) {
4901 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
4902 "Not a bitcast operation");
4903
4904 if (TypeIdx != 0)
4905 return UnableToLegalize;
4906
4907 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
4908
4909 unsigned SrcScalSize = SrcTy.getScalarSizeInBits();
4910 LLT SrcNarrowTy =
4911 LLT::fixed_vector(NarrowTy.getSizeInBits() / SrcScalSize, SrcScalSize);
4912
4913 // Split the Src and Dst Reg into smaller registers
4914 SmallVector<Register> SrcVRegs, BitcastVRegs;
4915 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
4916 return UnableToLegalize;
4917
4918 // Build new smaller bitcast instructions
4919 // Leftover types are not supported for now, but eventually will have to be.
4920 for (unsigned i = 0; i < SrcVRegs.size(); i++)
4921 BitcastVRegs.push_back(
4922 MIRBuilder.buildBitcast(NarrowTy, SrcVRegs[i]).getReg(0));
4923
4924 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
4925 MI.eraseFromParent();
4926 return Legalized;
4927}
4928
4929LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
4930 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4931 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
4932 if (TypeIdx != 0)
4933 return UnableToLegalize;
4934
4935 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
4936 MI.getFirst3RegLLTs();
4937 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
4938 // The shuffle should be canonicalized by now.
4939 if (DstTy != Src1Ty)
4940 return UnableToLegalize;
4941 if (DstTy != Src2Ty)
4942 return UnableToLegalize;
4943
4944 if (!isPowerOf2_32(DstTy.getNumElements()))
4945 return UnableToLegalize;
4946
4947 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
4948 // Further legalization attempts will be needed to split further.
4949 NarrowTy =
4950 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
4951 unsigned NewElts = NarrowTy.getNumElements();
4952
4953 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
4954 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
4955 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
4956 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
4957 SplitSrc2Regs[1]};
4958
4959 Register Hi, Lo;
4960
4961 // If Lo or Hi uses elements from at most two of the four input vectors, then
4962 // express it as a vector shuffle of those two inputs. Otherwise extract the
4963 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
4964 SmallVector<int, 16> Ops;
4965 for (unsigned High = 0; High < 2; ++High) {
4966 Register &Output = High ? Hi : Lo;
4967
4968 // Build a shuffle mask for the output, discovering on the fly which
4969 // input vectors to use as shuffle operands (recorded in InputUsed).
4970 // If building a suitable shuffle vector proves too hard, then bail
4971 // out with useBuildVector set.
4972 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
4973 unsigned FirstMaskIdx = High * NewElts;
4974 bool UseBuildVector = false;
4975 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4976 // The mask element. This indexes into the input.
4977 int Idx = Mask[FirstMaskIdx + MaskOffset];
4978
4979 // The input vector this mask element indexes into.
4980 unsigned Input = (unsigned)Idx / NewElts;
4981
4982 if (Input >= std::size(Inputs)) {
4983 // The mask element does not index into any input vector.
4984 Ops.push_back(-1);
4985 continue;
4986 }
4987
4988 // Turn the index into an offset from the start of the input vector.
4989 Idx -= Input * NewElts;
4990
4991 // Find or create a shuffle vector operand to hold this input.
4992 unsigned OpNo;
4993 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
4994 if (InputUsed[OpNo] == Input) {
4995 // This input vector is already an operand.
4996 break;
4997 } else if (InputUsed[OpNo] == -1U) {
4998 // Create a new operand for this input vector.
4999 InputUsed[OpNo] = Input;
5000 break;
5001 }
5002 }
5003
5004 if (OpNo >= std::size(InputUsed)) {
5005 // More than two input vectors used! Give up on trying to create a
5006 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5007 UseBuildVector = true;
5008 break;
5009 }
5010
5011 // Add the mask index for the new shuffle vector.
5012 Ops.push_back(Idx + OpNo * NewElts);
5013 }
5014
5015 if (UseBuildVector) {
5016 LLT EltTy = NarrowTy.getElementType();
5017 SmallVector<Register, 16> SVOps;
5018
5019 // Extract the input elements by hand.
5020 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5021 // The mask element. This indexes into the input.
5022 int Idx = Mask[FirstMaskIdx + MaskOffset];
5023
5024 // The input vector this mask element indexes into.
5025 unsigned Input = (unsigned)Idx / NewElts;
5026
5027 if (Input >= std::size(Inputs)) {
5028 // The mask element is "undef" or indexes off the end of the input.
5029 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5030 continue;
5031 }
5032
5033 // Turn the index into an offset from the start of the input vector.
5034 Idx -= Input * NewElts;
5035
5036 // Extract the vector element by hand.
5037 SVOps.push_back(MIRBuilder
5038 .buildExtractVectorElement(
5039 EltTy, Inputs[Input],
5040 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5041 .getReg(0));
5042 }
5043
5044 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5045 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5046 } else if (InputUsed[0] == -1U) {
5047 // No input vectors were used! The result is undefined.
5048 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5049 } else {
5050 Register Op0 = Inputs[InputUsed[0]];
5051 // If only one input was used, use an undefined vector for the other.
5052 Register Op1 = InputUsed[1] == -1U
5053 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5054 : Inputs[InputUsed[1]];
5055 // At least one input vector was used. Create a new shuffle vector.
5056 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5057 }
5058
5059 Ops.clear();
5060 }
5061
5062 MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
5063 MI.eraseFromParent();
5064 return Legalized;
5065}
5066
5067LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
5068 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5069 auto &RdxMI = cast<GVecReduce>(MI);
5070
5071 if (TypeIdx != 1)
5072 return UnableToLegalize;
5073
5074 // The semantics of the normal non-sequential reductions allow us to freely
5075 // re-associate the operation.
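 // e.g. a G_VECREDUCE_ADD of <8 x s32> narrowed to <4 x s32>: both element
 // counts are powers of 2, so the two halves are first added as <4 x s32>
 // vectors and a single narrower reduction finishes the job; non-power-of-2
 // splits instead combine per-piece reductions with scalar adds.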
5076 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5077
5078 if (NarrowTy.isVector() &&
5079 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5080 return UnableToLegalize;
5081
5082 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5083 SmallVector<Register> SplitSrcs;
5084 // If NarrowTy is a scalar then we're being asked to scalarize.
5085 const unsigned NumParts =
5086 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5087 : SrcTy.getNumElements();
5088
5089 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5090 if (NarrowTy.isScalar()) {
5091 if (DstTy != NarrowTy)
5092 return UnableToLegalize; // FIXME: handle implicit extensions.
5093
5094 if (isPowerOf2_32(NumParts)) {
5095 // Generate a tree of scalar operations to reduce the critical path.
5096 SmallVector<Register> PartialResults;
5097 unsigned NumPartsLeft = NumParts;
5098 while (NumPartsLeft > 1) {
5099 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5100 PartialResults.emplace_back(
5101 MIRBuilder
5102 .buildInstr(ScalarOpc, {NarrowTy},
5103 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5104 .getReg(0));
5105 }
5106 SplitSrcs = PartialResults;
5107 PartialResults.clear();
5108 NumPartsLeft = SplitSrcs.size();
5109 }
5110 assert(SplitSrcs.size() == 1);
5111 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5112 MI.eraseFromParent();
5113 return Legalized;
5114 }
5115 // If we can't generate a tree, then just do sequential operations.
5116 Register Acc = SplitSrcs[0];
5117 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5118 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5119 .getReg(0);
5120 MIRBuilder.buildCopy(DstReg, Acc);
5121 MI.eraseFromParent();
5122 return Legalized;
5123 }
5124 SmallVector<Register> PartialReductions;
5125 for (unsigned Part = 0; Part < NumParts; ++Part) {
5126 PartialReductions.push_back(
5127 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5128 .getReg(0));
5129 }
5130
5131 // If the types involved are powers of 2, we can generate intermediate vector
5132 // ops, before generating a final reduction operation.
5133 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5134 isPowerOf2_32(NarrowTy.getNumElements())) {
5135 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5136 }
5137
5138 Register Acc = PartialReductions[0];
5139 for (unsigned Part = 1; Part < NumParts; ++Part) {
5140 if (Part == NumParts - 1) {
5141 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5142 {Acc, PartialReductions[Part]});
5143 } else {
5144 Acc = MIRBuilder
5145 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5146 .getReg(0);
5147 }
5148 }
5149 MI.eraseFromParent();
5150 return Legalized;
5151}
5152
5153 LegalizerHelper::LegalizeResult
5154 LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
5155 unsigned int TypeIdx,
5156 LLT NarrowTy) {
5157 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5158 MI.getFirst3RegLLTs();
5159 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5160 DstTy != NarrowTy)
5161 return UnableToLegalize;
5162
5163 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5164 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5165 "Unexpected vecreduce opcode");
5166 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5167 ? TargetOpcode::G_FADD
5168 : TargetOpcode::G_FMUL;
5169
5170 SmallVector<Register> SplitSrcs;
5171 unsigned NumParts = SrcTy.getNumElements();
5172 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5173 Register Acc = ScalarReg;
5174 for (unsigned i = 0; i < NumParts; i++)
5175 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5176 .getReg(0);
5177
5178 MIRBuilder.buildCopy(DstReg, Acc);
5179 MI.eraseFromParent();
5180 return Legalized;
5181}
5182
5183 LegalizerHelper::LegalizeResult
5184LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5185 LLT SrcTy, LLT NarrowTy,
5186 unsigned ScalarOpc) {
5187 SmallVector<Register> SplitSrcs;
5188 // Split the sources into NarrowTy size pieces.
5189 extractParts(SrcReg, NarrowTy,
5190 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5191 MIRBuilder, MRI);
5192 // We're going to do a tree reduction using vector operations until we have
5193 // one NarrowTy size value left.
5194 while (SplitSrcs.size() > 1) {
5195 SmallVector<Register> PartialRdxs;
5196 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5197 Register LHS = SplitSrcs[Idx];
5198 Register RHS = SplitSrcs[Idx + 1];
5199 // Create the intermediate vector op.
5200 Register Res =
5201 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
5202 PartialRdxs.push_back(Res);
5203 }
5204 SplitSrcs = std::move(PartialRdxs);
5205 }
5206   // Finally generate the requested NarrowTy based reduction.
5207   Observer.changingInstr(MI);
5208   MI.getOperand(1).setReg(SplitSrcs[0]);
5209   Observer.changedInstr(MI);
5210   return Legalized;
5211}
5212
5213 LegalizerHelper::LegalizeResult
5214 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
5215 const LLT HalfTy, const LLT AmtTy) {
5216
5217 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5218 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5219 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5220
5221 if (Amt.isZero()) {
5222 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
5223 MI.eraseFromParent();
5224 return Legalized;
5225 }
5226
5227 LLT NVT = HalfTy;
5228 unsigned NVTBits = HalfTy.getSizeInBits();
5229 unsigned VTBits = 2 * NVTBits;
5230
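  // With a constant amount the wide shift needs at most one narrow shift per
  // half: amounts beyond the full width leave only zeros (or sign bits),
  // amounts of at least the half width move one input half into the other
  // output half, exactly the half width swaps/zeroes the halves, and smaller
  // amounts combine a shift of one half with the bits crossing over from the
  // other half.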
5231 SrcOp Lo(Register(0)), Hi(Register(0));
5232 if (MI.getOpcode() == TargetOpcode::G_SHL) {
5233 if (Amt.ugt(VTBits)) {
5234 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5235 } else if (Amt.ugt(NVTBits)) {
5236 Lo = MIRBuilder.buildConstant(NVT, 0);
5237 Hi = MIRBuilder.buildShl(NVT, InL,
5238 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5239 } else if (Amt == NVTBits) {
5240 Lo = MIRBuilder.buildConstant(NVT, 0);
5241 Hi = InL;
5242 } else {
5243 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
5244 auto OrLHS =
5245 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
5246 auto OrRHS = MIRBuilder.buildLShr(
5247 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5248 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5249 }
5250 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5251 if (Amt.ugt(VTBits)) {
5252 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5253 } else if (Amt.ugt(NVTBits)) {
5254 Lo = MIRBuilder.buildLShr(NVT, InH,
5255 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5256 Hi = MIRBuilder.buildConstant(NVT, 0);
5257 } else if (Amt == NVTBits) {
5258 Lo = InH;
5259 Hi = MIRBuilder.buildConstant(NVT, 0);
5260 } else {
5261 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5262
5263 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5264 auto OrRHS = MIRBuilder.buildShl(
5265 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5266
5267 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5268 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
5269 }
5270 } else {
5271     if (Amt.ugt(VTBits)) {
5272       Hi = Lo = MIRBuilder.buildAShr(
5273 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5274 } else if (Amt.ugt(NVTBits)) {
5275 Lo = MIRBuilder.buildAShr(NVT, InH,
5276 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5277 Hi = MIRBuilder.buildAShr(NVT, InH,
5278 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5279 } else if (Amt == NVTBits) {
5280 Lo = InH;
5281 Hi = MIRBuilder.buildAShr(NVT, InH,
5282 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5283 } else {
5284 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5285
5286 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5287 auto OrRHS = MIRBuilder.buildShl(
5288 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5289
5290 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5291 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
5292 }
5293 }
5294
5295 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
5296 MI.eraseFromParent();
5297
5298 return Legalized;
5299}
5300
5301 // TODO: Optimize if constant shift amount.
5302 LegalizerHelper::LegalizeResult
5303 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
5304 LLT RequestedTy) {
5305   if (TypeIdx == 1) {
5306     Observer.changingInstr(MI);
5307     narrowScalarSrc(MI, RequestedTy, 2);
5308     Observer.changedInstr(MI);
5309     return Legalized;
5310 }
5311
5312 Register DstReg = MI.getOperand(0).getReg();
5313 LLT DstTy = MRI.getType(DstReg);
5314 if (DstTy.isVector())
5315 return UnableToLegalize;
5316
5317 Register Amt = MI.getOperand(2).getReg();
5318 LLT ShiftAmtTy = MRI.getType(Amt);
5319 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
5320 if (DstEltSize % 2 != 0)
5321 return UnableToLegalize;
5322
5323 // Ignore the input type. We can only go to exactly half the size of the
5324 // input. If that isn't small enough, the resulting pieces will be further
5325 // legalized.
5326 const unsigned NewBitSize = DstEltSize / 2;
5327 const LLT HalfTy = LLT::scalar(NewBitSize);
5328 const LLT CondTy = LLT::scalar(1);
5329
5330 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
5331 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
5332 ShiftAmtTy);
5333 }
5334
5335 // TODO: Expand with known bits.
5336
5337 // Handle the fully general expansion by an unknown amount.
5338 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
5339
5340 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5341 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5342 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5343
5344 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
5345 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
5346
5347 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
5348 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
5349 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
5350
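  // Each result half below is selected between a "short" expansion
  // (Amt < NewBitSize) and a "long" expansion (Amt >= NewBitSize); the IsZero
  // select guards the half containing the cross-over term, whose shift amount
  // would be out of range when Amt == 0.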
5351 Register ResultRegs[2];
5352 switch (MI.getOpcode()) {
5353 case TargetOpcode::G_SHL: {
5354 // Short: ShAmt < NewBitSize
5355 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
5356
5357 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
5358 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
5359 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
5360
5361 // Long: ShAmt >= NewBitSize
5362 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
5363 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
5364
5365 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
5366 auto Hi = MIRBuilder.buildSelect(
5367 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
5368
5369 ResultRegs[0] = Lo.getReg(0);
5370 ResultRegs[1] = Hi.getReg(0);
5371 break;
5372 }
5373 case TargetOpcode::G_LSHR:
5374 case TargetOpcode::G_ASHR: {
5375 // Short: ShAmt < NewBitSize
5376 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
5377
5378 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
5379 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
5380 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
5381
5382     // Long: ShAmt >= NewBitSize
5383     MachineInstrBuilder HiL;
5384 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5385 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
5386 } else {
5387 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
5388 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
5389 }
5390 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
5391 {InH, AmtExcess}); // Lo from Hi part.
5392
5393 auto Lo = MIRBuilder.buildSelect(
5394 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
5395
5396 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
5397
5398 ResultRegs[0] = Lo.getReg(0);
5399 ResultRegs[1] = Hi.getReg(0);
5400 break;
5401 }
5402 default:
5403 llvm_unreachable("not a shift");
5404 }
5405
5406 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
5407 MI.eraseFromParent();
5408 return Legalized;
5409}
5410
5411 LegalizerHelper::LegalizeResult
5412 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
5413 LLT MoreTy) {
5414 assert(TypeIdx == 0 && "Expecting only Idx 0");
5415
5416   Observer.changingInstr(MI);
5417   for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
5418     MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
5419     MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5420     moreElementsVectorSrc(MI, MoreTy, I);
5421   }
5422
5423   MachineBasicBlock &MBB = *MI.getParent();
5424   MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
5425   moreElementsVectorDst(MI, MoreTy, 0);
5426   Observer.changedInstr(MI);
5427   return Legalized;
5428}
5429
5430MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
5431 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
5432 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
5433
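  // Returns the identity value of the reduction, i.e. the value that can pad
  // extra lanes of a widened vector without changing the reduction result
  // (e.g. 0 for add/or/xor, 1 for mul, all-ones for and).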
5434 switch (Opcode) {
5435   default:
5436     llvm_unreachable(
5437 "getNeutralElementForVecReduce called with invalid opcode!");
5438 case TargetOpcode::G_VECREDUCE_ADD:
5439 case TargetOpcode::G_VECREDUCE_OR:
5440 case TargetOpcode::G_VECREDUCE_XOR:
5441 case TargetOpcode::G_VECREDUCE_UMAX:
5442 return MIRBuilder.buildConstant(Ty, 0);
5443 case TargetOpcode::G_VECREDUCE_MUL:
5444 return MIRBuilder.buildConstant(Ty, 1);
5445 case TargetOpcode::G_VECREDUCE_AND:
5446   case TargetOpcode::G_VECREDUCE_UMIN:
5447     return MIRBuilder.buildConstant(Ty,
5448                                     APInt::getAllOnes(Ty.getSizeInBits()));
5449   case TargetOpcode::G_VECREDUCE_SMAX:
5450     return MIRBuilder.buildConstant(
5451         Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
5452   case TargetOpcode::G_VECREDUCE_SMIN:
5453     return MIRBuilder.buildConstant(
5454         Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
5455   case TargetOpcode::G_VECREDUCE_FADD:
5456 return MIRBuilder.buildFConstant(Ty, -0.0);
5457 case TargetOpcode::G_VECREDUCE_FMUL:
5458 return MIRBuilder.buildFConstant(Ty, 1.0);
5459 case TargetOpcode::G_VECREDUCE_FMINIMUM:
5460 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
5461 assert(false && "getNeutralElementForVecReduce unimplemented for "
5462 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
5463 }
5464 llvm_unreachable("switch expected to return!");
5465}
5466
5467 LegalizerHelper::LegalizeResult
5468 LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
5469 LLT MoreTy) {
5470 unsigned Opc = MI.getOpcode();
5471 switch (Opc) {
5472 case TargetOpcode::G_IMPLICIT_DEF:
5473 case TargetOpcode::G_LOAD: {
5474     if (TypeIdx != 0)
5475       return UnableToLegalize;
5476     Observer.changingInstr(MI);
5477     moreElementsVectorDst(MI, MoreTy, 0);
5478     Observer.changedInstr(MI);
5479     return Legalized;
5480 }
5481 case TargetOpcode::G_STORE:
5482     if (TypeIdx != 0)
5483       return UnableToLegalize;
5484     Observer.changingInstr(MI);
5485     moreElementsVectorSrc(MI, MoreTy, 0);
5486     Observer.changedInstr(MI);
5487     return Legalized;
5488 case TargetOpcode::G_AND:
5489 case TargetOpcode::G_OR:
5490 case TargetOpcode::G_XOR:
5491 case TargetOpcode::G_ADD:
5492 case TargetOpcode::G_SUB:
5493 case TargetOpcode::G_MUL:
5494 case TargetOpcode::G_FADD:
5495 case TargetOpcode::G_FSUB:
5496 case TargetOpcode::G_FMUL:
5497 case TargetOpcode::G_FDIV:
5498 case TargetOpcode::G_FCOPYSIGN:
5499 case TargetOpcode::G_UADDSAT:
5500 case TargetOpcode::G_USUBSAT:
5501 case TargetOpcode::G_SADDSAT:
5502 case TargetOpcode::G_SSUBSAT:
5503 case TargetOpcode::G_SMIN:
5504 case TargetOpcode::G_SMAX:
5505 case TargetOpcode::G_UMIN:
5506 case TargetOpcode::G_UMAX:
5507 case TargetOpcode::G_FMINNUM:
5508 case TargetOpcode::G_FMAXNUM:
5509 case TargetOpcode::G_FMINNUM_IEEE:
5510 case TargetOpcode::G_FMAXNUM_IEEE:
5511 case TargetOpcode::G_FMINIMUM:
5512 case TargetOpcode::G_FMAXIMUM:
5513 case TargetOpcode::G_STRICT_FADD:
5514 case TargetOpcode::G_STRICT_FSUB:
5515 case TargetOpcode::G_STRICT_FMUL:
5516 case TargetOpcode::G_SHL:
5517 case TargetOpcode::G_ASHR:
5518   case TargetOpcode::G_LSHR: {
5519     Observer.changingInstr(MI);
5520     moreElementsVectorSrc(MI, MoreTy, 1);
5521     moreElementsVectorSrc(MI, MoreTy, 2);
5522     moreElementsVectorDst(MI, MoreTy, 0);
5523     Observer.changedInstr(MI);
5524     return Legalized;
5525 }
5526 case TargetOpcode::G_FMA:
5527 case TargetOpcode::G_STRICT_FMA:
5528 case TargetOpcode::G_FSHR:
5529   case TargetOpcode::G_FSHL: {
5530     Observer.changingInstr(MI);
5531     moreElementsVectorSrc(MI, MoreTy, 1);
5532     moreElementsVectorSrc(MI, MoreTy, 2);
5533     moreElementsVectorSrc(MI, MoreTy, 3);
5534     moreElementsVectorDst(MI, MoreTy, 0);
5535     Observer.changedInstr(MI);
5536     return Legalized;
5537 }
5538 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
5539 case TargetOpcode::G_EXTRACT:
5540     if (TypeIdx != 1)
5541       return UnableToLegalize;
5542     Observer.changingInstr(MI);
5543     moreElementsVectorSrc(MI, MoreTy, 1);
5544     Observer.changedInstr(MI);
5545     return Legalized;
5546 case TargetOpcode::G_INSERT:
5547 case TargetOpcode::G_INSERT_VECTOR_ELT:
5548 case TargetOpcode::G_FREEZE:
5549 case TargetOpcode::G_FNEG:
5550 case TargetOpcode::G_FABS:
5551 case TargetOpcode::G_FSQRT:
5552 case TargetOpcode::G_FCEIL:
5553 case TargetOpcode::G_FFLOOR:
5554 case TargetOpcode::G_FNEARBYINT:
5555 case TargetOpcode::G_FRINT:
5556 case TargetOpcode::G_INTRINSIC_ROUND:
5557 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
5558 case TargetOpcode::G_INTRINSIC_TRUNC:
5559 case TargetOpcode::G_BSWAP:
5560 case TargetOpcode::G_FCANONICALIZE:
5561 case TargetOpcode::G_SEXT_INREG:
5562 case TargetOpcode::G_ABS:
5563     if (TypeIdx != 0)
5564       return UnableToLegalize;
5565     Observer.changingInstr(MI);
5566     moreElementsVectorSrc(MI, MoreTy, 1);
5567     moreElementsVectorDst(MI, MoreTy, 0);
5568     Observer.changedInstr(MI);
5569     return Legalized;
5570 case TargetOpcode::G_SELECT: {
5571 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
5572 if (TypeIdx == 1) {
5573 if (!CondTy.isScalar() ||
5574 DstTy.getElementCount() != MoreTy.getElementCount())
5575 return UnableToLegalize;
5576
5577 // This is turning a scalar select of vectors into a vector
5578 // select. Broadcast the select condition.
5579       auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
5580       Observer.changingInstr(MI);
5581       MI.getOperand(1).setReg(ShufSplat.getReg(0));
5582       Observer.changedInstr(MI);
5583       return Legalized;
5584 }
5585
5586 if (CondTy.isVector())
5587 return UnableToLegalize;
5588
5589     Observer.changingInstr(MI);
5590     moreElementsVectorSrc(MI, MoreTy, 2);
5591     moreElementsVectorSrc(MI, MoreTy, 3);
5592     moreElementsVectorDst(MI, MoreTy, 0);
5593     Observer.changedInstr(MI);
5594     return Legalized;
5595 }
5596 case TargetOpcode::G_UNMERGE_VALUES:
5597 return UnableToLegalize;
5598 case TargetOpcode::G_PHI:
5599 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
5600 case TargetOpcode::G_SHUFFLE_VECTOR:
5601 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
5602   case TargetOpcode::G_BUILD_VECTOR: {
5603     SmallVector<Register, 8> Elts;
5604     for (auto Op : MI.uses()) {
5605       Elts.push_back(Op.getReg());
5606     }
5607
5608     for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
5609       Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()).getReg(0));
5610     }
5611
5612     MIRBuilder.buildDeleteTrailingVectorElements(
5613         MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
5614     MI.eraseFromParent();
5614 MI.eraseFromParent();
5615 return Legalized;
5616 }
5617 case TargetOpcode::G_SEXT:
5618 case TargetOpcode::G_ZEXT:
5619 case TargetOpcode::G_ANYEXT:
5620 case TargetOpcode::G_TRUNC:
5621 case TargetOpcode::G_FPTRUNC:
5622 case TargetOpcode::G_FPEXT:
5623 case TargetOpcode::G_FPTOSI:
5624 case TargetOpcode::G_FPTOUI:
5625 case TargetOpcode::G_SITOFP:
5626   case TargetOpcode::G_UITOFP: {
5627     Observer.changingInstr(MI);
5628     LLT SrcExtTy;
5629 LLT DstExtTy;
5630 if (TypeIdx == 0) {
5631 DstExtTy = MoreTy;
5632 SrcExtTy = LLT::fixed_vector(
5633 MoreTy.getNumElements(),
5634 MRI.getType(MI.getOperand(1).getReg()).getElementType());
5635 } else {
5636 DstExtTy = LLT::fixed_vector(
5637 MoreTy.getNumElements(),
5638 MRI.getType(MI.getOperand(0).getReg()).getElementType());
5639 SrcExtTy = MoreTy;
5640 }
5641 moreElementsVectorSrc(MI, SrcExtTy, 1);
5642     moreElementsVectorDst(MI, DstExtTy, 0);
5643     Observer.changedInstr(MI);
5644     return Legalized;
5645 }
5646 case TargetOpcode::G_ICMP:
5647 case TargetOpcode::G_FCMP: {
5648 if (TypeIdx != 1)
5649 return UnableToLegalize;
5650
5651     Observer.changingInstr(MI);
5652     moreElementsVectorSrc(MI, MoreTy, 2);
5653 moreElementsVectorSrc(MI, MoreTy, 3);
5654 LLT CondTy = LLT::fixed_vector(
5655 MoreTy.getNumElements(),
5656 MRI.getType(MI.getOperand(0).getReg()).getElementType());
5657     moreElementsVectorDst(MI, CondTy, 0);
5658     Observer.changedInstr(MI);
5659     return Legalized;
5660 }
5661 case TargetOpcode::G_BITCAST: {
5662 if (TypeIdx != 0)
5663 return UnableToLegalize;
5664
5665 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
5666 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5667
5668 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
5669 if (coefficient % DstTy.getNumElements() != 0)
5670 return UnableToLegalize;
5671
5672 coefficient = coefficient / DstTy.getNumElements();
5673
5674 LLT NewTy = SrcTy.changeElementCount(
5675         ElementCount::get(coefficient, MoreTy.isScalable()));
5676     Observer.changingInstr(MI);
5677     moreElementsVectorSrc(MI, NewTy, 1);
5678     moreElementsVectorDst(MI, MoreTy, 0);
5679     Observer.changedInstr(MI);
5680     return Legalized;
5681 }
5682 case TargetOpcode::G_VECREDUCE_FADD:
5683 case TargetOpcode::G_VECREDUCE_FMUL:
5684 case TargetOpcode::G_VECREDUCE_ADD:
5685 case TargetOpcode::G_VECREDUCE_MUL:
5686 case TargetOpcode::G_VECREDUCE_AND:
5687 case TargetOpcode::G_VECREDUCE_OR:
5688 case TargetOpcode::G_VECREDUCE_XOR:
5689 case TargetOpcode::G_VECREDUCE_SMAX:
5690 case TargetOpcode::G_VECREDUCE_SMIN:
5691 case TargetOpcode::G_VECREDUCE_UMAX:
5692 case TargetOpcode::G_VECREDUCE_UMIN: {
5693 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
5694 MachineOperand &MO = MI.getOperand(1);
5695 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
5696 auto NeutralElement = getNeutralElementForVecReduce(
5697 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
5698
5699     LLT IdxTy(TLI.getVectorIdxTy(MIRBuilder.getDataLayout()));
5700     for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
5701 i != e; i++) {
5702 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
5703 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
5704 NeutralElement, Idx);
5705 }
5706
5707     Observer.changingInstr(MI);
5708     MO.setReg(NewVec.getReg(0));
5709     Observer.changedInstr(MI);
5710     return Legalized;
5711 }
5712
5713 default:
5714 return UnableToLegalize;
5715 }
5716}
5717
5718 LegalizerHelper::LegalizeResult
5719 LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
5720 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5721 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5722 unsigned MaskNumElts = Mask.size();
5723 unsigned SrcNumElts = SrcTy.getNumElements();
5724 LLT DestEltTy = DstTy.getElementType();
5725
5726 if (MaskNumElts == SrcNumElts)
5727 return Legalized;
5728
5729 if (MaskNumElts < SrcNumElts) {
5730 // Extend mask to match new destination vector size with
5731 // undef values.
5732 SmallVector<int, 16> NewMask(Mask);
5733 for (unsigned I = MaskNumElts; I < SrcNumElts; ++I)
5734 NewMask.push_back(-1);
5735
5736     moreElementsVectorDst(MI, SrcTy, 0);
5737     MIRBuilder.setInstrAndDebugLoc(MI);
5738 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
5739 MI.getOperand(1).getReg(),
5740 MI.getOperand(2).getReg(), NewMask);
5741 MI.eraseFromParent();
5742
5743 return Legalized;
5744 }
5745
5746 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
5747 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
5748 LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
5749
5750 // Create new source vectors by concatenating the initial
5751 // source vectors with undefined vectors of the same size.
5752 auto Undef = MIRBuilder.buildUndef(SrcTy);
5753 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
5754 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
5755 MOps1[0] = MI.getOperand(1).getReg();
5756 MOps2[0] = MI.getOperand(2).getReg();
5757
5758 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
5759 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
5760
5761 // Readjust mask for new input vector length.
5762 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
5763 for (unsigned I = 0; I != MaskNumElts; ++I) {
5764 int Idx = Mask[I];
5765 if (Idx >= static_cast<int>(SrcNumElts))
5766 Idx += PaddedMaskNumElts - SrcNumElts;
5767 MappedOps[I] = Idx;
5768 }
5769
5770 // If we got more elements than required, extract subvector.
5771 if (MaskNumElts != PaddedMaskNumElts) {
5772 auto Shuffle =
5773 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
5774
5775 SmallVector<Register, 16> Elts(MaskNumElts);
5776 for (unsigned I = 0; I < MaskNumElts; ++I) {
5777       Elts[I] =
5778           MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
5779 .getReg(0);
5780 }
5781 MIRBuilder.buildBuildVector(DstReg, Elts);
5782 } else {
5783 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
5784 }
5785
5786   MI.eraseFromParent();
5787   return Legalized;
5788}
5789
5790 LegalizerHelper::LegalizeResult
5791 LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
5792 unsigned int TypeIdx, LLT MoreTy) {
5793 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
5794 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5795 unsigned NumElts = DstTy.getNumElements();
5796 unsigned WidenNumElts = MoreTy.getNumElements();
5797
5798 if (DstTy.isVector() && Src1Ty.isVector() &&
5799       DstTy.getNumElements() != Src1Ty.getNumElements()) {
5800     return equalizeVectorShuffleLengths(MI);
5801 }
5802
5803 if (TypeIdx != 0)
5804 return UnableToLegalize;
5805
5806 // Expect a canonicalized shuffle.
5807 if (DstTy != Src1Ty || DstTy != Src2Ty)
5808 return UnableToLegalize;
5809
5810 moreElementsVectorSrc(MI, MoreTy, 1);
5811 moreElementsVectorSrc(MI, MoreTy, 2);
5812
5813 // Adjust mask based on new input vector length.
5814 SmallVector<int, 16> NewMask;
5815 for (unsigned I = 0; I != NumElts; ++I) {
5816 int Idx = Mask[I];
5817 if (Idx < static_cast<int>(NumElts))
5818 NewMask.push_back(Idx);
5819 else
5820 NewMask.push_back(Idx - NumElts + WidenNumElts);
5821 }
5822 for (unsigned I = NumElts; I != WidenNumElts; ++I)
5823 NewMask.push_back(-1);
5824   moreElementsVectorDst(MI, MoreTy, 0);
5825   MIRBuilder.setInstrAndDebugLoc(MI);
5826 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
5827 MI.getOperand(1).getReg(),
5828 MI.getOperand(2).getReg(), NewMask);
5829 MI.eraseFromParent();
5830 return Legalized;
5831}
5832
5833void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
5834 ArrayRef<Register> Src1Regs,
5835 ArrayRef<Register> Src2Regs,
5836                                         LLT NarrowTy) {
5837   MachineIRBuilder &B = MIRBuilder;
5838 unsigned SrcParts = Src1Regs.size();
5839 unsigned DstParts = DstRegs.size();
5840
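  // Schoolbook multiplication on NarrowTy-sized digits: each destination part
  // sums the low halves of the cross products for its column, the high halves
  // (G_UMULH) carried over from the previous column, and the accumulated
  // carries.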
5841 unsigned DstIdx = 0; // Low bits of the result.
5842 Register FactorSum =
5843 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
5844 DstRegs[DstIdx] = FactorSum;
5845
5846   unsigned CarrySumPrevDstIdx;
5847   SmallVector<Register, 4> Factors;
5848
5849 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
5850 // Collect low parts of muls for DstIdx.
5851 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
5852          i <= std::min(DstIdx, SrcParts - 1); ++i) {
5853       MachineInstrBuilder Mul =
5854 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
5855 Factors.push_back(Mul.getReg(0));
5856 }
5857 // Collect high parts of muls from previous DstIdx.
5858 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
5859 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
5860 MachineInstrBuilder Umulh =
5861 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
5862 Factors.push_back(Umulh.getReg(0));
5863 }
5864 // Add CarrySum from additions calculated for previous DstIdx.
5865 if (DstIdx != 1) {
5866 Factors.push_back(CarrySumPrevDstIdx);
5867 }
5868
5869 Register CarrySum;
5870 // Add all factors and accumulate all carries into CarrySum.
5871 if (DstIdx != DstParts - 1) {
5872 MachineInstrBuilder Uaddo =
5873 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
5874 FactorSum = Uaddo.getReg(0);
5875 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
5876 for (unsigned i = 2; i < Factors.size(); ++i) {
5877 MachineInstrBuilder Uaddo =
5878 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
5879 FactorSum = Uaddo.getReg(0);
5880 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
5881 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
5882 }
5883 } else {
5884 // Since value for the next index is not calculated, neither is CarrySum.
5885 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
5886 for (unsigned i = 2; i < Factors.size(); ++i)
5887 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
5888 }
5889
5890 CarrySumPrevDstIdx = CarrySum;
5891 DstRegs[DstIdx] = FactorSum;
5892 Factors.clear();
5893 }
5894}
5895
5896 LegalizerHelper::LegalizeResult
5897 LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
5898 LLT NarrowTy) {
5899 if (TypeIdx != 0)
5900 return UnableToLegalize;
5901
5902 Register DstReg = MI.getOperand(0).getReg();
5903 LLT DstType = MRI.getType(DstReg);
5904 // FIXME: add support for vector types
5905 if (DstType.isVector())
5906 return UnableToLegalize;
5907
5908 unsigned Opcode = MI.getOpcode();
5909 unsigned OpO, OpE, OpF;
5910 switch (Opcode) {
5911 case TargetOpcode::G_SADDO:
5912 case TargetOpcode::G_SADDE:
5913 case TargetOpcode::G_UADDO:
5914 case TargetOpcode::G_UADDE:
5915 case TargetOpcode::G_ADD:
5916 OpO = TargetOpcode::G_UADDO;
5917 OpE = TargetOpcode::G_UADDE;
5918 OpF = TargetOpcode::G_UADDE;
5919 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
5920 OpF = TargetOpcode::G_SADDE;
5921 break;
5922 case TargetOpcode::G_SSUBO:
5923 case TargetOpcode::G_SSUBE:
5924 case TargetOpcode::G_USUBO:
5925 case TargetOpcode::G_USUBE:
5926 case TargetOpcode::G_SUB:
5927 OpO = TargetOpcode::G_USUBO;
5928 OpE = TargetOpcode::G_USUBE;
5929 OpF = TargetOpcode::G_USUBE;
5930 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
5931 OpF = TargetOpcode::G_SSUBE;
5932 break;
5933 default:
5934 llvm_unreachable("Unexpected add/sub opcode!");
5935 }
5936
5937 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
5938 unsigned NumDefs = MI.getNumExplicitDefs();
5939 Register Src1 = MI.getOperand(NumDefs).getReg();
5940 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
5941 Register CarryDst, CarryIn;
5942 if (NumDefs == 2)
5943 CarryDst = MI.getOperand(1).getReg();
5944 if (MI.getNumOperands() == NumDefs + 3)
5945 CarryIn = MI.getOperand(NumDefs + 2).getReg();
5946
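  // This expands into a ripple chain of narrow pieces: the lowest piece uses
  // the carry-out opcode (or the carry-in variant if a carry-in was given),
  // middle pieces propagate the carry with the unsigned carry-in opcode, and
  // the final piece switches to the signed variant when the original opcode
  // has signed overflow semantics.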
5947 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
5948 LLT LeftoverTy, DummyTy;
5949 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
5950 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
5951 MIRBuilder, MRI);
5952 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
5953 MRI);
5954
5955 int NarrowParts = Src1Regs.size();
5956 for (int I = 0, E = Src1Left.size(); I != E; ++I) {
5957 Src1Regs.push_back(Src1Left[I]);
5958 Src2Regs.push_back(Src2Left[I]);
5959 }
5960 DstRegs.reserve(Src1Regs.size());
5961
5962 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
5963 Register DstReg =
5964         MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
5965     Register CarryOut;
5966 // Forward the final carry-out to the destination register
5967 if (i == e - 1 && CarryDst)
5968 CarryOut = CarryDst;
5969
5970 if (!CarryIn) {
5971 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
5972 {Src1Regs[i], Src2Regs[i]});
5973 } else if (i == e - 1) {
5974 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
5975 {Src1Regs[i], Src2Regs[i], CarryIn});
5976 } else {
5977 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
5978 {Src1Regs[i], Src2Regs[i], CarryIn});
5979 }
5980
5981 DstRegs.push_back(DstReg);
5982 CarryIn = CarryOut;
5983 }
5984 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
5985 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
5986 ArrayRef(DstRegs).drop_front(NarrowParts));
5987
5988 MI.eraseFromParent();
5989 return Legalized;
5990}
5991
5992 LegalizerHelper::LegalizeResult
5993 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
5994 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
5995
5996 LLT Ty = MRI.getType(DstReg);
5997 if (Ty.isVector())
5998 return UnableToLegalize;
5999
6000 unsigned Size = Ty.getSizeInBits();
6001 unsigned NarrowSize = NarrowTy.getSizeInBits();
6002 if (Size % NarrowSize != 0)
6003 return UnableToLegalize;
6004
6005 unsigned NumParts = Size / NarrowSize;
6006 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
6007 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
6008
6009 SmallVector<Register, 2> Src1Parts, Src2Parts;
6010 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
6011 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
6012 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
6013 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
6014
6015 // Take only high half of registers if this is high mul.
6016 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
6017 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6018 MI.eraseFromParent();
6019 return Legalized;
6020}
6021
6022 LegalizerHelper::LegalizeResult
6023 LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
6024 LLT NarrowTy) {
6025 if (TypeIdx != 0)
6026 return UnableToLegalize;
6027
6028 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
6029
6030 Register Src = MI.getOperand(1).getReg();
6031 LLT SrcTy = MRI.getType(Src);
6032
6033 // If all finite floats fit into the narrowed integer type, we can just swap
6034 // out the result type. This is practically only useful for conversions from
6035 // half to at least 16-bits, so just handle the one case.
6036 if (SrcTy.getScalarType() != LLT::scalar(16) ||
6037 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
6038 return UnableToLegalize;
6039
6040   Observer.changingInstr(MI);
6041   narrowScalarDst(MI, NarrowTy, 0,
6042                   IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
6043   Observer.changedInstr(MI);
6044   return Legalized;
6045}
6046
6047 LegalizerHelper::LegalizeResult
6048 LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
6049 LLT NarrowTy) {
6050 if (TypeIdx != 1)
6051 return UnableToLegalize;
6052
6053 uint64_t NarrowSize = NarrowTy.getSizeInBits();
6054
6055 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
6056 // FIXME: add support for when SizeOp1 isn't an exact multiple of
6057 // NarrowSize.
6058 if (SizeOp1 % NarrowSize != 0)
6059 return UnableToLegalize;
6060 int NumParts = SizeOp1 / NarrowSize;
6061
6062 SmallVector<Register, 2> SrcRegs, DstRegs;
6064 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
6065 MIRBuilder, MRI);
6066
6067 Register OpReg = MI.getOperand(0).getReg();
6068 uint64_t OpStart = MI.getOperand(2).getImm();
6069 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
6070 for (int i = 0; i < NumParts; ++i) {
6071 unsigned SrcStart = i * NarrowSize;
6072
6073 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
6074 // No part of the extract uses this subregister, ignore it.
6075 continue;
6076 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6077 // The entire subregister is extracted, forward the value.
6078 DstRegs.push_back(SrcRegs[i]);
6079 continue;
6080 }
6081
6082 // OpSegStart is where this destination segment would start in OpReg if it
6083 // extended infinitely in both directions.
6084 int64_t ExtractOffset;
6085 uint64_t SegSize;
6086 if (OpStart < SrcStart) {
6087 ExtractOffset = 0;
6088 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
6089 } else {
6090 ExtractOffset = OpStart - SrcStart;
6091 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
6092 }
6093
6094 Register SegReg = SrcRegs[i];
6095 if (ExtractOffset != 0 || SegSize != NarrowSize) {
6096 // A genuine extract is needed.
6097 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
6098 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
6099 }
6100
6101 DstRegs.push_back(SegReg);
6102 }
6103
6104 Register DstReg = MI.getOperand(0).getReg();
6105 if (MRI.getType(DstReg).isVector())
6106 MIRBuilder.buildBuildVector(DstReg, DstRegs);
6107 else if (DstRegs.size() > 1)
6108 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6109 else
6110 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
6111 MI.eraseFromParent();
6112 return Legalized;
6113}
6114
6115 LegalizerHelper::LegalizeResult
6116 LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
6117 LLT NarrowTy) {
6118 // FIXME: Don't know how to handle secondary types yet.
6119 if (TypeIdx != 0)
6120 return UnableToLegalize;
6121
6122 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
6124 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
6125 LLT LeftoverTy;
6126 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
6127 LeftoverRegs, MIRBuilder, MRI);
6128
6129 for (Register Reg : LeftoverRegs)
6130 SrcRegs.push_back(Reg);
6131
6132 uint64_t NarrowSize = NarrowTy.getSizeInBits();
6133 Register OpReg = MI.getOperand(2).getReg();
6134 uint64_t OpStart = MI.getOperand(3).getImm();
6135 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
6136 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
6137 unsigned DstStart = I * NarrowSize;
6138
6139 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6140 // The entire subregister is defined by this insert, forward the new
6141 // value.
6142 DstRegs.push_back(OpReg);
6143 continue;
6144 }
6145
6146 Register SrcReg = SrcRegs[I];
6147 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
6148 // The leftover reg is smaller than NarrowTy, so we need to extend it.
6149 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
6150 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
6151 }
6152
6153 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
6154 // No part of the insert affects this subregister, forward the original.
6155 DstRegs.push_back(SrcReg);
6156 continue;
6157 }
6158
6159 // OpSegStart is where this destination segment would start in OpReg if it
6160 // extended infinitely in both directions.
6161 int64_t ExtractOffset, InsertOffset;
6162 uint64_t SegSize;
6163 if (OpStart < DstStart) {
6164 InsertOffset = 0;
6165 ExtractOffset = DstStart - OpStart;
6166 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
6167 } else {
6168 InsertOffset = OpStart - DstStart;
6169 ExtractOffset = 0;
6170 SegSize =
6171 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
6172 }
6173
6174 Register SegReg = OpReg;
6175 if (ExtractOffset != 0 || SegSize != OpSize) {
6176 // A genuine extract is needed.
6177 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
6178 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
6179 }
6180
6181 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
6182 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
6183 DstRegs.push_back(DstReg);
6184 }
6185
6186 uint64_t WideSize = DstRegs.size() * NarrowSize;
6187 Register DstReg = MI.getOperand(0).getReg();
6188 if (WideSize > RegTy.getSizeInBits()) {
6189 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
6190 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
6191 MIRBuilder.buildTrunc(DstReg, MergeReg);
6192 } else
6193 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6194
6195 MI.eraseFromParent();
6196 return Legalized;
6197}
6198
6199 LegalizerHelper::LegalizeResult
6200 LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
6201 LLT NarrowTy) {
6202 Register DstReg = MI.getOperand(0).getReg();
6203 LLT DstTy = MRI.getType(DstReg);
6204
6205 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
6206
6207 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6208 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
6209 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6210 LLT LeftoverTy;
6211 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
6212 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
6213 return UnableToLegalize;
6214
6215 LLT Unused;
6216 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
6217 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6218 llvm_unreachable("inconsistent extractParts result");
6219
6220 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6221 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
6222 {Src0Regs[I], Src1Regs[I]});
6223 DstRegs.push_back(Inst.getReg(0));
6224 }
6225
6226 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6227 auto Inst = MIRBuilder.buildInstr(
6228 MI.getOpcode(),
6229 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
6230 DstLeftoverRegs.push_back(Inst.getReg(0));
6231 }
6232
6233 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6234 LeftoverTy, DstLeftoverRegs);
6235
6236 MI.eraseFromParent();
6237 return Legalized;
6238}
6239
6240 LegalizerHelper::LegalizeResult
6241 LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
6242 LLT NarrowTy) {
6243 if (TypeIdx != 0)
6244 return UnableToLegalize;
6245
6246 auto [DstReg, SrcReg] = MI.getFirst2Regs();
6247
6248 LLT DstTy = MRI.getType(DstReg);
6249 if (DstTy.isVector())
6250 return UnableToLegalize;
6251     return UnableToLegalize;
6252   SmallVector<Register, 8> Parts;
6253 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
6254 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
6255 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
6256
6257 MI.eraseFromParent();
6258 return Legalized;
6259}
6260
6261 LegalizerHelper::LegalizeResult
6262 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
6263 LLT NarrowTy) {
6264 if (TypeIdx != 0)
6265 return UnableToLegalize;
6266
6267 Register CondReg = MI.getOperand(1).getReg();
6268 LLT CondTy = MRI.getType(CondReg);
6269 if (CondTy.isVector()) // TODO: Handle vselect
6270 return UnableToLegalize;
6271
6272 Register DstReg = MI.getOperand(0).getReg();
6273 LLT DstTy = MRI.getType(DstReg);
6274
6275 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6276 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6277 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
6278 LLT LeftoverTy;
6279 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
6280 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6281 return UnableToLegalize;
6282
6283 LLT Unused;
6284 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
6285 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
6286 llvm_unreachable("inconsistent extractParts result");
6287
6288 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6289 auto Select = MIRBuilder.buildSelect(NarrowTy,
6290 CondReg, Src1Regs[I], Src2Regs[I]);
6291 DstRegs.push_back(Select.getReg(0));
6292 }
6293
6294   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6295     auto Select = MIRBuilder.buildSelect(
6296 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
6297 DstLeftoverRegs.push_back(Select.getReg(0));
6298 }
6299
6300 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6301 LeftoverTy, DstLeftoverRegs);
6302
6303 MI.eraseFromParent();
6304 return Legalized;
6305}
6306
6307 LegalizerHelper::LegalizeResult
6308 LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
6309 LLT NarrowTy) {
6310 if (TypeIdx != 1)
6311 return UnableToLegalize;
6312
6313 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6314 unsigned NarrowSize = NarrowTy.getSizeInBits();
6315
6316 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6317 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
6318
6319     MachineIRBuilder &B = MIRBuilder;
6320 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6321 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
6322 auto C_0 = B.buildConstant(NarrowTy, 0);
6323 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
6324 UnmergeSrc.getReg(1), C_0);
6325 auto LoCTLZ = IsUndef ?
6326 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
6327 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
6328 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
6329 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
6330 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
6331 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
6332
6333 MI.eraseFromParent();
6334 return Legalized;
6335 }
6336
6337 return UnableToLegalize;
6338}
6339
6340 LegalizerHelper::LegalizeResult
6341 LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
6342 LLT NarrowTy) {
6343 if (TypeIdx != 1)
6344 return UnableToLegalize;
6345
6346 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6347 unsigned NarrowSize = NarrowTy.getSizeInBits();
6348
6349 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6350 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
6351
6352     MachineIRBuilder &B = MIRBuilder;
6353 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6354 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
6355 auto C_0 = B.buildConstant(NarrowTy, 0);
6356 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
6357 UnmergeSrc.getReg(0), C_0);
6358 auto HiCTTZ = IsUndef ?
6359 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
6360 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
6361 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
6362 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
6363 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
6364 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
6365
6366 MI.eraseFromParent();
6367 return Legalized;
6368 }
6369
6370 return UnableToLegalize;
6371}
6372
6373 LegalizerHelper::LegalizeResult
6374 LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
6375 LLT NarrowTy) {
6376 if (TypeIdx != 1)
6377 return UnableToLegalize;
6378
6379 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6380 unsigned NarrowSize = NarrowTy.getSizeInBits();
6381
6382 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6383 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
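    // ctpop(Hi:Lo) -> ctpop(Hi) + ctpop(Lo)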
6384
6385 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
6386 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
6387 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
6388
6389 MI.eraseFromParent();
6390 return Legalized;
6391 }
6392
6393 return UnableToLegalize;
6394}
6395
6396 LegalizerHelper::LegalizeResult
6397 LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
6398 LLT NarrowTy) {
6399 if (TypeIdx != 1)
6400 return UnableToLegalize;
6401
6402   MachineIRBuilder &B = MIRBuilder;
6403 Register ExpReg = MI.getOperand(2).getReg();
6404 LLT ExpTy = MRI.getType(ExpReg);
6405
6406 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
6407
6408 // Clamp the exponent to the range of the target type.
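  // Only the exponent operand is narrowed: it is clamped and then truncated,
  // while the floating-point operands of the G_FLDEXP keep their type.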
6409 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
6410 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
6411 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
6412 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
6413
6414   auto Trunc = B.buildTrunc(NarrowTy, Clamp);
6415   Observer.changingInstr(MI);
6416   MI.getOperand(2).setReg(Trunc.getReg(0));
6417   Observer.changedInstr(MI);
6418   return Legalized;
6419}
6420
6421 LegalizerHelper::LegalizeResult
6422 LegalizerHelper::lowerBitCount(MachineInstr &MI) {
6423 unsigned Opc = MI.getOpcode();
6424 const auto &TII = MIRBuilder.getTII();
6425 auto isSupported = [this](const LegalityQuery &Q) {
6426 auto QAction = LI.getAction(Q).Action;
6427 return QAction == Legal || QAction == Libcall || QAction == Custom;
6428 };
6429 switch (Opc) {
6430 default:
6431 return UnableToLegalize;
6432 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
6433     // This trivially expands to CTLZ.
6434     Observer.changingInstr(MI);
6435     MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
6436     Observer.changedInstr(MI);
6437     return Legalized;
6438 }
6439 case TargetOpcode::G_CTLZ: {
6440 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6441 unsigned Len = SrcTy.getSizeInBits();
6442
6443 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6444 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
6445 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
6446 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
6447 auto ICmp = MIRBuilder.buildICmp(
6448 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
6449 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
6450 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
6451 MI.eraseFromParent();
6452 return Legalized;
6453 }
6454 // for now, we do this:
6455 // NewLen = NextPowerOf2(Len);
6456 // x = x | (x >> 1);
6457 // x = x | (x >> 2);
6458 // ...
6459 // x = x | (x >>16);
6460 // x = x | (x >>32); // for 64-bit input
6461     // Up to NewLen/2
6462 // return Len - popcount(x);
6463 //
6464 // Ref: "Hacker's Delight" by Henry Warren
6465 Register Op = SrcReg;
6466 unsigned NewLen = PowerOf2Ceil(Len);
6467 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
6468 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
6469 auto MIBOp = MIRBuilder.buildOr(
6470 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
6471 Op = MIBOp.getReg(0);
6472 }
6473 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
6474 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
6475 MIBPop);
6476 MI.eraseFromParent();
6477 return Legalized;
6478 }
6479 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
6480     // This trivially expands to CTTZ.
6481     Observer.changingInstr(MI);
6482     MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
6483     Observer.changedInstr(MI);
6484     return Legalized;
6485 }
6486 case TargetOpcode::G_CTTZ: {
6487 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6488
6489 unsigned Len = SrcTy.getSizeInBits();
6490 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6491 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
6492 // zero.
6493 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
6494 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
6495 auto ICmp = MIRBuilder.buildICmp(
6496 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
6497 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
6498 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
6499 MI.eraseFromParent();
6500 return Legalized;
6501 }
6502 // for now, we use: { return popcount(~x & (x - 1)); }
6503 // unless the target has ctlz but not ctpop, in which case we use:
6504 // { return 32 - nlz(~x & (x-1)); }
6505 // Ref: "Hacker's Delight" by Henry Warren
6506 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
6507 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
6508 auto MIBTmp = MIRBuilder.buildAnd(
6509 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
6510 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
6511 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
6512 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
6513 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
6514 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
6515 MI.eraseFromParent();
6516 return Legalized;
6517 }
6518     Observer.changingInstr(MI);
6519     MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
6520     MI.getOperand(1).setReg(MIBTmp.getReg(0));
6521     Observer.changedInstr(MI);
6522     return Legalized;
6523 }
6524 case TargetOpcode::G_CTPOP: {
6525 Register SrcReg = MI.getOperand(1).getReg();
6526 LLT Ty = MRI.getType(SrcReg);
6527     unsigned Size = Ty.getSizeInBits();
6528     MachineIRBuilder &B = MIRBuilder;
6529
6530 // Count set bits in blocks of 2 bits. Default approach would be
6531 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
6532 // We use following formula instead:
6533 // B2Count = val - { (val >> 1) & 0x55555555 }
6534 // since it gives same result in blocks of 2 with one instruction less.
6535 auto C_1 = B.buildConstant(Ty, 1);
6536 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
6537 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
6538 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
6539 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
6540 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
6541
6542 // In order to get count in blocks of 4 add values from adjacent block of 2.
6543 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
6544 auto C_2 = B.buildConstant(Ty, 2);
6545 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
6546 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
6547 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
6548 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
6549 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
6550 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
6551
6552 // For count in blocks of 8 bits we don't have to mask high 4 bits before
6553 // addition since count value sits in range {0,...,8} and 4 bits are enough
6554 // to hold such binary values. After addition high 4 bits still hold count
6555 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
6556 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
6557 auto C_4 = B.buildConstant(Ty, 4);
6558 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
6559 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
6560 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
6561 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
6562 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
6563
6564 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
6565 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
6566 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
6567 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
6568
6569 // Shift count result from 8 high bits to low bits.
6570 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
6571
6572 auto IsMulSupported = [this](const LLT Ty) {
6573 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
6574 return Action == Legal || Action == WidenScalar || Action == Custom;
6575 };
6576 if (IsMulSupported(Ty)) {
6577 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
6578 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
6579 } else {
6580 auto ResTmp = B8Count;
6581 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
6582 auto ShiftC = B.buildConstant(Ty, Shift);
6583 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
6584 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
6585 }
6586 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
6587 }
6588 MI.eraseFromParent();
6589 return Legalized;
6590 }
6591 }
6592}
6593
6594 // Check that (every element of) Reg is undef or not an exact multiple of BW.
6595 static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
6596 Register Reg, unsigned BW) {
6597 return matchUnaryPredicate(
6598 MRI, Reg,
6599 [=](const Constant *C) {
6600 // Null constant here means an undef.
6601 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
6602 return !CI || CI->getValue().urem(BW) != 0;
6603 },
6604 /*AllowUndefs*/ true);
6605}
6606
6607 LegalizerHelper::LegalizeResult
6608 LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
6609 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
6610 LLT Ty = MRI.getType(Dst);
6611 LLT ShTy = MRI.getType(Z);
6612
6613 unsigned BW = Ty.getScalarSizeInBits();
6614
6615 if (!isPowerOf2_32(BW))
6616 return UnableToLegalize;
6617
6618 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6619 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6620
6621 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
6622 // fshl X, Y, Z -> fshr X, Y, -Z
6623 // fshr X, Y, Z -> fshl X, Y, -Z
6624 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
6625 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
6626 } else {
6627 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
6628 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
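    // Pre-shifting by one lets ~Z (i.e. BW - 1 - Z modulo BW) be used as the
    // amount; unlike the -Z form above, this also gives the right result when
    // Z % BW == 0.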
6629 auto One = MIRBuilder.buildConstant(ShTy, 1);
6630 if (IsFSHL) {
6631 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
6632 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
6633 } else {
6634 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
6635 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
6636 }
6637
6638 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
6639 }
6640
6641 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
6642 MI.eraseFromParent();
6643 return Legalized;
6644}
6645
6646 LegalizerHelper::LegalizeResult
6647 LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
6648 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
6649 LLT Ty = MRI.getType(Dst);
6650 LLT ShTy = MRI.getType(Z);
6651
6652 const unsigned BW = Ty.getScalarSizeInBits();
6653 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6654
6655 Register ShX, ShY;
6656 Register ShAmt, InvShAmt;
6657
6658 // FIXME: Emit optimized urem by constant instead of letting it expand later.
6659 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
6660 // fshl: X << C | Y >> (BW - C)
6661 // fshr: X << (BW - C) | Y >> C
6662 // where C = Z % BW is not zero
6663 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
6664 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
6665 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
6666 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
6667 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
6668 } else {
6669 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
6670 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
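    // Shifting by one first and then by BW - 1 - (Z % BW) keeps every
    // individual shift amount strictly below BW, even when Z % BW == 0.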
6671 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
6672 if (isPowerOf2_32(BW)) {
6673 // Z % BW -> Z & (BW - 1)
6674 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
6675 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
6676 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
6677 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
6678 } else {
6679 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
6680 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
6681 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
6682 }
6683
6684 auto One = MIRBuilder.buildConstant(ShTy, 1);
6685 if (IsFSHL) {
6686 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
6687 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
6688 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
6689 } else {
6690 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
6691 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
6692 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
6693 }
6694 }
6695
6696 MIRBuilder.buildOr(Dst, ShX, ShY);
6697 MI.eraseFromParent();
6698 return Legalized;
6699}
6700
6701 LegalizerHelper::LegalizeResult
6702 LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
6703 // These operations approximately do the following (while avoiding undefined
6704 // shifts by BW):
6705 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
6706 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
6707 Register Dst = MI.getOperand(0).getReg();
6708 LLT Ty = MRI.getType(Dst);
6709 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
6710
6711 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6712 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6713
6714 // TODO: Use smarter heuristic that accounts for vector legalization.
6715 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
6716 return lowerFunnelShiftAsShifts(MI);
6717
6718 // This only works for powers of 2, fallback to shifts if it fails.
6719 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
6720 if (Result == UnableToLegalize)
6721 return lowerFunnelShiftAsShifts(MI);
6722 return Result;
6723}
6724
6725 LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
6726 auto [Dst, Src] = MI.getFirst2Regs();
6727 LLT DstTy = MRI.getType(Dst);
6728 LLT SrcTy = MRI.getType(Src);
6729
6730 uint32_t DstTySize = DstTy.getSizeInBits();
6731 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
6732 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
6733
6734 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
6735 !isPowerOf2_32(SrcTyScalarSize))
6736 return UnableToLegalize;
6737
6738 // The step between extend is too large, split it by creating an intermediate
6739 // extend instruction
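  // For example, extending <8 x s8> to <8 x s32> first extends to <8 x s16>,
  // unmerges that into two <4 x s16> halves, extends each half to <4 x s32>,
  // and finally merges the two results back into the <8 x s32> destination.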
6740 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
6741 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
6742 // If the destination type is illegal, split it into multiple statements
6743 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
6744 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
6745 // Unmerge the vector
6746     LLT EltTy = MidTy.changeElementCount(
6747         MidTy.getElementCount().divideCoefficientBy(2));
6748 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
6749
6750 // ZExt the vectors
6751     LLT ZExtResTy = DstTy.changeElementCount(
6752         DstTy.getElementCount().divideCoefficientBy(2));
6753 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
6754 {UnmergeSrc.getReg(0)});
6755 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
6756 {UnmergeSrc.getReg(1)});
6757
6758 // Merge the ending vectors
6759 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
6760
6761 MI.eraseFromParent();
6762 return Legalized;
6763 }
6764 return UnableToLegalize;
6765}
6766
6767 LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
6768 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
6770   // Similar to how operand splitting is done in SelectionDAG, we can handle
6771 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
6772 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
6773 // %lo16(<4 x s16>) = G_TRUNC %inlo
6774 // %hi16(<4 x s16>) = G_TRUNC %inhi
6775 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
6776 // %res(<8 x s8>) = G_TRUNC %in16
6777
6778 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
6779
6780 Register DstReg = MI.getOperand(0).getReg();
6781 Register SrcReg = MI.getOperand(1).getReg();
6782 LLT DstTy = MRI.getType(DstReg);
6783 LLT SrcTy = MRI.getType(SrcReg);
6784
6785   if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
6786       isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
6787       isPowerOf2_32(SrcTy.getNumElements()) &&
6788       isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
6789     // Split input type.
6790     LLT SplitSrcTy = SrcTy.changeElementCount(
6791         SrcTy.getElementCount().divideCoefficientBy(2));
6792
6793 // First, split the source into two smaller vectors.
6794 SmallVector<Register, 2> SplitSrcs;
6795 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
6796
6797 // Truncate the splits into intermediate narrower elements.
6798 LLT InterTy;
6799 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
6800 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
6801 else
6802 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
6803 for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
6804 SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
6805 }
6806
6807 // Combine the new truncates into one vector
6808 auto Merge = MIRBuilder.buildMergeLikeInstr(
6809 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
6810
6811 // Truncate the new vector to the final result type
6812 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
6813 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
6814 else
6815 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
6816
6817 MI.eraseFromParent();
6818
6819 return Legalized;
6820 }
6821 return UnableToLegalize;
6822}
6823
6824LegalizerHelper::LegalizeResult
6825LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
6826 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
6827 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
6828 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
6829 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6830 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
6831 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
6832 MI.eraseFromParent();
6833 return Legalized;
6834}
6835
6836LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
6837 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
6838
6839 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
6840 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
6841
6843
6844 // If a rotate in the other direction is supported, use it.
6845 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6846 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
6847 isPowerOf2_32(EltSizeInBits))
6848 return lowerRotateWithReverseRotate(MI);
6849
6850 // If a funnel shift is supported, use it.
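// A rotate is a funnel shift with both inputs equal:
// rotl(x, c) == fshl(x, x, c) and rotr(x, c) == fshr(x, x, c), which is why
// the same source register is passed twice to buildFunnelShift below.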
6851 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6852 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6853 bool IsFShLegal = false;
6854 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
6855 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
6856 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
6857 Register R3) {
6858 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
6859 MI.eraseFromParent();
6860 return Legalized;
6861 };
6862 // If a funnel shift in the other direction is supported, use it.
6863 if (IsFShLegal) {
6864 return buildFunnelShift(FShOpc, Dst, Src, Amt);
6865 } else if (isPowerOf2_32(EltSizeInBits)) {
6866 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
6867 return buildFunnelShift(RevFsh, Dst, Src, Amt);
6868 }
6869 }
6870
6871 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
6872 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
6873 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
6874 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
6875 Register ShVal;
6876 Register RevShiftVal;
6877 if (isPowerOf2_32(EltSizeInBits)) {
6878 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
6879 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
6880 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
6881 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
6882 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
6883 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
6884 RevShiftVal =
6885 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
6886 } else {
6887 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
6888 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
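// Splitting the opposing shift into "by 1, then by (w - 1 - (c % w))" keeps
// every shift amount strictly less than w, even when c % w == 0, so no
// out-of-range shift is ever emitted.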
6889 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
6890 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
6891 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
6892 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
6893 auto One = MIRBuilder.buildConstant(AmtTy, 1);
6894 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
6895 RevShiftVal =
6896 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
6897 }
6898 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
6899 MI.eraseFromParent();
6900 return Legalized;
6901}
6902
6903// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
6904// representation.
6905LegalizerHelper::LegalizeResult
6906LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
6907 auto [Dst, Src] = MI.getFirst2Regs();
6908 const LLT S64 = LLT::scalar(64);
6909 const LLT S32 = LLT::scalar(32);
6910 const LLT S1 = LLT::scalar(1);
6911
6912 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
6913
6914 // unsigned cul2f(ulong u) {
6915 // uint lz = clz(u);
6916 // uint e = (u != 0) ? 127U + 63U - lz : 0;
6917 // u = (u << lz) & 0x7fffffffffffffffUL;
6918 // ulong t = u & 0xffffffffffUL;
6919 // uint v = (e << 23) | (uint)(u >> 40);
6920 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
6921 // return as_float(v + r);
6922 // }
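// For illustration: u = 1 gives lz = 63, e = 127, and a zero mantissa, so
// v = 127 << 23 = 0x3f800000, r = 0, and the result is the f32 encoding of 1.0.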
6923
6924 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
6925 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
6926
6927 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
6928
6929 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
6930 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
6931
6932 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
6933 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
6934
6935 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
6936 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
6937
6938 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
6939
6940 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
6941 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
6942
6943 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
6944 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
6945 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
6946
6947 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
6948 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
6949 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
6950 auto One = MIRBuilder.buildConstant(S32, 1);
6951
6952 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
6953 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
6954 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
6955 MIRBuilder.buildAdd(Dst, V, R);
6956
6957 MI.eraseFromParent();
6958 return Legalized;
6959}
6960
6961LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
6962 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6963
6964 if (SrcTy == LLT::scalar(1)) {
6965 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
6966 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
6967 MIRBuilder.buildSelect(Dst, Src, True, False);
6968 MI.eraseFromParent();
6969 return Legalized;
6970 }
6971
6972 if (SrcTy != LLT::scalar(64))
6973 return UnableToLegalize;
6974
6975 if (DstTy == LLT::scalar(32)) {
6976 // TODO: SelectionDAG has several alternative expansions to port which may
6977 // be more reasonable depending on the available instructions. If a target
6978 // has sitofp, does not have CTLZ, or can efficiently use f64 as an
6979 // intermediate type, this is probably worse.
6980 return lowerU64ToF32BitOps(MI);
6981 }
6982
6983 return UnableToLegalize;
6984}
6985
6986LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
6987 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6988
6989 const LLT S64 = LLT::scalar(64);
6990 const LLT S32 = LLT::scalar(32);
6991 const LLT S1 = LLT::scalar(1);
6992
6993 if (SrcTy == S1) {
6994 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
6995 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
6996 MIRBuilder.buildSelect(Dst, Src, True, False);
6997 MI.eraseFromParent();
6998 return Legalized;
6999 }
7000
7001 if (SrcTy != S64)
7002 return UnableToLegalize;
7003
7004 if (DstTy == S32) {
7005 // signed cl2f(long l) {
7006 // long s = l >> 63;
7007 // float r = cul2f((l + s) ^ s);
7008 // return s ? -r : r;
7009 // }
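// For illustration: l = -1 gives s = -1, (l + s) ^ s = 1, cul2f(1) = 1.0f,
// and the final select returns -1.0f because s is non-zero.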
7010 Register L = Src;
7011 auto SignBit = MIRBuilder.buildConstant(S64, 63);
7012 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
7013
7014 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
7015 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
7016 auto R = MIRBuilder.buildUITOFP(S32, Xor);
7017
7018 auto RNeg = MIRBuilder.buildFNeg(S32, R);
7019 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
7020 MIRBuilder.buildConstant(S64, 0));
7021 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
7022 MI.eraseFromParent();
7023 return Legalized;
7024 }
7025
7026 return UnableToLegalize;
7027}
7028
7029LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
7030 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7031 const LLT S64 = LLT::scalar(64);
7032 const LLT S32 = LLT::scalar(32);
7033
7034 if (SrcTy != S64 && SrcTy != S32)
7035 return UnableToLegalize;
7036 if (DstTy != S32 && DstTy != S64)
7037 return UnableToLegalize;
7038
7039 // FPTOSI gives same result as FPTOUI for positive signed integers.
7040 // FPTOUI needs to deal with fp values that convert to unsigned integers
7041 // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
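// Informally, the expansion below computes
//   fptoui(x) = x < 2^Exp ? fptosi(x) : fptosi(x - 2^Exp) ^ (1 << Exp)
// since values below 2^Exp convert identically under FPTOSI and FPTOUI.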
7042
7043 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
7044 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
7045 : APFloat::IEEEdouble(),
7046 APInt::getZero(SrcTy.getSizeInBits()));
7047 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
7048
7049 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
7050
7051 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
7052 // For fp values greater than or equal to Threshold (2^Exp), we use FPTOSI on
7053 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
7054 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
7055 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
7056 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
7057 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
7058
7059 const LLT S1 = LLT::scalar(1);
7060
7061 MachineInstrBuilder FCMP =
7062 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
7063 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
7064
7065 MI.eraseFromParent();
7066 return Legalized;
7067}
7068
7069LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
7070 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7071 const LLT S64 = LLT::scalar(64);
7072 const LLT S32 = LLT::scalar(32);
7073
7074 // FIXME: Only f32 to i64 conversions are supported.
7075 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
7076 return UnableToLegalize;
7077
7078 // Expand f32 -> i64 conversion
7079 // This algorithm comes from compiler-rt's implementation of fixsfdi:
7080 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
7081
7082 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
7083
7084 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
7085 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
7086
7087 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
7088 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
7089
7090 auto SignMask = MIRBuilder.buildConstant(SrcTy,
7091 APInt::getSignMask(SrcEltBits));
7092 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
7093 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
7094 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
7095 Sign = MIRBuilder.buildSExt(DstTy, Sign);
7096
7097 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
7098 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
7099 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
7100
7101 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
7102 R = MIRBuilder.buildZExt(DstTy, R);
7103
7104 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
7105 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
7106 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
7107 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
7108
7109 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
7110 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
7111
7112 const LLT S1 = LLT::scalar(1);
7113 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
7114 S1, Exponent, ExponentLoBit);
7115
7116 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
7117
7118 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
7119 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
7120
7121 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
7122
7123 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
7124 S1, Exponent, ZeroSrcTy);
7125
7126 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
7127 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
7128
7129 MI.eraseFromParent();
7130 return Legalized;
7131}
7132
7133// f64 -> f16 conversion using round-to-nearest-even rounding mode.
7134LegalizerHelper::LegalizeResult
7135LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
7136 const LLT S1 = LLT::scalar(1);
7137 const LLT S32 = LLT::scalar(32);
7138
7139 auto [Dst, Src] = MI.getFirst2Regs();
7140 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
7141 MRI.getType(Src).getScalarType() == LLT::scalar(64));
7142
7143 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
7144 return UnableToLegalize;
7145
7146 if (MIRBuilder.getMF().getTarget().Options.UnsafeFPMath) {
7147 unsigned Flags = MI.getFlags();
7148 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
7149 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
7150 MI.eraseFromParent();
7151 return Legalized;
7152 }
7153
7154 const unsigned ExpMask = 0x7ff;
7155 const unsigned ExpBiasf64 = 1023;
7156 const unsigned ExpBiasf16 = 15;
7157
7158 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
7159 Register U = Unmerge.getReg(0);
7160 Register UH = Unmerge.getReg(1);
7161
7162 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
7163 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
7164
7165 // Subtract the fp64 exponent bias (1023) to get the real exponent and
7166 // add the f16 bias (15) to get the biased exponent for the f16 format.
7167 E = MIRBuilder.buildAdd(
7168 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
7169
7170 auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
7171 M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
7172
7173 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
7174 MIRBuilder.buildConstant(S32, 0x1ff));
7175 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
7176
7177 auto Zero = MIRBuilder.buildConstant(S32, 0);
7178 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
7179 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
7180 M = MIRBuilder.buildOr(S32, M, Lo40Set);
7181
7182 // (M != 0 ? 0x0200 : 0) | 0x7c00;
7183 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
7184 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
7185 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
7186
7187 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
7188 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
7189
7190 // N = M | (E << 12);
7191 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
7192 auto N = MIRBuilder.buildOr(S32, M, EShl12);
7193
7194 // B = clamp(1-E, 0, 13);
7195 auto One = MIRBuilder.buildConstant(S32, 1);
7196 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
7197 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
7198 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
7199
7200 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
7201 MIRBuilder.buildConstant(S32, 0x1000));
7202
7203 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
7204 auto D0 = MIRBuilder.buildShl(S32, D, B);
7205
7206 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
7207 D0, SigSetHigh);
7208 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
7209 D = MIRBuilder.buildOr(S32, D, D1);
7210
7211 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
7212 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
7213
7214 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
7215 V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
7216
7217 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
7218 MIRBuilder.buildConstant(S32, 3));
7219 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
7220
7221 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
7222 MIRBuilder.buildConstant(S32, 5));
7223 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
7224
7225 V1 = MIRBuilder.buildOr(S32, V0, V1);
7226 V = MIRBuilder.buildAdd(S32, V, V1);
7227
7228 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
7229 E, MIRBuilder.buildConstant(S32, 30));
7230 V = MIRBuilder.buildSelect(S32, CmpEGt30,
7231 MIRBuilder.buildConstant(S32, 0x7c00), V);
7232
7233 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
7234 E, MIRBuilder.buildConstant(S32, 1039));
7235 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
7236
7237 // Extract the sign bit.
7238 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
7239 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
7240
7241 // Insert the sign bit
7242 V = MIRBuilder.buildOr(S32, Sign, V);
7243
7244 MIRBuilder.buildTrunc(Dst, V);
7245 MI.eraseFromParent();
7246 return Legalized;
7247}
7248
7249LegalizerHelper::LegalizeResult
7250LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
7251 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
7252 const LLT S64 = LLT::scalar(64);
7253 const LLT S16 = LLT::scalar(16);
7254
7255 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
7256 return lowerFPTRUNC_F64_TO_F16(MI);
7257
7258 return UnableToLegalize;
7259}
7260
7261LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
7262 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7263 LLT Ty = MRI.getType(Dst);
7264
7265 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
7266 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
7267 MI.eraseFromParent();
7268 return Legalized;
7269}
7270
7271static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
7272 switch (Opc) {
7273 case TargetOpcode::G_SMIN:
7274 return CmpInst::ICMP_SLT;
7275 case TargetOpcode::G_SMAX:
7276 return CmpInst::ICMP_SGT;
7277 case TargetOpcode::G_UMIN:
7278 return CmpInst::ICMP_ULT;
7279 case TargetOpcode::G_UMAX:
7280 return CmpInst::ICMP_UGT;
7281 default:
7282 llvm_unreachable("not in integer min/max");
7283 }
7284}
7285
7286LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
7287 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7288
7289 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
7290 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
7291
7292 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
7293 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
7294
7295 MI.eraseFromParent();
7296 return Legalized;
7297}
7298
7299LegalizerHelper::LegalizeResult
7300LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
7301 GSUCmp *Cmp = cast<GSUCmp>(&MI);
7302
7303 Register Dst = Cmp->getReg(0);
7304 LLT DstTy = MRI.getType(Dst);
7305 LLT CmpTy = DstTy.changeElementSize(1);
7306
7307 CmpInst::Predicate LTPredicate = Cmp->isSigned()
7308 ? CmpInst::ICMP_SLT
7309 : CmpInst::ICMP_ULT;
7310 CmpInst::Predicate GTPredicate = Cmp->isSigned()
7311 ? CmpInst::ICMP_SGT
7312 : CmpInst::ICMP_UGT;
7313
7314 auto One = MIRBuilder.buildConstant(DstTy, 1);
7315 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
7316 auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
7317 Cmp->getRHSReg());
7318 auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
7319
7320 auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
7321 auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
7322 Cmp->getRHSReg());
7323 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
7324
7325 MI.eraseFromParent();
7326 return Legalized;
7327}
7328
7329LegalizerHelper::LegalizeResult
7330LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
7331 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
7332 const int Src0Size = Src0Ty.getScalarSizeInBits();
7333 const int Src1Size = Src1Ty.getScalarSizeInBits();
7334
7335 auto SignBitMask = MIRBuilder.buildConstant(
7336 Src0Ty, APInt::getSignMask(Src0Size));
7337
7338 auto NotSignBitMask = MIRBuilder.buildConstant(
7339 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
7340
7341 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
7342 Register And1;
7343 if (Src0Ty == Src1Ty) {
7344 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
7345 } else if (Src0Size > Src1Size) {
7346 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
7347 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
7348 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
7349 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
7350 } else {
7351 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
7352 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
7353 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
7354 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
7355 }
7356
7357 // Be careful about setting nsz/nnan/ninf on every instruction, since the
7358 // constants are a nan and -0.0, but the final result should preserve
7359 // everything.
7360 unsigned Flags = MI.getFlags();
7361
7362 // We masked the sign bit and the not-sign bit, so these are disjoint.
7363 Flags |= MachineInstr::Disjoint;
7364
7365 MIRBuilder.buildOr(Dst, And0, And1, Flags);
7366
7367 MI.eraseFromParent();
7368 return Legalized;
7369}
7370
7371LegalizerHelper::LegalizeResult
7372LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
7373 unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
7374 TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
7375
7376 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7377 LLT Ty = MRI.getType(Dst);
7378
7379 if (!MI.getFlag(MachineInstr::FmNoNans)) {
7380 // Insert canonicalizes if it's possible we need to quiet to get correct
7381 // sNaN behavior.
7382
7383 // Note this must be done here, and not as an optimization combine in the
7384 // absence of a dedicated quiet-snan instruction as we're using an
7385 // omni-purpose G_FCANONICALIZE.
7386 if (!isKnownNeverSNaN(Src0, MRI))
7387 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
7388
7389 if (!isKnownNeverSNaN(Src1, MRI))
7390 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
7391 }
7392
7393 // If there are no nans, it's safe to simply replace this with the non-IEEE
7394 // version.
7395 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
7396 MI.eraseFromParent();
7397 return Legalized;
7398}
7399
7400LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
7401 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
7402 Register DstReg = MI.getOperand(0).getReg();
7403 LLT Ty = MRI.getType(DstReg);
7404 unsigned Flags = MI.getFlags();
7405
7406 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
7407 Flags);
7408 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
7409 MI.eraseFromParent();
7410 return Legalized;
7411}
7412
7413LegalizerHelper::LegalizeResult
7414LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
7415 auto [DstReg, X] = MI.getFirst2Regs();
7416 const unsigned Flags = MI.getFlags();
7417 const LLT Ty = MRI.getType(DstReg);
7418 const LLT CondTy = Ty.changeElementSize(1);
7419
7420 // round(x) =>
7421 // t = trunc(x);
7422 // d = fabs(x - t);
7423 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
7424 // return t + o;
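// For illustration: round(-2.5) gives t = -2.0, d = 0.5, and
// o = copysign(1.0, -2.5) = -1.0, so the result is -3.0 (halfway cases round
// away from zero).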
7425
7426 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
7427
7428 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
7429 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
7430
7431 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
7432 auto Cmp =
7433 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
7434
7435 // Could emit G_UITOFP instead
7436 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
7437 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
7438 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
7439 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
7440
7441 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
7442
7443 MI.eraseFromParent();
7444 return Legalized;
7445}
7446
7447LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
7448 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7449 unsigned Flags = MI.getFlags();
7450 LLT Ty = MRI.getType(DstReg);
7451 const LLT CondTy = Ty.changeElementSize(1);
7452
7453 // result = trunc(src);
7454 // if (src < 0.0 && src != result)
7455 // result += -1.0.
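// For illustration: floor(-1.25) truncates to -1.0; since the source is
// negative and differs from the truncated value, -1.0 is added, giving -2.0.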
7456
7457 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
7458 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
7459
7460 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
7461 SrcReg, Zero, Flags);
7462 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
7463 SrcReg, Trunc, Flags);
7464 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
7465 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
7466
7467 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
7468 MI.eraseFromParent();
7469 return Legalized;
7470}
7471
7472LegalizerHelper::LegalizeResult
7473LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
7474 const unsigned NumOps = MI.getNumOperands();
7475 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
7476 unsigned PartSize = Src0Ty.getSizeInBits();
7477
7478 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
7479 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
7480
7481 for (unsigned I = 2; I != NumOps; ++I) {
7482 const unsigned Offset = (I - 1) * PartSize;
7483
7484 Register SrcReg = MI.getOperand(I).getReg();
7485 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
7486
7487 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
7488 MRI.createGenericVirtualRegister(WideTy);
7489
7490 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
7491 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
7492 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
7493 ResultReg = NextResult;
7494 }
7495
7496 if (DstTy.isPointer()) {
7497 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
7498 DstTy.getAddressSpace())) {
7499 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
7500 return UnableToLegalize;
7501 }
7502
7503 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
7504 }
7505
7506 MI.eraseFromParent();
7507 return Legalized;
7508}
7509
7510LegalizerHelper::LegalizeResult
7511LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
7512 const unsigned NumDst = MI.getNumOperands() - 1;
7513 Register SrcReg = MI.getOperand(NumDst).getReg();
7514 Register Dst0Reg = MI.getOperand(0).getReg();
7515 LLT DstTy = MRI.getType(Dst0Reg);
7516 if (DstTy.isPointer())
7517 return UnableToLegalize; // TODO
7518
7519 SrcReg = coerceToScalar(SrcReg);
7520 if (!SrcReg)
7521 return UnableToLegalize;
7522
7523 // Expand scalarizing unmerge as bitcast to integer and shift.
7524 LLT IntTy = MRI.getType(SrcReg);
7525
7526 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
7527
7528 const unsigned DstSize = DstTy.getSizeInBits();
7529 unsigned Offset = DstSize;
7530 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
7531 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
7532 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
7533 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
7534 }
7535
7536 MI.eraseFromParent();
7537 return Legalized;
7538}
7539
7540/// Lower a vector extract or insert by writing the vector to a stack temporary
7541/// and reloading the element or vector.
7542///
7543/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
7544/// =>
7545/// %stack_temp = G_FRAME_INDEX
7546/// G_STORE %vec, %stack_temp
7547/// %idx = clamp(%idx, %vec.getNumElements())
7548/// %element_ptr = G_PTR_ADD %stack_temp, %idx
7549/// %dst = G_LOAD %element_ptr
7550LegalizerHelper::LegalizeResult
7551LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
7552 Register DstReg = MI.getOperand(0).getReg();
7553 Register SrcVec = MI.getOperand(1).getReg();
7554 Register InsertVal;
7555 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
7556 InsertVal = MI.getOperand(2).getReg();
7557
7558 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
7559
7560 LLT VecTy = MRI.getType(SrcVec);
7561 LLT EltTy = VecTy.getElementType();
7562 unsigned NumElts = VecTy.getNumElements();
7563
7564 int64_t IdxVal;
7565 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
7566 SmallVector<Register, 8> SrcRegs;
7567 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
7568
7569 if (InsertVal) {
7570 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
7571 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
7572 } else {
7573 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
7574 }
7575
7576 MI.eraseFromParent();
7577 return Legalized;
7578 }
7579
7580 if (!EltTy.isByteSized()) { // Not implemented.
7581 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
7582 return UnableToLegalize;
7583 }
7584
7585 unsigned EltBytes = EltTy.getSizeInBytes();
7586 Align VecAlign = getStackTemporaryAlignment(VecTy);
7587 Align EltAlign;
7588
7589 MachinePointerInfo PtrInfo;
7590 auto StackTemp = createStackTemporary(
7591 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
7592 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
7593
7594 // Get the pointer to the element, and be sure not to hit undefined behavior
7595 // if the index is out of bounds.
7596 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
7597
7598 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
7599 int64_t Offset = IdxVal * EltBytes;
7600 PtrInfo = PtrInfo.getWithOffset(Offset);
7601 EltAlign = commonAlignment(VecAlign, Offset);
7602 } else {
7603 // We lose information with a variable offset.
7604 EltAlign = getStackTemporaryAlignment(EltTy);
7605 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
7606 }
7607
7608 if (InsertVal) {
7609 // Write the inserted element
7610 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
7611
7612 // Reload the whole vector.
7613 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
7614 } else {
7615 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
7616 }
7617
7618 MI.eraseFromParent();
7619 return Legalized;
7620}
7621
7622LegalizerHelper::LegalizeResult
7623LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
7624 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
7625 MI.getFirst3RegLLTs();
7626 LLT IdxTy = LLT::scalar(32);
7627
7628 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
7629 Register Undef;
7630 SmallVector<Register, 32> BuildVec;
7631 LLT EltTy = DstTy.getScalarType();
7632
7633 for (int Idx : Mask) {
7634 if (Idx < 0) {
7635 if (!Undef.isValid())
7636 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
7637 BuildVec.push_back(Undef);
7638 continue;
7639 }
7640
7641 if (Src0Ty.isScalar()) {
7642 BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
7643 } else {
7644 int NumElts = Src0Ty.getNumElements();
7645 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
7646 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
7647 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
7648 auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
7649 BuildVec.push_back(Extract.getReg(0));
7650 }
7651 }
7652
7653 if (DstTy.isScalar())
7654 MIRBuilder.buildCopy(DstReg, BuildVec[0]);
7655 else
7656 MIRBuilder.buildBuildVector(DstReg, BuildVec);
7657 MI.eraseFromParent();
7658 return Legalized;
7659}
7660
7661LegalizerHelper::LegalizeResult
7662LegalizerHelper::lowerVECTOR_COMPRESS(MachineInstr &MI) {
7663 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
7664 MI.getFirst4RegLLTs();
7665
7666 if (VecTy.isScalableVector())
7667 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
7668
7669 Align VecAlign = getStackTemporaryAlignment(VecTy);
7670 MachinePointerInfo PtrInfo;
7671 Register StackPtr =
7672 createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
7673 PtrInfo)
7674 .getReg(0);
7675 MachinePointerInfo ValPtrInfo =
7676 MachinePointerInfo::getUnknownStack(*MI.getMF());
7677
7678 LLT IdxTy = LLT::scalar(32);
7679 LLT ValTy = VecTy.getElementType();
7680 Align ValAlign = getStackTemporaryAlignment(ValTy);
7681
7682 auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
7683
7684 bool HasPassthru =
7685 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
7686
7687 if (HasPassthru)
7688 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
7689
7690 Register LastWriteVal;
7691 std::optional<APInt> PassthruSplatVal =
7692 isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
7693
7694 if (PassthruSplatVal.has_value()) {
7695 LastWriteVal =
7696 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
7697 } else if (HasPassthru) {
7698 auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
7699 Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
7700 {LLT::scalar(32)}, {Popcount});
7701
7702 Register LastElmtPtr =
7703 getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
7704 LastWriteVal =
7705 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
7706 .getReg(0);
7707 }
7708
7709 unsigned NumElmts = VecTy.getNumElements();
7710 for (unsigned I = 0; I < NumElmts; ++I) {
7711 auto Idx = MIRBuilder.buildConstant(IdxTy, I);
7712 auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
7713 Register ElmtPtr =
7714 getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
7715 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
7716
7717 LLT MaskITy = MaskTy.getElementType();
7718 auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
7719 if (MaskITy.getSizeInBits() > 1)
7720 MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
7721
7722 MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
7723 OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
7724
7725 if (HasPassthru && I == NumElmts - 1) {
7726 auto EndOfVector =
7727 MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
7728 auto AllLanesSelected = MIRBuilder.buildICmp(
7729 CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
7730 OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
7731 {OutPos, EndOfVector});
7732 ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
7733
7734 LastWriteVal =
7735 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
7736 .getReg(0);
7737 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
7738 }
7739 }
7740
7741 // TODO: Use StackPtr's FrameIndex alignment.
7742 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
7743
7744 MI.eraseFromParent();
7745 return Legalized;
7746}
7747
7748Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
7749 Register AllocSize,
7750 Align Alignment,
7751 LLT PtrTy) {
7752 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
7753
7754 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
7755 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
7756
7757 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
7758 // have to generate an extra instruction to negate the alloc and then use
7759 // G_PTR_ADD to add the negative offset.
7760 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
7761 if (Alignment > Align(1)) {
7762 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
7763 AlignMask.negate();
7764 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
7765 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
7766 }
7767
7768 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
7769}
7770
7771LegalizerHelper::LegalizeResult
7772LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
7773 const auto &MF = *MI.getMF();
7774 const auto &TFI = *MF.getSubtarget().getFrameLowering();
7775 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
7776 return UnableToLegalize;
7777
7778 Register Dst = MI.getOperand(0).getReg();
7779 Register AllocSize = MI.getOperand(1).getReg();
7780 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
7781
7782 LLT PtrTy = MRI.getType(Dst);
7783 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
7784 Register SPTmp =
7785 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
7786
7787 MIRBuilder.buildCopy(SPReg, SPTmp);
7788 MIRBuilder.buildCopy(Dst, SPTmp);
7789
7790 MI.eraseFromParent();
7791 return Legalized;
7792}
7793
7794LegalizerHelper::LegalizeResult
7795LegalizerHelper::lowerStackSave(MachineInstr &MI) {
7796 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
7797 if (!StackPtr)
7798 return UnableToLegalize;
7799
7800 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
7801 MI.eraseFromParent();
7802 return Legalized;
7803}
7804
7805LegalizerHelper::LegalizeResult
7806LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
7807 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
7808 if (!StackPtr)
7809 return UnableToLegalize;
7810
7811 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
7812 MI.eraseFromParent();
7813 return Legalized;
7814}
7815
7816LegalizerHelper::LegalizeResult
7817LegalizerHelper::lowerExtract(MachineInstr &MI) {
7818 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7819 unsigned Offset = MI.getOperand(2).getImm();
7820
7821 // Extract sub-vector or one element
7822 if (SrcTy.isVector()) {
7823 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
7824 unsigned DstSize = DstTy.getSizeInBits();
7825
7826 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
7827 (Offset + DstSize <= SrcTy.getSizeInBits())) {
7828 // Unmerge and allow access to each Src element for the artifact combiner.
7829 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
7830
7831 // Take element(s) we need to extract and copy it (merge them).
7832 SmallVector<Register, 8> SubVectorElts;
7833 for (unsigned Idx = Offset / SrcEltSize;
7834 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
7835 SubVectorElts.push_back(Unmerge.getReg(Idx));
7836 }
7837 if (SubVectorElts.size() == 1)
7838 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
7839 else
7840 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
7841
7842 MI.eraseFromParent();
7843 return Legalized;
7844 }
7845 }
7846
7847 if (DstTy.isScalar() &&
7848 (SrcTy.isScalar() ||
7849 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
7850 LLT SrcIntTy = SrcTy;
7851 if (!SrcTy.isScalar()) {
7852 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
7853 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
7854 }
7855
7856 if (Offset == 0)
7857 MIRBuilder.buildTrunc(DstReg, SrcReg);
7858 else {
7859 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
7860 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
7861 MIRBuilder.buildTrunc(DstReg, Shr);
7862 }
7863
7864 MI.eraseFromParent();
7865 return Legalized;
7866 }
7867
7868 return UnableToLegalize;
7869}
7870
7871LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
7872 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
7873 uint64_t Offset = MI.getOperand(3).getImm();
7874
7875 LLT DstTy = MRI.getType(Src);
7876 LLT InsertTy = MRI.getType(InsertSrc);
7877
7878 // Insert sub-vector or one element
7879 if (DstTy.isVector() && !InsertTy.isPointer()) {
7880 LLT EltTy = DstTy.getElementType();
7881 unsigned EltSize = EltTy.getSizeInBits();
7882 unsigned InsertSize = InsertTy.getSizeInBits();
7883
7884 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
7885 (Offset + InsertSize <= DstTy.getSizeInBits())) {
7886 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
7887 SmallVector<Register, 8> DstElts;
7888 unsigned Idx = 0;
7889 // Elements from Src before insert start Offset
7890 for (; Idx < Offset / EltSize; ++Idx) {
7891 DstElts.push_back(UnmergeSrc.getReg(Idx));
7892 }
7893
7894 // Replace elements in Src with elements from InsertSrc
7895 if (InsertTy.getSizeInBits() > EltSize) {
7896 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
7897 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
7898 ++Idx, ++i) {
7899 DstElts.push_back(UnmergeInsertSrc.getReg(i));
7900 }
7901 } else {
7902 DstElts.push_back(InsertSrc);
7903 ++Idx;
7904 }
7905
7906 // Remaining elements from Src after insert
7907 for (; Idx < DstTy.getNumElements(); ++Idx) {
7908 DstElts.push_back(UnmergeSrc.getReg(Idx));
7909 }
7910
7911 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
7912 MI.eraseFromParent();
7913 return Legalized;
7914 }
7915 }
7916
7917 if (InsertTy.isVector() ||
7918 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
7919 return UnableToLegalize;
7920
7921 const DataLayout &DL = MIRBuilder.getDataLayout();
7922 if ((DstTy.isPointer() &&
7923 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
7924 (InsertTy.isPointer() &&
7925 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
7926 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
7927 return UnableToLegalize;
7928 }
7929
7930 LLT IntDstTy = DstTy;
7931
7932 if (!DstTy.isScalar()) {
7933 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
7934 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
7935 }
7936
7937 if (!InsertTy.isScalar()) {
7938 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
7939 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
7940 }
7941
7942 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
7943 if (Offset != 0) {
7944 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
7945 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
7946 }
7947
7947
7948 APInt MaskVal = APInt::getBitsSetWithWrap(
7949 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
7950
7951 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
7952 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
7953 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
7954
7955 MIRBuilder.buildCast(Dst, Or);
7956 MI.eraseFromParent();
7957 return Legalized;
7958}
7959
7960LegalizerHelper::LegalizeResult
7961LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
7962 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
7963 MI.getFirst4RegLLTs();
7964 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
7965
7966 LLT Ty = Dst0Ty;
7967 LLT BoolTy = Dst1Ty;
7968
7969 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
7970
7971 if (IsAdd)
7972 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
7973 else
7974 MIRBuilder.buildSub(NewDst0, LHS, RHS);
7975
7976 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
7977
7978 auto Zero = MIRBuilder.buildConstant(Ty, 0);
7979
7980 // For an addition, the result should be less than one of the operands (LHS)
7981 // if and only if the other operand (RHS) is negative, otherwise there will
7982 // be overflow.
7983 // For a subtraction, the result should be less than one of the operands
7984 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
7985 // otherwise there will be overflow.
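// For illustration with s8 addition: 100 + 100 wraps to -56; the result is
// less than the LHS while the RHS is not negative, so the two compares
// disagree and the XOR below reports overflow.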
7986 auto ResultLowerThanLHS =
7987 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
7988 auto ConditionRHS = MIRBuilder.buildICmp(
7989 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
7990
7991 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
7992
7993 MIRBuilder.buildCopy(Dst0, NewDst0);
7994 MI.eraseFromParent();
7995
7996 return Legalized;
7997}
7998
7999LegalizerHelper::LegalizeResult
8000LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
8001 auto [Res, LHS, RHS] = MI.getFirst3Regs();
8002 LLT Ty = MRI.getType(Res);
8003 bool IsSigned;
8004 bool IsAdd;
8005 unsigned BaseOp;
8006 switch (MI.getOpcode()) {
8007 default:
8008 llvm_unreachable("unexpected addsat/subsat opcode");
8009 case TargetOpcode::G_UADDSAT:
8010 IsSigned = false;
8011 IsAdd = true;
8012 BaseOp = TargetOpcode::G_ADD;
8013 break;
8014 case TargetOpcode::G_SADDSAT:
8015 IsSigned = true;
8016 IsAdd = true;
8017 BaseOp = TargetOpcode::G_ADD;
8018 break;
8019 case TargetOpcode::G_USUBSAT:
8020 IsSigned = false;
8021 IsAdd = false;
8022 BaseOp = TargetOpcode::G_SUB;
8023 break;
8024 case TargetOpcode::G_SSUBSAT:
8025 IsSigned = true;
8026 IsAdd = false;
8027 BaseOp = TargetOpcode::G_SUB;
8028 break;
8029 }
8030
8031 if (IsSigned) {
8032 // sadd.sat(a, b) ->
8033 // hi = 0x7fffffff - smax(a, 0)
8034 // lo = 0x80000000 - smin(a, 0)
8035 // a + smin(smax(lo, b), hi)
8036 // ssub.sat(a, b) ->
8037 // lo = smax(a, -1) - 0x7fffffff
8038 // hi = smin(a, -1) - 0x80000000
8039 // a - smin(smax(lo, b), hi)
8040 // TODO: AMDGPU can use a "median of 3" instruction here:
8041 // a +/- med3(lo, b, hi)
8042 uint64_t NumBits = Ty.getScalarSizeInBits();
8043 auto MaxVal =
8044 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
8045 auto MinVal =
8046 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
8047 MachineInstrBuilder Hi, Lo;
8048 if (IsAdd) {
8049 auto Zero = MIRBuilder.buildConstant(Ty, 0);
8050 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
8051 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
8052 } else {
8053 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
8054 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
8055 MaxVal);
8056 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
8057 MinVal);
8058 }
8059 auto RHSClamped =
8060 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
8061 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
8062 } else {
8063 // uadd.sat(a, b) -> a + umin(~a, b)
8064 // usub.sat(a, b) -> a - umin(a, b)
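// Since ~a == UINT_MAX - a, umin(~a, b) is the largest addend that cannot
// wrap, and umin(a, b) is the largest subtrahend that cannot underflow.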
8065 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
8066 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
8067 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
8068 }
8069
8070 MI.eraseFromParent();
8071 return Legalized;
8072}
8073
8074LegalizerHelper::LegalizeResult
8075LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
8076 auto [Res, LHS, RHS] = MI.getFirst3Regs();
8077 LLT Ty = MRI.getType(Res);
8078 LLT BoolTy = Ty.changeElementSize(1);
8079 bool IsSigned;
8080 bool IsAdd;
8081 unsigned OverflowOp;
8082 switch (MI.getOpcode()) {
8083 default:
8084 llvm_unreachable("unexpected addsat/subsat opcode");
8085 case TargetOpcode::G_UADDSAT:
8086 IsSigned = false;
8087 IsAdd = true;
8088 OverflowOp = TargetOpcode::G_UADDO;
8089 break;
8090 case TargetOpcode::G_SADDSAT:
8091 IsSigned = true;
8092 IsAdd = true;
8093 OverflowOp = TargetOpcode::G_SADDO;
8094 break;
8095 case TargetOpcode::G_USUBSAT:
8096 IsSigned = false;
8097 IsAdd = false;
8098 OverflowOp = TargetOpcode::G_USUBO;
8099 break;
8100 case TargetOpcode::G_SSUBSAT:
8101 IsSigned = true;
8102 IsAdd = false;
8103 OverflowOp = TargetOpcode::G_SSUBO;
8104 break;
8105 }
8106
8107 auto OverflowRes =
8108 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
8109 Register Tmp = OverflowRes.getReg(0);
8110 Register Ov = OverflowRes.getReg(1);
8111 MachineInstrBuilder Clamp;
8112 if (IsSigned) {
8113 // sadd.sat(a, b) ->
8114 // {tmp, ov} = saddo(a, b)
8115 // ov ? (tmp >>s 31) + 0x80000000 : r
8116 // ssub.sat(a, b) ->
8117 // {tmp, ov} = ssubo(a, b)
8118 // ov ? (tmp >>s 31) + 0x80000000 : r
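// On signed overflow the wrapped result has the opposite sign of the exact
// result, so (tmp >>s 31) + 0x80000000 is INT_MAX for upward overflow and
// INT_MIN for downward overflow (illustrated here for 32 bits).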
8119 uint64_t NumBits = Ty.getScalarSizeInBits();
8120 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
8121 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
8122 auto MinVal =
8123 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
8124 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
8125 } else {
8126 // uadd.sat(a, b) ->
8127 // {tmp, ov} = uaddo(a, b)
8128 // ov ? 0xffffffff : tmp
8129 // usub.sat(a, b) ->
8130 // {tmp, ov} = usubo(a, b)
8131 // ov ? 0 : tmp
8132 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
8133 }
8134 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
8135
8136 MI.eraseFromParent();
8137 return Legalized;
8138}
8139
8140LegalizerHelper::LegalizeResult
8141LegalizerHelper::lowerShlSat(MachineInstr &MI) {
8142 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
8143 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
8144 "Expected shlsat opcode!");
8145 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
8146 auto [Res, LHS, RHS] = MI.getFirst3Regs();
8147 LLT Ty = MRI.getType(Res);
8148 LLT BoolTy = Ty.changeElementSize(1);
8149
8150 unsigned BW = Ty.getScalarSizeInBits();
8151 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
8152 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
8153 : MIRBuilder.buildLShr(Ty, Result, RHS);
8154
8155 MachineInstrBuilder SatVal;
8156 if (IsSigned) {
8157 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
8158 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
8159 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
8160 MIRBuilder.buildConstant(Ty, 0));
8161 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
8162 } else {
8163 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
8164 }
8165 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
8166 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
8167
8168 MI.eraseFromParent();
8169 return Legalized;
8170}
8171
8172LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
8173 auto [Dst, Src] = MI.getFirst2Regs();
8174 const LLT Ty = MRI.getType(Src);
8175 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
8176 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
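// For illustration with a 4-byte scalar 0xAABBCCDD: the two shifts below
// produce 0xDD0000AA, and the loop then ORs in 0x00CC0000 and 0x0000BB00,
// giving the byte-swapped value 0xDDCCBBAA.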
8177
8178 // Swap most and least significant byte, set remaining bytes in Res to zero.
8179 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
8180 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
8181 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
8182 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
8183
8184 // Set i-th high/low byte in Res to i-th low/high byte from Src.
8185 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
8186 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
8187 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
8188 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
8189 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
8190 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
8191 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
8192 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
8193 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
8194 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
8195 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
8196 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
8197 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
8198 }
8199 Res.getInstr()->getOperand(0).setReg(Dst);
8200
8201 MI.eraseFromParent();
8202 return Legalized;
8203}
8204
8205//{ (Src & Mask) >> N } | { (Src << N) & Mask }
8206static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
8207 MachineInstrBuilder Src, const APInt &Mask) {
8208 const LLT Ty = Dst.getLLTTy(*B.getMRI());
8209 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
8210 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
8211 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
8212 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
8213 return B.buildOr(Dst, LHS, RHS);
8214}
8215
8216LegalizerHelper::LegalizeResult
8217LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
8218 auto [Dst, Src] = MI.getFirst2Regs();
8219 const LLT Ty = MRI.getType(Src);
8220 unsigned Size = Ty.getScalarSizeInBits();
8221
8222 if (Size >= 8) {
8223 MachineInstrBuilder BSWAP =
8224 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
8225
8226 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
8227 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
8228 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
8229 MachineInstrBuilder Swap4 =
8230 SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
8231
8232 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
8233 // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
8234 // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
8235 MachineInstrBuilder Swap2 =
8236 SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
8237
8238 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
8239 // 6|7
8240 // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
8241 // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
8242 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
8243 } else {
8244 // Expand bitreverse for types smaller than 8 bits.
8245 MachineInstrBuilder Tmp;
8246 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
8247 MachineInstrBuilder Tmp2;
8248 if (I < J) {
8249 auto ShAmt = MIRBuilder.buildConstant(Ty, J - I);
8250 Tmp2 = MIRBuilder.buildShl(Ty, Src, ShAmt);
8251 } else {
8252 auto ShAmt = MIRBuilder.buildConstant(Ty, I - J);
8253 Tmp2 = MIRBuilder.buildLShr(Ty, Src, ShAmt);
8254 }
8255
8256 auto Mask = MIRBuilder.buildConstant(Ty, 1ULL << J);
8257 Tmp2 = MIRBuilder.buildAnd(Ty, Tmp2, Mask);
8258 if (I == 0)
8259 Tmp = Tmp2;
8260 else
8261 Tmp = MIRBuilder.buildOr(Ty, Tmp, Tmp2);
8262 }
8263 MIRBuilder.buildCopy(Dst, Tmp);
8264 }
8265
8266 MI.eraseFromParent();
8267 return Legalized;
8268}
8269
8270LegalizerHelper::LegalizeResult
8271LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
8272 MachineFunction &MF = MIRBuilder.getMF();
8273
8274 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
8275 int NameOpIdx = IsRead ? 1 : 0;
8276 int ValRegIndex = IsRead ? 0 : 1;
8277
8278 Register ValReg = MI.getOperand(ValRegIndex).getReg();
8279 const LLT Ty = MRI.getType(ValReg);
8280 const MDString *RegStr = cast<MDString>(
8281 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
8282
8283 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
8284 if (!PhysReg.isValid())
8285 return UnableToLegalize;
8286
8287 if (IsRead)
8288 MIRBuilder.buildCopy(ValReg, PhysReg);
8289 else
8290 MIRBuilder.buildCopy(PhysReg, ValReg);
8291
8292 MI.eraseFromParent();
8293 return Legalized;
8294}
8295
8296LegalizerHelper::LegalizeResult
8297LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
8298 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
8299 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
8300 Register Result = MI.getOperand(0).getReg();
8301 LLT OrigTy = MRI.getType(Result);
8302 auto SizeInBits = OrigTy.getScalarSizeInBits();
8303 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
8304
8305 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
8306 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
8307 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
8308 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
8309
8310 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
8311 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
8312 MIRBuilder.buildTrunc(Result, Shifted);
8313
8314 MI.eraseFromParent();
8315 return Legalized;
8316}
8317
8318LegalizerHelper::LegalizeResult
8319LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
8320 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
8321 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
8322
8323 if (Mask == fcNone) {
8324 MIRBuilder.buildConstant(DstReg, 0);
8325 MI.eraseFromParent();
8326 return Legalized;
8327 }
8328 if (Mask == fcAllFlags) {
8329 MIRBuilder.buildConstant(DstReg, 1);
8330 MI.eraseFromParent();
8331 return Legalized;
8332 }
8333
8334 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
8335 // version
8336
8337 unsigned BitSize = SrcTy.getScalarSizeInBits();
8338 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8339
8340 LLT IntTy = LLT::scalar(BitSize);
8341 if (SrcTy.isVector())
8342 IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
8343 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
8344
8345 // Various masks.
8346 APInt SignBit = APInt::getSignMask(BitSize);
8347 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
8348 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
8349 APInt ExpMask = Inf;
8350 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
8351 APInt QNaNBitMask =
8352 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
8353 APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
8354
8355 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
8356 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
8357 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
8358 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
8359 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
8360
8361 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
8362 auto Sign =
8363 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
8364
8365 auto Res = MIRBuilder.buildConstant(DstTy, 0);
8366 // Clang doesn't support capture of structured bindings:
8367 LLT DstTyCopy = DstTy;
8368 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
8369 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
8370 };
8371
8372 // Tests that involve more than one class should be processed first.
8373 if ((Mask & fcFinite) == fcFinite) {
8374 // finite(V) ==> abs(V) u< exp_mask
8375 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
8376 ExpMaskC));
8377 Mask &= ~fcFinite;
8378 } else if ((Mask & fcFinite) == fcPosFinite) {
8379 // finite(V) && V > 0 ==> V u< exp_mask
8380 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
8381 ExpMaskC));
8382 Mask &= ~fcPosFinite;
8383 } else if ((Mask & fcFinite) == fcNegFinite) {
8384 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
8385 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
8386 ExpMaskC);
8387 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
8388 appendToRes(And);
8389 Mask &= ~fcNegFinite;
8390 }
8391
8392 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
8393 // fcZero | fcSubnormal => test all exponent bits are 0
8394 // TODO: Handle sign bit specific cases
8395 // TODO: Handle inverted case
8396 if (PartialCheck == (fcZero | fcSubnormal)) {
8397 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
8398 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8399 ExpBits, ZeroC));
8400 Mask &= ~PartialCheck;
8401 }
8402 }
8403
8404 // Check for individual classes.
8405 if (FPClassTest PartialCheck = Mask & fcZero) {
8406 if (PartialCheck == fcPosZero)
8407 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8408 AsInt, ZeroC));
8409 else if (PartialCheck == fcZero)
8410 appendToRes(
8411 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
8412 else // fcNegZero
8413 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8414 AsInt, SignBitC));
8415 }
8416
8417 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
8418 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
8419 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
8420 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
8421 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
8422 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
8423 auto SubnormalRes =
8424 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
8425 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
8426 if (PartialCheck == fcNegSubnormal)
8427 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
8428 appendToRes(SubnormalRes);
8429 }
8430
8431 if (FPClassTest PartialCheck = Mask & fcInf) {
8432 if (PartialCheck == fcPosInf)
8433 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8434 AsInt, InfC));
8435 else if (PartialCheck == fcInf)
8436 appendToRes(
8437 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
8438 else { // fcNegInf
8439 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
8440 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
8441 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8442 AsInt, NegInfC));
8443 }
8444 }
8445
8446 if (FPClassTest PartialCheck = Mask & fcNan) {
8447 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
8448 if (PartialCheck == fcNan) {
8449 // isnan(V) ==> abs(V) u> int(inf)
8450 appendToRes(
8451 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
8452 } else if (PartialCheck == fcQNan) {
8453 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
8454 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
8455 InfWithQnanBitC));
8456 } else { // fcSNan
8457 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
8458 // abs(V) u< (unsigned(Inf) | quiet_bit)
8459 auto IsNan =
8460 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
8461 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
8462 Abs, InfWithQnanBitC);
8463 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
8464 }
8465 }
8466
8467 if (FPClassTest PartialCheck = Mask & fcNormal) {
8468 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
8469 // (max_exp-1))
8470 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
8471 auto ExpMinusOne = MIRBuilder.buildSub(
8472 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
8473 APInt MaxExpMinusOne = ExpMask - ExpLSB;
8474 auto NormalRes =
8475 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
8476 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
8477 if (PartialCheck == fcNegNormal)
8478 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
8479 else if (PartialCheck == fcPosNormal) {
8480 auto PosSign = MIRBuilder.buildXor(
8481 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InvertionMask));
8482 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
8483 }
8484 appendToRes(NormalRes);
8485 }
8486
8487 MIRBuilder.buildCopy(DstReg, Res);
8488 MI.eraseFromParent();
8489 return Legalized;
8490}
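// Illustrative sketch, not from the upstream source: the fcNormal block above
// uses the standard unsigned range trick. A value is normal iff its biased
// exponent satisfies 0 < exp < max_exp, which collapses into one compare:
//   (exp - 1) u< (max_exp - 1)
// Subtracting ExpLSB (the lowest exponent bit) from Abs decrements the
// exponent field, and MaxExpMinusOne is ExpMask - ExpLSB, so a single G_SUB
// followed by an unsigned G_ICMP checks both bounds at once.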
8491
8492 LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
8493 // Implement G_SELECT in terms of XOR, AND, OR.
8494 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
8495 MI.getFirst4RegLLTs();
8496
8497 bool IsEltPtr = DstTy.isPointerOrPointerVector();
8498 if (IsEltPtr) {
8499 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
8500 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
8501 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
8502 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
8503 DstTy = NewTy;
8504 }
8505
8506 if (MaskTy.isScalar()) {
8507 // Turn the scalar condition into a vector condition mask if needed.
8508
8509 Register MaskElt = MaskReg;
8510
8511 // The condition was potentially zero extended before, but we want a sign
8512 // extended boolean.
8513 if (MaskTy != LLT::scalar(1))
8514 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
8515
8516 // Continue the sign extension (or truncate) to match the data type.
8517 MaskElt =
8518 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
8519
8520 if (DstTy.isVector()) {
8521 // Generate a vector splat idiom.
8522 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
8523 MaskReg = ShufSplat.getReg(0);
8524 } else {
8525 MaskReg = MaskElt;
8526 }
8527 MaskTy = DstTy;
8528 } else if (!DstTy.isVector()) {
8529 // Cannot handle the case that mask is a vector and dst is a scalar.
8530 return UnableToLegalize;
8531 }
8532
8533 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
8534 return UnableToLegalize;
8535 }
8536
8537 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
8538 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
8539 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
8540 if (IsEltPtr) {
8541 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
8542 MIRBuilder.buildIntToPtr(DstReg, Or);
8543 } else {
8544 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
8545 }
8546 MI.eraseFromParent();
8547 return Legalized;
8548}
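// Illustrative note, not from the upstream source: the sequence above is the
// classic branchless select
//   result = (mask & op1) | (~mask & op2)
// which is only correct when each mask lane is all-ones or all-zeros; that is
// why a scalar i1 condition is sign-extended (and splatted for vector
// destinations) before being used as the mask.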
8549
8550 LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
8551 // Split DIVREM into individual instructions.
8552 unsigned Opcode = MI.getOpcode();
8553
8554 MIRBuilder.buildInstr(
8555 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
8556 : TargetOpcode::G_UDIV,
8557 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
8558 MIRBuilder.buildInstr(
8559 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
8560 : TargetOpcode::G_UREM,
8561 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
8562 MI.eraseFromParent();
8563 return Legalized;
8564}
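// Illustrative example, not from the upstream source (hypothetical MIR):
//   %q:_(s32), %r:_(s32) = G_SDIVREM %a, %b
// is rewritten by lowerDIVREM above into
//   %q:_(s32) = G_SDIV %a, %b
//   %r:_(s32) = G_SREM %a, %b
// and G_UDIVREM is split into G_UDIV + G_UREM in the same way.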
8565
8566 LegalizerHelper::LegalizeResult
8567 LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
8568 // Expand %res = G_ABS %a into:
8569 // %v1 = G_ASHR %a, scalar_size-1
8570 // %v2 = G_ADD %a, %v1
8571 // %res = G_XOR %v2, %v1
8572 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
8573 Register OpReg = MI.getOperand(1).getReg();
8574 auto ShiftAmt =
8575 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
8576 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
8577 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
8578 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
8579 MI.eraseFromParent();
8580 return Legalized;
8581}
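// Illustrative worked example, not from the upstream source, assuming a
// 32-bit scalar:
//   %a  = -5
//   %v1 = G_ASHR %a, 31    ; -1 for negative inputs, 0 otherwise
//   %v2 = G_ADD  %a, %v1   ; -6
//   %res = G_XOR %v2, %v1  ; 5, since x ^ -1 == ~x and ~(-6) == 5
// For non-negative inputs %v1 is 0, so the add and xor leave %a unchanged.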
8582
8583 LegalizerHelper::LegalizeResult
8584 LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
8585 // Expand %res = G_ABS %a into:
8586 // %v1 = G_CONSTANT 0
8587 // %v2 = G_SUB %v1, %a
8588 // %res = G_SMAX %a, %v2
8589 Register SrcReg = MI.getOperand(1).getReg();
8590 LLT Ty = MRI.getType(SrcReg);
8591 auto Zero = MIRBuilder.buildConstant(Ty, 0);
8592 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
8593 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
8594 MI.eraseFromParent();
8595 return Legalized;
8596}
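// Illustrative note, not from the upstream source: this form computes
// abs(a) as smax(a, 0 - a), e.g. smax(-5, 5) == 5 and smax(7, -7) == 7;
// for INT_MIN the negation wraps back to INT_MIN, so the result is INT_MIN.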
8597
8598 LegalizerHelper::LegalizeResult
8599 LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
8600 Register SrcReg = MI.getOperand(1).getReg();
8601 Register DestReg = MI.getOperand(0).getReg();
8602 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
8603 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
8604 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
8605 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
8606 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
8607 MI.eraseFromParent();
8608 return Legalized;
8609}
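// Illustrative note, not from the upstream source: this variant selects
// between the operand and its negation, roughly res = (a > 0) ? a : (0 - a),
// so targets can pick whichever of the three G_ABS expansions (add/xor,
// smax, or compare+select) maps best onto their instruction set.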
8610
8611 LegalizerHelper::LegalizeResult
8612 LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
8613 Register SrcReg = MI.getOperand(1).getReg();
8614 LLT SrcTy = MRI.getType(SrcReg);
8615 LLT DstTy = MRI.getType(SrcReg);
8616
8617 // The source could be a scalar if the IR type was <1 x sN>.
8618 if (SrcTy.isScalar()) {
8619 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
8620 return UnableToLegalize; // FIXME: handle extension.
8621 // This can be just a plain copy.
8622 Observer.changingInstr(MI);
8623 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
8624 Observer.changedInstr(MI);
8625 return Legalized;
8626 }
8627 return UnableToLegalize;
8628}
8629
8630 LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
8631 MachineFunction &MF = *MI.getMF();
8632 const DataLayout &DL = MIRBuilder.getDataLayout();
8633 LLVMContext &Ctx = MF.getFunction().getContext();
8634 Register ListPtr = MI.getOperand(1).getReg();
8635 LLT PtrTy = MRI.getType(ListPtr);
8636
8637 // ListPtr is a pointer to the head of the list. Get the address
8638 // of the head of the list.
8639 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
8640 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
8641 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
8642 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
8643
8644 const Align A(MI.getOperand(2).getImm());
8645 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
8646 if (A > TLI.getMinStackArgumentAlignment()) {
8647 Register AlignAmt =
8648 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
8649 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
8650 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
8651 VAList = AndDst.getReg(0);
8652 }
8653
8654 // Increment the pointer, VAList, to the next vaarg
8655 // The list should be bumped by the size of element in the current head of
8656 // list.
8657 Register Dst = MI.getOperand(0).getReg();
8658 LLT LLTTy = MRI.getType(Dst);
8659 Type *Ty = getTypeForLLT(LLTTy, Ctx);
8660 auto IncAmt =
8661 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
8662 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
8663
8664 // Store the increment VAList to the legalized pointer
8665 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
8666 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
8667 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
8668 // Load the actual argument out of the pointer VAList
8669 Align EltAlignment = DL.getABITypeAlign(Ty);
8670 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
8671 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
8672 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
8673
8674 MI.eraseFromParent();
8675 return Legalized;
8676}
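// Illustrative sketch, not from the upstream source: the alignment step in
// lowerVAArg above rounds the list pointer up with the usual mask trick,
//   p_aligned = (p + (A - 1)) & ~(A - 1)
// where the add is the G_PTR_ADD of AlignAmt and the masking is done by
// buildMaskLowPtrBits(..., Log2(A)).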
8677
8678 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
8679 // On Darwin, -Os means optimize for size without hurting performance, so
8680 // only really optimize for size when -Oz (MinSize) is used.
8681 if (MF.getTarget().getTargetTriple().isOSDarwin())
8682 return MF.getFunction().hasMinSize();
8683 return MF.getFunction().hasOptSize();
8684}
8685
8686// Returns a list of types to use for memory op lowering in MemOps. A partial
8687// port of findOptimalMemOpLowering in TargetLowering.
8688static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
8689 unsigned Limit, const MemOp &Op,
8690 unsigned DstAS, unsigned SrcAS,
8691 const AttributeList &FuncAttributes,
8692 const TargetLowering &TLI) {
8693 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
8694 return false;
8695
8696 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
8697
8698 if (Ty == LLT()) {
8699 // Use the largest scalar type whose alignment constraints are satisfied.
8700 // We only need to check DstAlign here as SrcAlign is always greater or
8701 // equal to DstAlign (or zero).
8702 Ty = LLT::scalar(64);
8703 if (Op.isFixedDstAlign())
8704 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
8705 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
8706 Ty = LLT::scalar(Ty.getSizeInBytes());
8707 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
8708 // FIXME: check for the largest legal type we can load/store to.
8709 }
8710
8711 unsigned NumMemOps = 0;
8712 uint64_t Size = Op.size();
8713 while (Size) {
8714 unsigned TySize = Ty.getSizeInBytes();
8715 while (TySize > Size) {
8716 // For now, only use non-vector loads / stores for the left-over pieces.
8717 LLT NewTy = Ty;
8718 // FIXME: check for mem op safety and legality of the types. Not all of
8719 // SDAGisms map cleanly to GISel concepts.
8720 if (NewTy.isVector())
8721 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
8722 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
8723 unsigned NewTySize = NewTy.getSizeInBytes();
8724 assert(NewTySize > 0 && "Could not find appropriate type");
8725
8726 // If the new LLT cannot cover all of the remaining bits, then consider
8727 // issuing a (or a pair of) unaligned and overlapping load / store.
8728 unsigned Fast;
8729 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
8730 MVT VT = getMVTForLLT(Ty);
8731 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
8732 TLI.allowsMisalignedMemoryAccesses(
8733 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
8734 MachineMemOperand::MONone, &Fast) &&
8735 Fast)
8736 TySize = Size;
8737 else {
8738 Ty = NewTy;
8739 TySize = NewTySize;
8740 }
8741 }
8742
8743 if (++NumMemOps > Limit)
8744 return false;
8745
8746 MemOps.push_back(Ty);
8747 Size -= TySize;
8748 }
8749
8750 return true;
8751}
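// Illustrative example, not from the upstream source (assumed inputs): for a
// 15-byte operation where the preferred type is s64 and misaligned accesses
// are fast, the loop above produces MemOps = {s64, s64}, the second access
// overlapping the first by one byte; if overlap is not allowed, the result is
// {s64, s32, s16, s8} instead.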
8752
8753 // Get a vectorized representation of the memset value operand, GISel edition.
8754 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
8755 MachineRegisterInfo &MRI = *MIB.getMRI();
8756 unsigned NumBits = Ty.getScalarSizeInBits();
8757 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
8758 if (!Ty.isVector() && ValVRegAndVal) {
8759 APInt Scalar = ValVRegAndVal->Value.trunc(8);
8760 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
8761 return MIB.buildConstant(Ty, SplatVal).getReg(0);
8762 }
8763
8764 // Extend the byte value to the larger type, and then multiply by a magic
8765 // value 0x010101... in order to replicate it across every byte.
8766 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
8767 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
8768 return MIB.buildConstant(Ty, 0).getReg(0);
8769 }
8770
8771 LLT ExtType = Ty.getScalarType();
8772 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
8773 if (NumBits > 8) {
8774 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
8775 auto MagicMI = MIB.buildConstant(ExtType, Magic);
8776 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
8777 }
8778
8779 // For vector types create a G_BUILD_VECTOR.
8780 if (Ty.isVector())
8781 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
8782
8783 return Val;
8784}
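// Illustrative example, not from the upstream source (assumed values): for a
// non-constant s8 value V with a largest store type of s32, the code above
// emits roughly
//   %ext = G_ZEXT V                ; to s32
//   %rep = G_MUL %ext, 0x01010101  ; replicate the byte into every byte lane
// while a constant such as 0xAB folds directly to the constant 0xABABABAB.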
8785
8786 LegalizerHelper::LegalizeResult
8787 LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
8788 uint64_t KnownLen, Align Alignment,
8789 bool IsVolatile) {
8790 auto &MF = *MI.getParent()->getParent();
8791 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8792 auto &DL = MF.getDataLayout();
8793 LLVMContext &C = MF.getFunction().getContext();
8794
8795 assert(KnownLen != 0 && "Have a zero length memset length!");
8796
8797 bool DstAlignCanChange = false;
8798 MachineFrameInfo &MFI = MF.getFrameInfo();
8799 bool OptSize = shouldLowerMemFuncForSize(MF);
8800
8801 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8802 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8803 DstAlignCanChange = true;
8804
8805 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
8806 std::vector<LLT> MemOps;
8807
8808 const auto &DstMMO = **MI.memoperands_begin();
8809 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8810
8811 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
8812 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
8813
8814 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
8815 MemOp::Set(KnownLen, DstAlignCanChange,
8816 Alignment,
8817 /*IsZeroMemset=*/IsZeroVal,
8818 /*IsVolatile=*/IsVolatile),
8819 DstPtrInfo.getAddrSpace(), ~0u,
8820 MF.getFunction().getAttributes(), TLI))
8821 return UnableToLegalize;
8822
8823 if (DstAlignCanChange) {
8824 // Get an estimate of the type from the LLT.
8825 Type *IRTy = getTypeForLLT(MemOps[0], C);
8826 Align NewAlign = DL.getABITypeAlign(IRTy);
8827 if (NewAlign > Alignment) {
8828 Alignment = NewAlign;
8829 unsigned FI = FIDef->getOperand(1).getIndex();
8830 // Give the stack frame object a larger alignment if needed.
8831 if (MFI.getObjectAlign(FI) < Alignment)
8832 MFI.setObjectAlignment(FI, Alignment);
8833 }
8834 }
8835
8836 MachineIRBuilder MIB(MI);
8837 // Find the largest store and generate the bit pattern for it.
8838 LLT LargestTy = MemOps[0];
8839 for (unsigned i = 1; i < MemOps.size(); i++)
8840 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
8841 LargestTy = MemOps[i];
8842
8843 // The memset stored value is always defined as an s8, so in order to make it
8844 // work with larger store types we need to repeat the bit pattern across the
8845 // wider type.
8846 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
8847
8848 if (!MemSetValue)
8849 return UnableToLegalize;
8850
8851 // Generate the stores. For each store type in the list, we generate the
8852 // matching store of that type to the destination address.
8853 LLT PtrTy = MRI.getType(Dst);
8854 unsigned DstOff = 0;
8855 unsigned Size = KnownLen;
8856 for (unsigned I = 0; I < MemOps.size(); I++) {
8857 LLT Ty = MemOps[I];
8858 unsigned TySize = Ty.getSizeInBytes();
8859 if (TySize > Size) {
8860 // Issuing an unaligned load / store pair that overlaps with the previous
8861 // pair. Adjust the offset accordingly.
8862 assert(I == MemOps.size() - 1 && I != 0);
8863 DstOff -= TySize - Size;
8864 }
8865
8866 // If this store is smaller than the largest store see whether we can get
8867 // the smaller value for free with a truncate.
8868 Register Value = MemSetValue;
8869 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
8870 MVT VT = getMVTForLLT(Ty);
8871 MVT LargestVT = getMVTForLLT(LargestTy);
8872 if (!LargestTy.isVector() && !Ty.isVector() &&
8873 TLI.isTruncateFree(LargestVT, VT))
8874 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
8875 else
8876 Value = getMemsetValue(Val, Ty, MIB);
8877 if (!Value)
8878 return UnableToLegalize;
8879 }
8880
8881 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
8882
8883 Register Ptr = Dst;
8884 if (DstOff != 0) {
8885 auto Offset =
8886 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
8887 Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
8888 }
8889
8890 MIB.buildStore(Value, Ptr, *StoreMMO);
8891 DstOff += Ty.getSizeInBytes();
8892 Size -= TySize;
8893 }
8894
8895 MI.eraseFromParent();
8896 return Legalized;
8897}
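// Illustrative note, not from the upstream source: with the 15-byte example
// and MemOps = {s64, s64}, the loop above emits an s64 store at offset 0,
// then notices the second type no longer fits the remaining 7 bytes and
// rewinds DstOff by one, so the second s64 store lands at offset 7 and the
// two stores overlap instead of writing past the end.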
8898
8899 LegalizerHelper::LegalizeResult
8900 LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
8901 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8902
8903 auto [Dst, Src, Len] = MI.getFirst3Regs();
8904
8905 const auto *MMOIt = MI.memoperands_begin();
8906 const MachineMemOperand *MemOp = *MMOIt;
8907 bool IsVolatile = MemOp->isVolatile();
8908
8909 // See if this is a constant length copy
8910 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
8911 // FIXME: support dynamically sized G_MEMCPY_INLINE
8912 assert(LenVRegAndVal &&
8913 "inline memcpy with dynamic size is not yet supported");
8914 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
8915 if (KnownLen == 0) {
8916 MI.eraseFromParent();
8917 return Legalized;
8918 }
8919
8920 const auto &DstMMO = **MI.memoperands_begin();
8921 const auto &SrcMMO = **std::next(MI.memoperands_begin());
8922 Align DstAlign = DstMMO.getBaseAlign();
8923 Align SrcAlign = SrcMMO.getBaseAlign();
8924
8925 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
8926 IsVolatile);
8927}
8928
8929 LegalizerHelper::LegalizeResult
8930 LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
8931 uint64_t KnownLen, Align DstAlign,
8932 Align SrcAlign, bool IsVolatile) {
8933 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8934 return lowerMemcpy(MI, Dst, Src, KnownLen,
8935 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
8936 IsVolatile);
8937}
8938
8939 LegalizerHelper::LegalizeResult
8940 LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
8941 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
8942 Align SrcAlign, bool IsVolatile) {
8943 auto &MF = *MI.getParent()->getParent();
8944 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8945 auto &DL = MF.getDataLayout();
8946 LLVMContext &C = MF.getFunction().getContext();
8947
8948 assert(KnownLen != 0 && "Have a zero length memcpy length!");
8949
8950 bool DstAlignCanChange = false;
8951 MachineFrameInfo &MFI = MF.getFrameInfo();
8952 Align Alignment = std::min(DstAlign, SrcAlign);
8953
8954 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8955 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8956 DstAlignCanChange = true;
8957
8958 // FIXME: infer better src pointer alignment like SelectionDAG does here.
8959 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
8960 // if the memcpy is in a tail call position.
8961
8962 std::vector<LLT> MemOps;
8963
8964 const auto &DstMMO = **MI.memoperands_begin();
8965 const auto &SrcMMO = **std::next(MI.memoperands_begin());
8966 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8967 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
8968
8969 if (!findGISelOptimalMemOpLowering(
8970 MemOps, Limit,
8971 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
8972 IsVolatile),
8973 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
8974 MF.getFunction().getAttributes(), TLI))
8975 return UnableToLegalize;
8976
8977 if (DstAlignCanChange) {
8978 // Get an estimate of the type from the LLT.
8979 Type *IRTy = getTypeForLLT(MemOps[0], C);
8980 Align NewAlign = DL.getABITypeAlign(IRTy);
8981
8982 // Don't promote to an alignment that would require dynamic stack
8983 // realignment.
8984 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
8985 if (!TRI->hasStackRealignment(MF))
8986 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
8987 NewAlign = NewAlign.previous();
8988
8989 if (NewAlign > Alignment) {
8990 Alignment = NewAlign;
8991 unsigned FI = FIDef->getOperand(1).getIndex();
8992 // Give the stack frame object a larger alignment if needed.
8993 if (MFI.getObjectAlign(FI) < Alignment)
8994 MFI.setObjectAlignment(FI, Alignment);
8995 }
8996 }
8997
8998 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
8999
9000 MachineIRBuilder MIB(MI);
9001 // Now we need to emit a pair of load and stores for each of the types we've
9002 // collected. I.e. for each type, generate a load from the source pointer of
9003 // that type width, and then generate a corresponding store to the dest buffer
9004 // of that value loaded. This can result in a sequence of loads and stores
9005 of mixed types, depending on what the target specifies as good types to use.
9006 unsigned CurrOffset = 0;
9007 unsigned Size = KnownLen;
9008 for (auto CopyTy : MemOps) {
9009 // Issuing an unaligned load / store pair that overlaps with the previous
9010 // pair. Adjust the offset accordingly.
9011 if (CopyTy.getSizeInBytes() > Size)
9012 CurrOffset -= CopyTy.getSizeInBytes() - Size;
9013
9014 // Construct MMOs for the accesses.
9015 auto *LoadMMO =
9016 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
9017 auto *StoreMMO =
9018 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
9019
9020 // Create the load.
9021 Register LoadPtr = Src;
9022 Register Offset;
9023 if (CurrOffset != 0) {
9024 LLT SrcTy = MRI.getType(Src);
9025 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
9026 .getReg(0);
9027 LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
9028 }
9029 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
9030
9031 // Create the store.
9032 Register StorePtr = Dst;
9033 if (CurrOffset != 0) {
9034 LLT DstTy = MRI.getType(Dst);
9035 StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
9036 }
9037 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
9038 CurrOffset += CopyTy.getSizeInBytes();
9039 Size -= CopyTy.getSizeInBytes();
9040 }
9041
9042 MI.eraseFromParent();
9043 return Legalized;
9044}
9045
9046 LegalizerHelper::LegalizeResult
9047 LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
9048 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
9049 bool IsVolatile) {
9050 auto &MF = *MI.getParent()->getParent();
9051 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9052 auto &DL = MF.getDataLayout();
9053 LLVMContext &C = MF.getFunction().getContext();
9054
9055 assert(KnownLen != 0 && "Have a zero length memmove length!");
9056
9057 bool DstAlignCanChange = false;
9058 MachineFrameInfo &MFI = MF.getFrameInfo();
9059 bool OptSize = shouldLowerMemFuncForSize(MF);
9060 Align Alignment = std::min(DstAlign, SrcAlign);
9061
9062 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
9063 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
9064 DstAlignCanChange = true;
9065
9066 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
9067 std::vector<LLT> MemOps;
9068
9069 const auto &DstMMO = **MI.memoperands_begin();
9070 const auto &SrcMMO = **std::next(MI.memoperands_begin());
9071 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
9072 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
9073
9074 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
9075 // to a bug in its findOptimalMemOpLowering implementation. For now do the
9076 // same thing here.
9077 if (!findGISelOptimalMemOpLowering(
9078 MemOps, Limit,
9079 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
9080 /*IsVolatile*/ true),
9081 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
9082 MF.getFunction().getAttributes(), TLI))
9083 return UnableToLegalize;
9084
9085 if (DstAlignCanChange) {
9086 // Get an estimate of the type from the LLT.
9087 Type *IRTy = getTypeForLLT(MemOps[0], C);
9088 Align NewAlign = DL.getABITypeAlign(IRTy);
9089
9090 // Don't promote to an alignment that would require dynamic stack
9091 // realignment.
9092 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
9093 if (!TRI->hasStackRealignment(MF))
9094 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
9095 NewAlign = NewAlign.previous();
9096
9097 if (NewAlign > Alignment) {
9098 Alignment = NewAlign;
9099 unsigned FI = FIDef->getOperand(1).getIndex();
9100 // Give the stack frame object a larger alignment if needed.
9101 if (MFI.getObjectAlign(FI) < Alignment)
9102 MFI.setObjectAlignment(FI, Alignment);
9103 }
9104 }
9105
9106 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
9107
9108 MachineIRBuilder MIB(MI);
9109 // Memmove requires that we perform the loads first before issuing the stores.
9110 // Apart from that, this loop is pretty much doing the same thing as the
9111 // memcpy codegen function.
9112 unsigned CurrOffset = 0;
9113 SmallVector<Register, 16> LoadVals;
9114 for (auto CopyTy : MemOps) {
9115 // Construct MMO for the load.
9116 auto *LoadMMO =
9117 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
9118
9119 // Create the load.
9120 Register LoadPtr = Src;
9121 if (CurrOffset != 0) {
9122 LLT SrcTy = MRI.getType(Src);
9123 auto Offset =
9124 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
9125 LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
9126 }
9127 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
9128 CurrOffset += CopyTy.getSizeInBytes();
9129 }
9130
9131 CurrOffset = 0;
9132 for (unsigned I = 0; I < MemOps.size(); ++I) {
9133 LLT CopyTy = MemOps[I];
9134 // Now store the values loaded.
9135 auto *StoreMMO =
9136 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
9137
9138 Register StorePtr = Dst;
9139 if (CurrOffset != 0) {
9140 LLT DstTy = MRI.getType(Dst);
9141 auto Offset =
9142 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
9143 StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
9144 }
9145 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
9146 CurrOffset += CopyTy.getSizeInBytes();
9147 }
9148 MI.eraseFromParent();
9149 return Legalized;
9150}
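// Illustrative note, not from the upstream source: issuing every load before
// any store is what makes this lowering safe for overlapping buffers, e.g.
// copying 16 bytes from p to p + 8 in two 8-byte chunks would read already
// clobbered source data if each chunk's load and store were interleaved as in
// the memcpy lowering above.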
9151
9152 LegalizerHelper::LegalizeResult
9153 LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
9154 const unsigned Opc = MI.getOpcode();
9155 // This combine is fairly complex so it's not written with a separate
9156 // matcher function.
9157 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
9158 Opc == TargetOpcode::G_MEMSET) &&
9159 "Expected memcpy like instruction");
9160
9161 auto MMOIt = MI.memoperands_begin();
9162 const MachineMemOperand *MemOp = *MMOIt;
9163
9164 Align DstAlign = MemOp->getBaseAlign();
9165 Align SrcAlign;
9166 auto [Dst, Src, Len] = MI.getFirst3Regs();
9167
9168 if (Opc != TargetOpcode::G_MEMSET) {
9169 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
9170 MemOp = *(++MMOIt);
9171 SrcAlign = MemOp->getBaseAlign();
9172 }
9173
9174 // See if this is a constant length copy
9175 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
9176 if (!LenVRegAndVal)
9177 return UnableToLegalize;
9178 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
9179
9180 if (KnownLen == 0) {
9181 MI.eraseFromParent();
9182 return Legalized;
9183 }
9184
9185 bool IsVolatile = MemOp->isVolatile();
9186 if (Opc == TargetOpcode::G_MEMCPY_INLINE)
9187 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
9188 IsVolatile);
9189
9190 // Don't try to optimize volatile.
9191 if (IsVolatile)
9192 return UnableToLegalize;
9193
9194 if (MaxLen && KnownLen > MaxLen)
9195 return UnableToLegalize;
9196
9197 if (Opc == TargetOpcode::G_MEMCPY) {
9198 auto &MF = *MI.getParent()->getParent();
9199 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9200 bool OptSize = shouldLowerMemFuncForSize(MF);
9201 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
9202 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
9203 IsVolatile);
9204 }
9205 if (Opc == TargetOpcode::G_MEMMOVE)
9206 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
9207 if (Opc == TargetOpcode::G_MEMSET)
9208 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
9209 return UnableToLegalize;
9210}
unsigned const MachineRegisterInfo * MRI
#define Success
static const LLT S1
static const LLT S64
static const LLT S32
static const LLT S16
amdgpu AMDGPU Register Bank Select
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition: Utils.h:73
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
std::string Name
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver)
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
#define R2(n)
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t High
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
R600 Clause Merge
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1243
APInt bitcastToAPInt() const
Definition: APFloat.h:1260
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition: APFloat.h:1044
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition: APFloat.h:1004
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:209
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1472
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:186
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1162
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:360
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:189
void negate()
Negate this APInt in place.
Definition: APInt.h:1430
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:199
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:853
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:286
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:180
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:219
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:831
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition: APInt.h:250
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
bool hasAttributes() const
Return true if the builder has IR-level attributes.
Definition: Attributes.h:1092
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
bool hasRetAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the return value.
Definition: Attributes.h:820
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:786
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:763
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:762
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:781
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:780
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:784
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:771
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:765
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:782
@ ICMP_EQ
equal
Definition: InstrTypes.h:778
@ ICMP_NE
not equal
Definition: InstrTypes.h:779
bool isSigned() const
Definition: InstrTypes.h:1007
const APFloat & getValueAPF() const
Definition: Constants.h:312
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:146
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
Definition: DataLayout.h:393
bool isBigEndian() const
Definition: DataLayout.h:239
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition: TypeSize.h:317
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:698
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:695
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:358
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:212
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Represents a threeway compare.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isTailCall(const MachineInstr &MI) const override
bool isEquality() const
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
Definition: LowLevelType.h:214
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:64
constexpr bool isPointerVector() const
Definition: LowLevelType.h:152
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelType.h:57
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:170
constexpr bool isByteSized() const
Definition: LowLevelType.h:263
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:184
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelType.h:221
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:100
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
Definition: LowLevelType.h:230
constexpr LLT getScalarType() const
Definition: LowLevelType.h:208
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:203
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
Definition: LowLevelType.h:124
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
LegalizeResult lowerShlSat(MachineInstr &MI)
LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LegalizeResult lowerSITOFP(MachineInstr &MI)
LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LegalizeResult lowerBitCount(MachineInstr &MI)
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LegalizeResult lowerLoad(GAnyLoad &MI)
LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizeResult lowerFConstant(MachineInstr &MI)
LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerBitreverse(MachineInstr &MI)
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult lowerEXT(MachineInstr &MI)
LegalizeResult lowerStore(GStore &MI)
LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LegalizeResult lowerFPTOUI(MachineInstr &MI)
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LegalizeResult lowerBitcast(MachineInstr &MI)
LegalizeResult lowerMinMax(MachineInstr &MI)
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LegalizeResult lowerInsert(MachineInstr &MI)
LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LegalizeResult lowerExtract(MachineInstr &MI)
LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LegalizeResult lowerFPOWI(MachineInstr &MI)
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVectorReduction(MachineInstr &MI)
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LegalizeResult lowerFCopySign(MachineInstr &MI)
LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LegalizeResult lowerFunnelShift(MachineInstr &MI)
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LegalizeResult lowerFMad(MachineInstr &MI)
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFFloor(MachineInstr &MI)
LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LegalizeResult lowerFPTOSI(MachineInstr &MI)
LegalizeResult lowerUITOFP(MachineInstr &MI)
LegalizeResult lowerShuffleVector(MachineInstr &MI)
LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerMergeValues(MachineInstr &MI)
LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LegalizeResult lowerRotate(MachineInstr &MI)
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LegalizeResult lowerDIVREM(MachineInstr &MI)
LegalizeResult lowerSelect(MachineInstr &MI)
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emiting a runtime library call instead.
LegalizeResult lowerStackRestore(MachineInstr &MI)
LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerStackSave(MachineInstr &MI)
LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeResult lowerTRUNC(MachineInstr &MI)
LegalizeResult lowerBswap(MachineInstr &MI)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LegalizeResult lowerConstant(MachineInstr &MI)
void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
virtual unsigned getExtOpcodeForWideningConstant(LLT SmallTy) const
Return the opcode (SEXT/ZEXT/ANYEXT) that should be performed while widening a constant of type Small...
bool isLegalOrCustom(const LegalityQuery &Query) const
virtual bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Called for instructions with the Custom LegalizationAction.
bool isLegal(const LegalityQuery &Query) const
virtual bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition: MCInstrInfo.h:70
A single uniqued string.
Definition: Metadata.h:720
StringRef getString() const
Definition: Metadata.cpp:610
Machine Value Type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:230
iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Helper class to build MachineInstr.
MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FSUB Op0, Op1.
MachineInstrBuilder buildFPTOSI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOSI Src0.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
std::optional< MachineInstrBuilder > materializePtrAdd(Register &Res, Register Op0, const LLT ValueTy, uint64_t Value)
Materialize and insert Res = G_PTR_ADD Op0, (G_CONSTANT Value)
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_OR Op0, NegOne.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FABS Op0.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildZExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_AND Op, LowBitsSet(ImmOp). Since there is no G_ZEXT_INREG like G_SEXT_INREG,...
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
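For instance, a signed-max style expansion can be sketched with the compare and select builders above; Dst, Src0, and Src1 are hypothetical s32 registers, and this is an illustration rather than the exact lowering used in this file.
// Hypothetical sketch: Dst = max(Src0, Src1) as compare + select.
LLT S1 = LLT::scalar(1);
auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, Src0, Src1);
MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);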
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FPOW Src0, Src1.
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
MachineInstrBuilder buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_INTRINSIC_TRUNC Src0.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildSExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src)
Build and insert a vector splat of a scalar Src using a G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idio...
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ_ZERO_UNDEF Src0.
MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts)
Build and insert Res = G_VSCALE MinElts.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
unsigned getBoolExtOp(bool IsVec, bool IsFP) const
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation: Zero = G_CONSTANT 0; Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Src0.
MachineInstrBuilder buildSMax(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMAX Op0, Op1.
MachineInstrBuilder buildAssertZExt(const DstOp &Res, const SrcOp &Op, unsigned Size)
Build and insert Res = G_ASSERT_ZEXT Op, Size.
MachineInstrBuilder buildStrictFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_STRICT_FADD Op0, Op1.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
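A small sketch of the unmerge/merge pairing that many narrowing paths rely on, using buildUnmerge (listed earlier) and buildMergeLikeInstr above; Src is a hypothetical s64 register.
// Hypothetical sketch: split an s64 into two s32 halves, then rebuild it.
LLT S32 = LLT::scalar(32);
LLT S64 = LLT::scalar(64);
auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);   // Lo, Hi = G_UNMERGE_VALUES Src
Register Lo = Unmerge.getReg(0);
Register Hi = Unmerge.getReg(1);
MIRBuilder.buildMergeLikeInstr(S64, {Lo, Hi});      // G_MERGE_VALUES Lo, Hi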
MachineInstrBuilder buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildCTTZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ_ZERO_UNDEF Src0.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineInstrBuilder buildPadVectorWithUndefElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a, b, .....
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildSMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMIN Op0, Op1.
MachineInstrBuilder buildInsert(const DstOp &Res, const SrcOp &Src, const SrcOp &Op, unsigned Index)
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_FCOPYSIGN Op0, Op1.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Elt, const SrcOp &Idx)
Build and insert Res = G_INSERT_VECTOR_ELT Val, Elt, Idx.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineInstrBuilder buildDeleteTrailingVectorElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x, y, z = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a,...
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildAtomicCmpXchg(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &CmpVal, const SrcOp &NewVal, MachineMemOperand &MMO)
Build and insert OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, MMO.
MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, ArrayRef< int > Mask)
Build and insert Res = G_SHUFFLE_VECTOR Src1, Src2, Mask.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildUMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_UMIN Op0, Op1.
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_FCMP Pred, Op0, Op1.
MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FADD Op0, Op1.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
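A brief sketch contrasting buildSExtInReg with the buildZExtInReg helper listed earlier; Src is a hypothetical s32 register and the 8-bit field width is arbitrary.
// Hypothetical sketch: extend the low 8 bits of an s32 value in place.
auto SExt = MIRBuilder.buildSExtInReg(LLT::scalar(32), Src, /*ImmOp=*/8);  // G_SEXT_INREG
auto ZExt = MIRBuilder.buildZExtInReg(LLT::scalar(32), Src, /*ImmOp=*/8);  // G_AND with 0xff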
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
bool isReturn(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:940
bool isCopy() const
bool isDebugInstr() const
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:572
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:800
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:373
TargetInstrInfo - Interface to description of machine instruction set.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
unsigned getMaxStoresPerMemcpy(bool OptSize) const
Get maximum # of store operations permitted for llvm.memcpy.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getMaxStoresPerMemmove(bool OptSize) const
Get maximum # of store operations permitted for llvm.memmove.
Align getMinStackArgumentAlignment() const
Return the minimum stack alignment of an argument.
unsigned getMaxStoresPerMemset(bool OptSize) const
Get maximum # of store operations permitted for llvm.memset.
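A hedged sketch of how such a target query might gate an unaligned wide access; Ctx, DL, TLI, and the i64 type are assumptions for illustration.
// Hypothetical sketch: ask the target before emitting an unaligned i64 store.
unsigned Fast = 0;
bool Allowed = TLI.allowsMemoryAccess(Ctx, DL, MVT::i64, /*AddrSpace=*/0,
                                      Align(1), MachineMemOperand::MOStore,
                                      &Fast);
if (!Allowed) {
  // fall back to a narrower or aligned sequence
}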
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual Register getRegisterByName(const char *RegName, LLT Ty, const MachineFunction &MF) const
Return the register ID of the name passed in.
const Triple & getTargetTriple() const
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:560
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
static Type * getX86_FP80Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static Type * getVoidTy(LLVMContext &C)
static Type * getFP128Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
Definition: LegalizerInfo.h:65
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
Definition: LegalizerInfo.h:83
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
Definition: LegalizerInfo.h:57
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegalizerInfo.h:74
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
Definition: LegalizerInfo.h:52
@ Custom
The target wants to do something special with this combination of operand and type.
Definition: LegalizerInfo.h:87
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
Definition: LegalizerInfo.h:71
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
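For example, the runtime routine for an f32-to-i64 signed conversion could be picked like this; a sketch under the assumption that such a routine exists for the target.
// Hypothetical sketch: map value types to the matching runtime call.
RTLIB::Libcall LC = RTLIB::getFPTOSINT(MVT::f32, MVT::i64);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "no libcall for this conversion");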
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:853
int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition: MathExtras.h:244
Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition: Utils.cpp:1974
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:639
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition: Utils.cpp:1516
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition: Utils.cpp:1561
LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:394
LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition: Utils.cpp:1165
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition: MathExtras.h:366
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition: Utils.cpp:493
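A short sketch of extractParts splitting a wide value; Src is a hypothetical s128 register, and MIRBuilder and MRI are the usual builder and register-info objects assumed to be in scope.
// Hypothetical sketch: break an s128 register into four s32 pieces.
SmallVector<Register, 4> Parts;
extractParts(Src, LLT::scalar(32), /*NumParts=*/4, Parts, MIRBuilder, MRI);
// Parts[0]..Parts[3] now hold the four 32-bit pieces, low to high.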
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition: MathExtras.h:235
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:426
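A small sketch of guarding a fold on a known-constant operand; Reg is a hypothetical virtual register.
// Hypothetical sketch: only fold when the operand is a constant.
if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Reg, MRI)) {
  const APInt &Cst = VRegAndVal->Value;
  // ... use Cst to pick a cheaper expansion ...
}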
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition: Utils.h:334
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition: Alignment.h:111
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition: Utils.cpp:1253
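A worked example of the two type helpers above, under the total-size behaviour their descriptions imply; the vector shapes are chosen purely for illustration.
// Hypothetical sketch: LCM/GCD of <3 x s32> and <4 x s32> by total size.
LLT OrigTy   = LLT::fixed_vector(3, 32);   // 96 bits
LLT TargetTy = LLT::fixed_vector(4, 32);   // 128 bits
LLT Lcm = getLCMType(OrigTy, TargetTy);    // <12 x s32> (384 bits)
LLT Gcd = getGCDType(OrigTy, TargetTy);    // s32 (32 bits)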
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition: Utils.cpp:597
#define N
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:276
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:250
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:277
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Align previous() const
Definition: Alignment.h:88
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
LegalizeAction Action
The action to take or the final answer.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)