1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
36#include "llvm/Support/Debug.h"
40#include <numeric>
41#include <optional>
42
43#define DEBUG_TYPE "legalizer"
44
45using namespace llvm;
46using namespace LegalizeActions;
47using namespace MIPatternMatch;
48
49/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
50///
51/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
52/// with any leftover piece as type \p LeftoverTy
53///
54/// Returns -1 in the first element of the pair if the breakdown is not
55/// satisfiable.
56static std::pair<int, int>
57getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
58 assert(!LeftoverTy.isValid() && "this is an out argument");
59
60 unsigned Size = OrigTy.getSizeInBits();
61 unsigned NarrowSize = NarrowTy.getSizeInBits();
62 unsigned NumParts = Size / NarrowSize;
63 unsigned LeftoverSize = Size - NumParts * NarrowSize;
64 assert(Size > NarrowSize);
65
66 if (LeftoverSize == 0)
67 return {NumParts, 0};
68
69 if (NarrowTy.isVector()) {
70 unsigned EltSize = OrigTy.getScalarSizeInBits();
71 if (LeftoverSize % EltSize != 0)
72 return {-1, -1};
73 LeftoverTy =
74 LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
75 OrigTy.getElementType());
76 } else {
77 LeftoverTy = LLT::scalar(LeftoverSize);
78 }
79
80 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
81 return std::make_pair(NumParts, NumLeftover);
82}
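// A quick worked example of the breakdown above: with OrigTy = <5 x s16>
// (80 bits) and NarrowTy = <2 x s16> (32 bits), NumParts is 2 and the 16-bit
// remainder divides the element size evenly, so LeftoverTy becomes s16 and
// the result is {2, 1}. With OrigTy = s96 and NarrowTy = s32 there is no
// remainder and the result is simply {3, 0}.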
83
84 Type *llvm::getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
85 
86 if (!Ty.isScalar())
87 return nullptr;
88
89 switch (Ty.getSizeInBits()) {
90 case 16:
91 return Type::getHalfTy(Ctx);
92 case 32:
93 return Type::getFloatTy(Ctx);
94 case 64:
95 return Type::getDoubleTy(Ctx);
96 case 80:
97 return Type::getX86_FP80Ty(Ctx);
98 case 128:
99 return Type::getFP128Ty(Ctx);
100 default:
101 return nullptr;
102 }
103}
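// For example, this returns Type::getFloatTy(Ctx) for LLT::scalar(32) and
// Type::getFP128Ty(Ctx) for LLT::scalar(128); any vector or pointer type, or
// a scalar width without an IEEE/x87 equivalent (e.g. s20), yields nullptr.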
104
105 LegalizerHelper::LegalizerHelper(MachineFunction &MF,
106 GISelChangeObserver &Observer,
107 MachineIRBuilder &Builder)
108 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
109 LI(*MF.getSubtarget().getLegalizerInfo()),
110 TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
111
112 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
113 GISelChangeObserver &Observer,
114 MachineIRBuilder &B, GISelKnownBits *KB)
115 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
116 TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
117
118 LegalizerHelper::LegalizeResult
119 LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
120 LostDebugLocObserver &LocObserver) {
121 LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
122
123 MIRBuilder.setInstrAndDebugLoc(MI);
124 
125 if (isa<GIntrinsic>(MI))
126 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
127 auto Step = LI.getAction(MI, MRI);
128 switch (Step.Action) {
129 case Legal:
130 LLVM_DEBUG(dbgs() << ".. Already legal\n");
131 return AlreadyLegal;
132 case Libcall:
133 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
134 return libcall(MI, LocObserver);
135 case NarrowScalar:
136 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
137 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
138 case WidenScalar:
139 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
140 return widenScalar(MI, Step.TypeIdx, Step.NewType);
141 case Bitcast:
142 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
143 return bitcast(MI, Step.TypeIdx, Step.NewType);
144 case Lower:
145 LLVM_DEBUG(dbgs() << ".. Lower\n");
146 return lower(MI, Step.TypeIdx, Step.NewType);
147 case FewerElements:
148 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
149 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
150 case MoreElements:
151 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
152 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
153 case Custom:
154 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
155 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
156 : UnableToLegalize;
157 default:
158 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
159 return UnableToLegalize;
160 }
161}
162
163void LegalizerHelper::insertParts(Register DstReg,
164 LLT ResultTy, LLT PartTy,
165 ArrayRef<Register> PartRegs,
166 LLT LeftoverTy,
167 ArrayRef<Register> LeftoverRegs) {
168 if (!LeftoverTy.isValid()) {
169 assert(LeftoverRegs.empty());
170
171 if (!ResultTy.isVector()) {
172 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
173 return;
174 }
175
176 if (PartTy.isVector())
177 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
178 else
179 MIRBuilder.buildBuildVector(DstReg, PartRegs);
180 return;
181 }
182
183 // Merge sub-vectors with different numbers of elements and insert into DstReg.
184 if (ResultTy.isVector()) {
185 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
186 SmallVector<Register, 8> AllRegs(PartRegs.begin(), PartRegs.end());
187 AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
188 return mergeMixedSubvectors(DstReg, AllRegs);
189 }
190
191 SmallVector<Register> GCDRegs;
192 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
193 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
194 extractGCDType(GCDRegs, GCDTy, PartReg);
195 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
196 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
197}
198
199void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
200 Register Reg) {
201 LLT Ty = MRI.getType(Reg);
202 SmallVector<Register, 8> RegElts;
203 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
204 MIRBuilder, MRI);
205 Elts.append(RegElts);
206}
207
208/// Merge \p PartRegs with different types into \p DstReg.
209void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
210 ArrayRef<Register> PartRegs) {
211 SmallVector<Register, 32> AllElts;
212 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
213 appendVectorElts(AllElts, PartRegs[i]);
214
215 Register Leftover = PartRegs[PartRegs.size() - 1];
216 if (!MRI.getType(Leftover).isVector())
217 AllElts.push_back(Leftover);
218 else
219 appendVectorElts(AllElts, Leftover);
220
221 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
222}
223
224/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
225 static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
226 const MachineInstr &MI) {
227 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
228
229 const int StartIdx = Regs.size();
230 const int NumResults = MI.getNumOperands() - 1;
231 Regs.resize(Regs.size() + NumResults);
232 for (int I = 0; I != NumResults; ++I)
233 Regs[StartIdx + I] = MI.getOperand(I).getReg();
234}
235
236void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
237 LLT GCDTy, Register SrcReg) {
238 LLT SrcTy = MRI.getType(SrcReg);
239 if (SrcTy == GCDTy) {
240 // If the source already evenly divides the result type, we don't need to do
241 // anything.
242 Parts.push_back(SrcReg);
243 } else {
244 // Need to split into common type sized pieces.
245 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
246 getUnmergeResults(Parts, *Unmerge);
247 }
248}
249
250LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
251 LLT NarrowTy, Register SrcReg) {
252 LLT SrcTy = MRI.getType(SrcReg);
253 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
254 extractGCDType(Parts, GCDTy, SrcReg);
255 return GCDTy;
256}
257
258LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
259 SmallVectorImpl<Register> &VRegs,
260 unsigned PadStrategy) {
261 LLT LCMTy = getLCMType(DstTy, NarrowTy);
262
263 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
264 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
265 int NumOrigSrc = VRegs.size();
266
267 Register PadReg;
268
269 // Get a value we can use to pad the source value if the sources won't evenly
270 // cover the result type.
271 if (NumOrigSrc < NumParts * NumSubParts) {
272 if (PadStrategy == TargetOpcode::G_ZEXT)
273 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
274 else if (PadStrategy == TargetOpcode::G_ANYEXT)
275 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
276 else {
277 assert(PadStrategy == TargetOpcode::G_SEXT);
278
279 // Shift the sign bit of the low register through the high register.
280 auto ShiftAmt =
281 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
282 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
283 }
284 }
285
286 // Registers for the final merge to be produced.
287 SmallVector<Register, 4> Remerge(NumParts);
288
289 // Registers needed for intermediate merges, which will be merged into a
290 // source for Remerge.
291 SmallVector<Register, 4> SubMerge(NumSubParts);
292
293 // Once we've fully read off the end of the original source bits, we can reuse
294 // the same high bits for remaining padding elements.
295 Register AllPadReg;
296
297 // Build merges to the LCM type to cover the original result type.
298 for (int I = 0; I != NumParts; ++I) {
299 bool AllMergePartsArePadding = true;
300
301 // Build the requested merges to the requested type.
302 for (int J = 0; J != NumSubParts; ++J) {
303 int Idx = I * NumSubParts + J;
304 if (Idx >= NumOrigSrc) {
305 SubMerge[J] = PadReg;
306 continue;
307 }
308
309 SubMerge[J] = VRegs[Idx];
310
311 // There are meaningful bits here we can't reuse later.
312 AllMergePartsArePadding = false;
313 }
314
315 // If we've filled up a complete piece with padding bits, we can directly
316 // emit the natural sized constant if applicable, rather than a merge of
317 // smaller constants.
318 if (AllMergePartsArePadding && !AllPadReg) {
319 if (PadStrategy == TargetOpcode::G_ANYEXT)
320 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
321 else if (PadStrategy == TargetOpcode::G_ZEXT)
322 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
323
324 // If this is a sign extension, we can't materialize a trivial constant
325 // with the right type and have to produce a merge.
326 }
327
328 if (AllPadReg) {
329 // Avoid creating additional instructions if we're just adding additional
330 // copies of padding bits.
331 Remerge[I] = AllPadReg;
332 continue;
333 }
334
335 if (NumSubParts == 1)
336 Remerge[I] = SubMerge[0];
337 else
338 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
339
340 // In the sign extend padding case, re-use the first all-signbit merge.
341 if (AllMergePartsArePadding && !AllPadReg)
342 AllPadReg = Remerge[I];
343 }
344
345 VRegs = std::move(Remerge);
346 return LCMTy;
347}
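// A small illustration of the merge/padding logic: with DstTy = s64,
// NarrowTy = s32 and GCDTy = s16, LCMTy is s64, NumParts = 2 and
// NumSubParts = 2. If only three s16 sources are supplied, the fourth slot is
// filled with PadReg (zero, undef, or a copy of the sign bits, depending on
// PadStrategy), and VRegs is replaced by the two s32 merge results.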
348
349void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
350 ArrayRef<Register> RemergeRegs) {
351 LLT DstTy = MRI.getType(DstReg);
352
353 // Create the merge to the widened source, and extract the relevant bits into
354 // the result.
355
356 if (DstTy == LCMTy) {
357 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
358 return;
359 }
360
361 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
362 if (DstTy.isScalar() && LCMTy.isScalar()) {
363 MIRBuilder.buildTrunc(DstReg, Remerge);
364 return;
365 }
366
367 if (LCMTy.isVector()) {
368 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
369 SmallVector<Register, 8> UnmergeDefs(NumDefs);
370 UnmergeDefs[0] = DstReg;
371 for (unsigned I = 1; I != NumDefs; ++I)
372 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
373
374 MIRBuilder.buildUnmerge(UnmergeDefs,
375 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
376 return;
377 }
378
379 llvm_unreachable("unhandled case");
380}
381
382static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
383#define RTLIBCASE_INT(LibcallPrefix) \
384 do { \
385 switch (Size) { \
386 case 32: \
387 return RTLIB::LibcallPrefix##32; \
388 case 64: \
389 return RTLIB::LibcallPrefix##64; \
390 case 128: \
391 return RTLIB::LibcallPrefix##128; \
392 default: \
393 llvm_unreachable("unexpected size"); \
394 } \
395 } while (0)
396
397#define RTLIBCASE(LibcallPrefix) \
398 do { \
399 switch (Size) { \
400 case 32: \
401 return RTLIB::LibcallPrefix##32; \
402 case 64: \
403 return RTLIB::LibcallPrefix##64; \
404 case 80: \
405 return RTLIB::LibcallPrefix##80; \
406 case 128: \
407 return RTLIB::LibcallPrefix##128; \
408 default: \
409 llvm_unreachable("unexpected size"); \
410 } \
411 } while (0)
412
413 switch (Opcode) {
414 case TargetOpcode::G_MUL:
415 RTLIBCASE_INT(MUL_I);
416 case TargetOpcode::G_SDIV:
417 RTLIBCASE_INT(SDIV_I);
418 case TargetOpcode::G_UDIV:
419 RTLIBCASE_INT(UDIV_I);
420 case TargetOpcode::G_SREM:
421 RTLIBCASE_INT(SREM_I);
422 case TargetOpcode::G_UREM:
423 RTLIBCASE_INT(UREM_I);
424 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
425 RTLIBCASE_INT(CTLZ_I);
426 case TargetOpcode::G_FADD:
427 RTLIBCASE(ADD_F);
428 case TargetOpcode::G_FSUB:
429 RTLIBCASE(SUB_F);
430 case TargetOpcode::G_FMUL:
431 RTLIBCASE(MUL_F);
432 case TargetOpcode::G_FDIV:
433 RTLIBCASE(DIV_F);
434 case TargetOpcode::G_FEXP:
435 RTLIBCASE(EXP_F);
436 case TargetOpcode::G_FEXP2:
437 RTLIBCASE(EXP2_F);
438 case TargetOpcode::G_FEXP10:
439 RTLIBCASE(EXP10_F);
440 case TargetOpcode::G_FREM:
441 RTLIBCASE(REM_F);
442 case TargetOpcode::G_FPOW:
443 RTLIBCASE(POW_F);
444 case TargetOpcode::G_FPOWI:
445 RTLIBCASE(POWI_F);
446 case TargetOpcode::G_FMA:
447 RTLIBCASE(FMA_F);
448 case TargetOpcode::G_FSIN:
449 RTLIBCASE(SIN_F);
450 case TargetOpcode::G_FCOS:
451 RTLIBCASE(COS_F);
452 case TargetOpcode::G_FTAN:
453 RTLIBCASE(TAN_F);
454 case TargetOpcode::G_FASIN:
455 RTLIBCASE(ASIN_F);
456 case TargetOpcode::G_FACOS:
457 RTLIBCASE(ACOS_F);
458 case TargetOpcode::G_FATAN:
459 RTLIBCASE(ATAN_F);
460 case TargetOpcode::G_FATAN2:
461 RTLIBCASE(ATAN2_F);
462 case TargetOpcode::G_FSINH:
463 RTLIBCASE(SINH_F);
464 case TargetOpcode::G_FCOSH:
465 RTLIBCASE(COSH_F);
466 case TargetOpcode::G_FTANH:
467 RTLIBCASE(TANH_F);
468 case TargetOpcode::G_FLOG10:
469 RTLIBCASE(LOG10_F);
470 case TargetOpcode::G_FLOG:
471 RTLIBCASE(LOG_F);
472 case TargetOpcode::G_FLOG2:
473 RTLIBCASE(LOG2_F);
474 case TargetOpcode::G_FLDEXP:
475 RTLIBCASE(LDEXP_F);
476 case TargetOpcode::G_FCEIL:
477 RTLIBCASE(CEIL_F);
478 case TargetOpcode::G_FFLOOR:
479 RTLIBCASE(FLOOR_F);
480 case TargetOpcode::G_FMINNUM:
481 RTLIBCASE(FMIN_F);
482 case TargetOpcode::G_FMAXNUM:
483 RTLIBCASE(FMAX_F);
484 case TargetOpcode::G_FSQRT:
485 RTLIBCASE(SQRT_F);
486 case TargetOpcode::G_FRINT:
487 RTLIBCASE(RINT_F);
488 case TargetOpcode::G_FNEARBYINT:
489 RTLIBCASE(NEARBYINT_F);
490 case TargetOpcode::G_INTRINSIC_TRUNC:
491 RTLIBCASE(TRUNC_F);
492 case TargetOpcode::G_INTRINSIC_ROUND:
493 RTLIBCASE(ROUND_F);
494 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
495 RTLIBCASE(ROUNDEVEN_F);
496 case TargetOpcode::G_INTRINSIC_LRINT:
497 RTLIBCASE(LRINT_F);
498 case TargetOpcode::G_INTRINSIC_LLRINT:
499 RTLIBCASE(LLRINT_F);
500 }
501 llvm_unreachable("Unknown libcall function");
502#undef RTLIBCASE_INT
503#undef RTLIBCASE
504}
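// For instance, getRTLibDesc(TargetOpcode::G_FADD, 32) yields RTLIB::ADD_F32
// and getRTLibDesc(TargetOpcode::G_MUL, 64) yields RTLIB::MUL_I64; sizes not
// covered by the RTLIBCASE tables hit the "unexpected size" unreachable.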
505
506/// True if an instruction is in tail position in its caller. Intended for
507/// legalizing libcalls as tail calls when possible.
508 static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
509 MachineInstr &MI,
510 const TargetInstrInfo &TII,
511 MachineRegisterInfo &MRI) {
512 MachineBasicBlock &MBB = *MI.getParent();
513 const Function &F = MBB.getParent()->getFunction();
514
515 // Conservatively require the attributes of the call to match those of
516 // the return. Ignore NoAlias and NonNull because they don't affect the
517 // call sequence.
518 AttributeList CallerAttrs = F.getAttributes();
519 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
520 .removeAttribute(Attribute::NoAlias)
521 .removeAttribute(Attribute::NonNull)
522 .hasAttributes())
523 return false;
524
525 // It's not safe to eliminate the sign / zero extension of the return value.
526 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
527 CallerAttrs.hasRetAttr(Attribute::SExt))
528 return false;
529
530 // Only tail call if the following instruction is a standard return or if we
531 // have a `thisreturn` callee, and a sequence like:
532 //
533 // G_MEMCPY %0, %1, %2
534 // $x0 = COPY %0
535 // RET_ReallyLR implicit $x0
536 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
537 if (Next != MBB.instr_end() && Next->isCopy()) {
538 if (MI.getOpcode() == TargetOpcode::G_BZERO)
539 return false;
540
541 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
542 // memcpy/etc routines return the same parameter. For others it will be the
543 // returned value.
544 Register VReg = MI.getOperand(0).getReg();
545 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
546 return false;
547
548 Register PReg = Next->getOperand(0).getReg();
549 if (!PReg.isPhysical())
550 return false;
551
552 auto Ret = next_nodbg(Next, MBB.instr_end());
553 if (Ret == MBB.instr_end() || !Ret->isReturn())
554 return false;
555
556 if (Ret->getNumImplicitOperands() != 1)
557 return false;
558
559 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
560 return false;
561
562 // Skip over the COPY that we just validated.
563 Next = Ret;
564 }
565
566 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
567 return false;
568
569 return true;
570}
571
572 LegalizerHelper::LegalizeResult
573 llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
574 const CallLowering::ArgInfo &Result,
575 ArrayRef<CallLowering::ArgInfo> Args,
576 const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
577 MachineInstr *MI) {
578 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
579
580 CallLowering::CallLoweringInfo Info;
581 Info.CallConv = CC;
582 Info.Callee = MachineOperand::CreateES(Name);
583 Info.OrigRet = Result;
584 if (MI)
585 Info.IsTailCall =
586 (Result.Ty->isVoidTy() ||
587 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
588 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
589 *MIRBuilder.getMRI());
590
591 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
592 if (!CLI.lowerCall(MIRBuilder, Info))
593 return LegalizerHelper::UnableToLegalize;
594 
595 if (MI && Info.LoweredTailCall) {
596 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
597
598 // Check debug locations before removing the return.
599 LocObserver.checkpoint(true);
600
601 // We must have a return following the call (or debug insts) to get past
602 // isLibCallInTailPosition.
603 do {
604 MachineInstr *Next = MI->getNextNode();
605 assert(Next &&
606 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
607 "Expected instr following MI to be return or debug inst?");
608 // We lowered a tail call, so the call is now the return from the block.
609 // Delete the old return.
610 Next->eraseFromParent();
611 } while (MI->getNextNode());
612
613 // We expect to lose the debug location from the return.
614 LocObserver.checkpoint(false);
615 }
616 return LegalizerHelper::Legalized;
617 }
618
619 LegalizerHelper::LegalizeResult
620 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
621 const CallLowering::ArgInfo &Result,
622 ArrayRef<CallLowering::ArgInfo> Args,
623 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
624 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
625 const char *Name = TLI.getLibcallName(Libcall);
626 if (!Name)
627 return LegalizerHelper::UnableToLegalize;
628 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
629 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
630}
631
632// Useful for libcalls where all operands have the same type.
633 static LegalizerHelper::LegalizeResult
634 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
635 Type *OpType, LostDebugLocObserver &LocObserver) {
636 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
637
638 // FIXME: What does the original arg index mean here?
639 SmallVector<CallLowering::ArgInfo, 3> Args;
640 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
641 Args.push_back({MO.getReg(), OpType, 0});
642 return createLibcall(MIRBuilder, Libcall,
643 {MI.getOperand(0).getReg(), OpType, 0}, Args,
644 LocObserver, &MI);
645}
646
647 LegalizerHelper::LegalizeResult
648 llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
649 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
650 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
651
652 SmallVector<CallLowering::ArgInfo, 3> Args;
653 // Add all the args, except for the last which is an imm denoting 'tail'.
654 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
655 Register Reg = MI.getOperand(i).getReg();
656
657 // Need to derive an IR type for call lowering.
658 LLT OpLLT = MRI.getType(Reg);
659 Type *OpTy = nullptr;
660 if (OpLLT.isPointer())
661 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
662 else
663 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
664 Args.push_back({Reg, OpTy, 0});
665 }
666
667 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
668 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
669 RTLIB::Libcall RTLibcall;
670 unsigned Opc = MI.getOpcode();
671 switch (Opc) {
672 case TargetOpcode::G_BZERO:
673 RTLibcall = RTLIB::BZERO;
674 break;
675 case TargetOpcode::G_MEMCPY:
676 RTLibcall = RTLIB::MEMCPY;
677 Args[0].Flags[0].setReturned();
678 break;
679 case TargetOpcode::G_MEMMOVE:
680 RTLibcall = RTLIB::MEMMOVE;
681 Args[0].Flags[0].setReturned();
682 break;
683 case TargetOpcode::G_MEMSET:
684 RTLibcall = RTLIB::MEMSET;
685 Args[0].Flags[0].setReturned();
686 break;
687 default:
688 llvm_unreachable("unsupported opcode");
689 }
690 const char *Name = TLI.getLibcallName(RTLibcall);
691
692 // Unsupported libcall on the target.
693 if (!Name) {
694 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
695 << MIRBuilder.getTII().getName(Opc) << "\n");
696 return LegalizerHelper::UnableToLegalize;
697 }
698
699 CallLowering::CallLoweringInfo Info;
700 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
701 Info.Callee = MachineOperand::CreateES(Name);
702 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
703 Info.IsTailCall =
704 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
705 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
706
707 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
708 if (!CLI.lowerCall(MIRBuilder, Info))
709 return LegalizerHelper::UnableToLegalize;
710 
711 if (Info.LoweredTailCall) {
712 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
713
714 // Check debug locations before removing the return.
715 LocObserver.checkpoint(true);
716
717 // We must have a return following the call (or debug insts) to get past
718 // isLibCallInTailPosition.
719 do {
720 MachineInstr *Next = MI.getNextNode();
721 assert(Next &&
722 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
723 "Expected instr following MI to be return or debug inst?");
724 // We lowered a tail call, so the call is now the return from the block.
725 // Delete the old return.
726 Next->eraseFromParent();
727 } while (MI.getNextNode());
728
729 // We expect to lose the debug location from the return.
730 LocObserver.checkpoint(false);
731 }
732
733 return LegalizerHelper::Legalized;
734 }
735
736 static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
737 unsigned Opc = MI.getOpcode();
738 auto &AtomicMI = cast<GMemOperation>(MI);
739 auto &MMO = AtomicMI.getMMO();
740 auto Ordering = MMO.getMergedOrdering();
741 LLT MemType = MMO.getMemoryType();
742 uint64_t MemSize = MemType.getSizeInBytes();
743 if (MemType.isVector())
744 return RTLIB::UNKNOWN_LIBCALL;
745
746#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
747#define LCALL5(A) \
748 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
749 switch (Opc) {
750 case TargetOpcode::G_ATOMIC_CMPXCHG:
751 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
752 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
753 return getOutlineAtomicHelper(LC, Ordering, MemSize);
754 }
755 case TargetOpcode::G_ATOMICRMW_XCHG: {
756 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
757 return getOutlineAtomicHelper(LC, Ordering, MemSize);
758 }
759 case TargetOpcode::G_ATOMICRMW_ADD:
760 case TargetOpcode::G_ATOMICRMW_SUB: {
761 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
762 return getOutlineAtomicHelper(LC, Ordering, MemSize);
763 }
764 case TargetOpcode::G_ATOMICRMW_AND: {
765 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
766 return getOutlineAtomicHelper(LC, Ordering, MemSize);
767 }
768 case TargetOpcode::G_ATOMICRMW_OR: {
769 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
770 return getOutlineAtomicHelper(LC, Ordering, MemSize);
771 }
772 case TargetOpcode::G_ATOMICRMW_XOR: {
773 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
774 return getOutlineAtomicHelper(LC, Ordering, MemSize);
775 }
776 default:
777 return RTLIB::UNKNOWN_LIBCALL;
778 }
779#undef LCALLS
780#undef LCALL5
781}
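// As an illustration, a 4-byte G_ATOMICRMW_XCHG with acquire ordering maps to
// RTLIB::OUTLINE_ATOMIC_SWP4_ACQ; vector memory types and opcodes without an
// outline-atomic helper fall back to RTLIB::UNKNOWN_LIBCALL.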
782
783 static LegalizerHelper::LegalizeResult
784 createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
785 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
786
787 Type *RetTy;
788 SmallVector<Register> RetRegs;
789 SmallVector<CallLowering::ArgInfo, 3> Args;
790 unsigned Opc = MI.getOpcode();
791 switch (Opc) {
792 case TargetOpcode::G_ATOMIC_CMPXCHG:
793 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
794 Register Success;
795 LLT SuccessLLT;
796 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
797 MI.getFirst4RegLLTs();
798 RetRegs.push_back(Ret);
799 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
800 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
801 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
802 NewLLT) = MI.getFirst5RegLLTs();
803 RetRegs.push_back(Success);
804 RetTy = StructType::get(
805 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
806 }
807 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
808 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
809 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
810 break;
811 }
812 case TargetOpcode::G_ATOMICRMW_XCHG:
813 case TargetOpcode::G_ATOMICRMW_ADD:
814 case TargetOpcode::G_ATOMICRMW_SUB:
815 case TargetOpcode::G_ATOMICRMW_AND:
816 case TargetOpcode::G_ATOMICRMW_OR:
817 case TargetOpcode::G_ATOMICRMW_XOR: {
818 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
819 RetRegs.push_back(Ret);
820 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
821 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
822 Val =
823 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
824 .getReg(0);
825 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
826 Val =
827 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
828 .getReg(0);
829 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
830 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
831 break;
832 }
833 default:
834 llvm_unreachable("unsupported opcode");
835 }
836
837 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
838 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
839 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
840 const char *Name = TLI.getLibcallName(RTLibcall);
841
842 // Unsupported libcall on the target.
843 if (!Name) {
844 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
845 << MIRBuilder.getTII().getName(Opc) << "\n");
846 return LegalizerHelper::UnableToLegalize;
847 }
848
849 CallLowering::CallLoweringInfo Info;
850 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
851 Info.Callee = MachineOperand::CreateES(Name);
852 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
853
854 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
855 if (!CLI.lowerCall(MIRBuilder, Info))
856 return LegalizerHelper::UnableToLegalize;
857 
858 return LegalizerHelper::Legalized;
859 }
860
861static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
862 Type *FromType) {
863 auto ToMVT = MVT::getVT(ToType);
864 auto FromMVT = MVT::getVT(FromType);
865
866 switch (Opcode) {
867 case TargetOpcode::G_FPEXT:
868 return RTLIB::getFPEXT(FromMVT, ToMVT);
869 case TargetOpcode::G_FPTRUNC:
870 return RTLIB::getFPROUND(FromMVT, ToMVT);
871 case TargetOpcode::G_FPTOSI:
872 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
873 case TargetOpcode::G_FPTOUI:
874 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
875 case TargetOpcode::G_SITOFP:
876 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
877 case TargetOpcode::G_UITOFP:
878 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
879 }
880 llvm_unreachable("Unsupported libcall function");
881}
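// For example, a G_FPEXT from f32 to f64 resolves to RTLIB::FPEXT_F32_F64
// (typically the __extendsfdf2 soft-float routine), and a G_FPTOSI from f64
// to a 32-bit integer resolves to RTLIB::FPTOSINT_F64_I32.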
882
883 static LegalizerHelper::LegalizeResult
884 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
885 Type *FromType, LostDebugLocObserver &LocObserver,
886 const TargetLowering &TLI, bool IsSigned = false) {
887 CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
888 if (FromType->isIntegerTy()) {
889 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
890 Arg.Flags[0].setSExt();
891 else
892 Arg.Flags[0].setZExt();
893 }
894
895 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
896 return createLibcall(MIRBuilder, Libcall,
897 {MI.getOperand(0).getReg(), ToType, 0}, Arg, LocObserver,
898 &MI);
899}
900
901static RTLIB::Libcall
902 getStateLibcallDesc(MachineInstr &MI) {
903 RTLIB::Libcall RTLibcall;
904 switch (MI.getOpcode()) {
905 case TargetOpcode::G_GET_FPENV:
906 RTLibcall = RTLIB::FEGETENV;
907 break;
908 case TargetOpcode::G_SET_FPENV:
909 case TargetOpcode::G_RESET_FPENV:
910 RTLibcall = RTLIB::FESETENV;
911 break;
912 case TargetOpcode::G_GET_FPMODE:
913 RTLibcall = RTLIB::FEGETMODE;
914 break;
915 case TargetOpcode::G_SET_FPMODE:
916 case TargetOpcode::G_RESET_FPMODE:
917 RTLibcall = RTLIB::FESETMODE;
918 break;
919 default:
920 llvm_unreachable("Unexpected opcode");
921 }
922 return RTLibcall;
923}
924
925// Some library functions that read FP state (fegetmode, fegetenv) write the
926// state into a region in memory. IR intrinsics that do the same operations
927 // (get_fpmode, get_fpenv) return the state as an integer value. To implement these
928 // intrinsics via the library functions, we need to use a temporary variable,
929// for example:
930//
931// %0:_(s32) = G_GET_FPMODE
932//
933// is transformed to:
934//
935// %1:_(p0) = G_FRAME_INDEX %stack.0
936// BL &fegetmode
937 // %0:_(s32) = G_LOAD %1
938//
939 LegalizerHelper::LegalizeResult
940 LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
941 MachineInstr &MI,
942 LostDebugLocObserver &LocObserver) {
943 const DataLayout &DL = MIRBuilder.getDataLayout();
944 auto &MF = MIRBuilder.getMF();
945 auto &MRI = *MIRBuilder.getMRI();
946 auto &Ctx = MF.getFunction().getContext();
947
948 // Create temporary, where library function will put the read state.
949 Register Dst = MI.getOperand(0).getReg();
950 LLT StateTy = MRI.getType(Dst);
951 TypeSize StateSize = StateTy.getSizeInBytes();
952 Align TempAlign = getStackTemporaryAlignment(StateTy);
953 MachinePointerInfo TempPtrInfo;
954 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
955
956 // Create a call to library function, with the temporary as an argument.
957 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
958 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
959 RTLIB::Libcall RTLibcall = getStateLibcallDesc(MI);
960 auto Res =
961 createLibcall(MIRBuilder, RTLibcall,
962 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
963 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
964 LocObserver, nullptr);
965 if (Res != LegalizerHelper::Legalized)
966 return Res;
967
968 // Create a load from the temporary.
969 MachineMemOperand *MMO = MF.getMachineMemOperand(
970 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
971 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
972
973 return Legalized;
974 }
975
976 // Similar to `createGetStateLibcall`, this function calls a library function
977 // using transient space on the stack. In this case the library function reads
978 // the content of the memory region.
979 LegalizerHelper::LegalizeResult
980 LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
981 MachineInstr &MI,
982 LostDebugLocObserver &LocObserver) {
983 const DataLayout &DL = MIRBuilder.getDataLayout();
984 auto &MF = MIRBuilder.getMF();
985 auto &MRI = *MIRBuilder.getMRI();
986 auto &Ctx = MF.getFunction().getContext();
987
988 // Create temporary, where library function will get the new state.
989 Register Src = MI.getOperand(0).getReg();
990 LLT StateTy = MRI.getType(Src);
991 TypeSize StateSize = StateTy.getSizeInBytes();
992 Align TempAlign = getStackTemporaryAlignment(StateTy);
993 MachinePointerInfo TempPtrInfo;
994 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
995
996 // Put the new state into the temporary.
997 MachineMemOperand *MMO = MF.getMachineMemOperand(
998 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
999 MIRBuilder.buildStore(Src, Temp, *MMO);
1000
1001 // Create a call to library function, with the temporary as an argument.
1002 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1003 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1004 RTLIB::Libcall RTLibcall = getStateLibcallDesc(MI);
1005 return createLibcall(MIRBuilder, RTLibcall,
1006 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1007 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1008 LocObserver, nullptr);
1009}
1010
1011/// Returns the corresponding libcall for the given Pred and
1012/// the ICMP predicate that should be generated to compare with #0
1013/// after the libcall.
1014static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1015 getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {
1016 #define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1017 do { \
1018 switch (Size) { \
1019 case 32: \
1020 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1021 case 64: \
1022 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1023 case 128: \
1024 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1025 default: \
1026 llvm_unreachable("unexpected size"); \
1027 } \
1028 } while (0)
1029
1030 switch (Pred) {
1031 case CmpInst::FCMP_OEQ:
1032 RTLIBCASE_CMP(OEQ_F, CmpInst::ICMP_EQ);
1033 case CmpInst::FCMP_UNE:
1034 RTLIBCASE_CMP(UNE_F, CmpInst::ICMP_NE);
1035 case CmpInst::FCMP_OGE:
1036 RTLIBCASE_CMP(OGE_F, CmpInst::ICMP_SGE);
1037 case CmpInst::FCMP_OLT:
1038 RTLIBCASE_CMP(OLT_F, CmpInst::ICMP_SLT);
1039 case CmpInst::FCMP_OLE:
1040 RTLIBCASE_CMP(OLE_F, CmpInst::ICMP_SLE);
1041 case CmpInst::FCMP_OGT:
1042 RTLIBCASE_CMP(OGT_F, CmpInst::ICMP_SGT);
1043 case CmpInst::FCMP_UNO:
1044 RTLIBCASE_CMP(UO_F, CmpInst::ICMP_NE);
1045 default:
1046 return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
1047 }
1048}
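// For example, (FCMP_OEQ, 32) yields {RTLIB::OEQ_F32, CmpInst::ICMP_EQ}: the
// __eqsf2-style routine returns zero when its operands compare equal, so the
// libcall result is tested with `icmp eq` against 0. Predicates with no
// single-libcall mapping return UNKNOWN_LIBCALL and are expanded by the
// caller as combinations of these entries.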
1049
1050 LegalizerHelper::LegalizeResult
1051 LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
1052 MachineInstr &MI,
1053 LostDebugLocObserver &LocObserver) {
1054 auto &MF = MIRBuilder.getMF();
1055 auto &Ctx = MF.getFunction().getContext();
1056 const GFCmp *Cmp = cast<GFCmp>(&MI);
1057
1058 LLT OpLLT = MRI.getType(Cmp->getLHSReg());
1059 unsigned Size = OpLLT.getSizeInBits();
1060 if ((Size != 32 && Size != 64 && Size != 128) ||
1061 OpLLT != MRI.getType(Cmp->getRHSReg()))
1062 return UnableToLegalize;
1063
1064 Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
1065
1066 // DstReg type is s32
1067 const Register DstReg = Cmp->getReg(0);
1068 LLT DstTy = MRI.getType(DstReg);
1069 const auto Cond = Cmp->getCond();
1070
1071 // Reference:
1072 // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
1073 // Generates a libcall followed by ICMP.
1074 const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
1075 const CmpInst::Predicate ICmpPred,
1076 const DstOp &Res) -> Register {
1077 // FCMP libcall always returns an i32, and needs an ICMP with #0.
1078 constexpr LLT TempLLT = LLT::scalar(32);
1079 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1080 // Generate libcall, holding result in Temp
1081 const auto Status = createLibcall(
1082 MIRBuilder, Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
1083 {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
1084 LocObserver, &MI);
1085 if (!Status)
1086 return {};
1087
1088 // Compare temp with #0 to get the final result.
1089 return MIRBuilder
1090 .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
1091 .getReg(0);
1092 };
1093
1094 // Simple case if we have a direct mapping from predicate to libcall
1095 if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
1096 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1097 ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
1098 if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
1099 return Legalized;
1100 }
1101 return UnableToLegalize;
1102 }
1103
1104 // No direct mapping found; it should be generated as a combination of libcalls.
1105
1106 switch (Cond) {
1107 case CmpInst::FCMP_UEQ: {
1108 // FCMP_UEQ: unordered or equal
1109 // Convert into (FCMP_OEQ || FCMP_UNO).
1110
1111 const auto [OeqLibcall, OeqPred] =
1112 getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1113 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1114
1115 const auto [UnoLibcall, UnoPred] =
1116 getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1117 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1118 if (Oeq && Uno)
1119 MIRBuilder.buildOr(DstReg, Oeq, Uno);
1120 else
1121 return UnableToLegalize;
1122
1123 break;
1124 }
1125 case CmpInst::FCMP_ONE: {
1126 // FCMP_ONE: ordered and operands are unequal
1127 // Convert into (!FCMP_OEQ && !FCMP_UNO).
1128
1129 // We invert the predicate instead of generating a NOT
1130 // to save one instruction.
1131 // On AArch64, isel can even select the two compares into a single ccmp.
1132 const auto [OeqLibcall, OeqPred] =
1133 getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1134 const auto NotOeq =
1135 BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
1136
1137 const auto [UnoLibcall, UnoPred] =
1138 getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1139 const auto NotUno =
1140 BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
1141
1142 if (NotOeq && NotUno)
1143 MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
1144 else
1145 return UnableToLegalize;
1146
1147 break;
1148 }
1149 case CmpInst::FCMP_ULT:
1150 case CmpInst::FCMP_UGE:
1151 case CmpInst::FCMP_UGT:
1152 case CmpInst::FCMP_ULE:
1153 case CmpInst::FCMP_ORD: {
1154 // Convert into: !(inverse(Pred))
1155 // E.g. FCMP_ULT becomes !FCMP_OGE
1156 // This is equivalent to the following, but saves some instructions.
1157 // MIRBuilder.buildNot(
1158 // PredTy,
1159 // MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
1160 // Op1, Op2));
1161 const auto [InversedLibcall, InversedPred] =
1162 getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond), Size);
1163 if (!BuildLibcall(InversedLibcall,
1164 CmpInst::getInversePredicate(InversedPred), DstReg))
1165 return UnableToLegalize;
1166 break;
1167 }
1168 default:
1169 return UnableToLegalize;
1170 }
1171
1172 return Legalized;
1173}
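// Rough sketch of the emitted sequence for a directly mapped predicate on f32
// (exact register types and libcall names are target-dependent):
//   %t:_(s32) = <call to __eqsf2>(%lhs, %rhs)   ; via createLibcall
//   %dst = G_ICMP intpred(eq), %t(s32), 0
// Unmapped predicates such as FCMP_UEQ combine two such sequences with
// G_OR/G_AND as described in the switch above.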
1174
1175 // This function is used to legalize operations that set the default environment
1176 // state. In the C library a call like `fesetmode(FE_DFL_MODE)` is used for that.
1177 // On most targets supported in glibc, FE_DFL_MODE is defined as
1178 // `((const femode_t *) -1)`. That assumption is used here. If for some target
1179 // it is not true, the target must provide custom lowering.
1180 LegalizerHelper::LegalizeResult
1181 LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
1182 MachineInstr &MI,
1183 LostDebugLocObserver &LocObserver) {
1184 const DataLayout &DL = MIRBuilder.getDataLayout();
1185 auto &MF = MIRBuilder.getMF();
1186 auto &Ctx = MF.getFunction().getContext();
1187
1188 // Create an argument for the library function.
1189 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1190 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1191 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1192 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1193 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1194 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1195 MIRBuilder.buildIntToPtr(Dest, DefValue);
1196
1197 RTLIB::Libcall RTLibcall = getStateLibcallDesc(MI);
1198 return createLibcall(MIRBuilder, RTLibcall,
1199 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1200 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1201 LocObserver, &MI);
1202}
1203
1204 LegalizerHelper::LegalizeResult
1205 LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1206 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1207
1208 switch (MI.getOpcode()) {
1209 default:
1210 return UnableToLegalize;
1211 case TargetOpcode::G_MUL:
1212 case TargetOpcode::G_SDIV:
1213 case TargetOpcode::G_UDIV:
1214 case TargetOpcode::G_SREM:
1215 case TargetOpcode::G_UREM:
1216 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1217 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1218 unsigned Size = LLTy.getSizeInBits();
1219 Type *HLTy = IntegerType::get(Ctx, Size);
1220 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1221 if (Status != Legalized)
1222 return Status;
1223 break;
1224 }
1225 case TargetOpcode::G_FADD:
1226 case TargetOpcode::G_FSUB:
1227 case TargetOpcode::G_FMUL:
1228 case TargetOpcode::G_FDIV:
1229 case TargetOpcode::G_FMA:
1230 case TargetOpcode::G_FPOW:
1231 case TargetOpcode::G_FREM:
1232 case TargetOpcode::G_FCOS:
1233 case TargetOpcode::G_FSIN:
1234 case TargetOpcode::G_FTAN:
1235 case TargetOpcode::G_FACOS:
1236 case TargetOpcode::G_FASIN:
1237 case TargetOpcode::G_FATAN:
1238 case TargetOpcode::G_FATAN2:
1239 case TargetOpcode::G_FCOSH:
1240 case TargetOpcode::G_FSINH:
1241 case TargetOpcode::G_FTANH:
1242 case TargetOpcode::G_FLOG10:
1243 case TargetOpcode::G_FLOG:
1244 case TargetOpcode::G_FLOG2:
1245 case TargetOpcode::G_FEXP:
1246 case TargetOpcode::G_FEXP2:
1247 case TargetOpcode::G_FEXP10:
1248 case TargetOpcode::G_FCEIL:
1249 case TargetOpcode::G_FFLOOR:
1250 case TargetOpcode::G_FMINNUM:
1251 case TargetOpcode::G_FMAXNUM:
1252 case TargetOpcode::G_FSQRT:
1253 case TargetOpcode::G_FRINT:
1254 case TargetOpcode::G_FNEARBYINT:
1255 case TargetOpcode::G_INTRINSIC_TRUNC:
1256 case TargetOpcode::G_INTRINSIC_ROUND:
1257 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1258 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1259 unsigned Size = LLTy.getSizeInBits();
1260 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1261 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1262 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1263 return UnableToLegalize;
1264 }
1265 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1266 if (Status != Legalized)
1267 return Status;
1268 break;
1269 }
1270 case TargetOpcode::G_INTRINSIC_LRINT:
1271 case TargetOpcode::G_INTRINSIC_LLRINT: {
1272 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1273 unsigned Size = LLTy.getSizeInBits();
1274 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1275 Type *ITy = IntegerType::get(
1276 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1277 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1278 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1279 return UnableToLegalize;
1280 }
1281 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1282 LegalizeResult Status =
1283 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1284 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1285 if (Status != Legalized)
1286 return Status;
1287 MI.eraseFromParent();
1288 return Legalized;
1289 }
1290 case TargetOpcode::G_FPOWI:
1291 case TargetOpcode::G_FLDEXP: {
1292 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1293 unsigned Size = LLTy.getSizeInBits();
1294 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1295 Type *ITy = IntegerType::get(
1296 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1297 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1298 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1299 return UnableToLegalize;
1300 }
1301 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1302 SmallVector<CallLowering::ArgInfo, 2> Args = {
1303 {MI.getOperand(1).getReg(), HLTy, 0},
1304 {MI.getOperand(2).getReg(), ITy, 1}};
1305 Args[1].Flags[0].setSExt();
1306 LegalizeResult Status =
1307 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1308 Args, LocObserver, &MI);
1309 if (Status != Legalized)
1310 return Status;
1311 break;
1312 }
1313 case TargetOpcode::G_FPEXT:
1314 case TargetOpcode::G_FPTRUNC: {
1315 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1316 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1317 if (!FromTy || !ToTy)
1318 return UnableToLegalize;
1319 LegalizeResult Status =
1320 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver, TLI);
1321 if (Status != Legalized)
1322 return Status;
1323 break;
1324 }
1325 case TargetOpcode::G_FCMP: {
1326 LegalizeResult Status = createFCMPLibcall(MIRBuilder, MI, LocObserver);
1327 if (Status != Legalized)
1328 return Status;
1329 MI.eraseFromParent();
1330 return Status;
1331 }
1332 case TargetOpcode::G_FPTOSI:
1333 case TargetOpcode::G_FPTOUI: {
1334 // FIXME: Support other types
1335 Type *FromTy =
1336 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1337 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1338 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1339 return UnableToLegalize;
1340 LegalizeResult Status = conversionLibcall(
1341 MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver, TLI);
1342 if (Status != Legalized)
1343 return Status;
1344 break;
1345 }
1346 case TargetOpcode::G_SITOFP:
1347 case TargetOpcode::G_UITOFP: {
1348 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1349 Type *ToTy =
1350 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1351 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1352 return UnableToLegalize;
1353 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
1354 LegalizeResult Status =
1355 conversionLibcall(MI, MIRBuilder, ToTy, Type::getIntNTy(Ctx, FromSize),
1356 LocObserver, TLI, IsSigned);
1357 if (Status != Legalized)
1358 return Status;
1359 break;
1360 }
1361 case TargetOpcode::G_ATOMICRMW_XCHG:
1362 case TargetOpcode::G_ATOMICRMW_ADD:
1363 case TargetOpcode::G_ATOMICRMW_SUB:
1364 case TargetOpcode::G_ATOMICRMW_AND:
1365 case TargetOpcode::G_ATOMICRMW_OR:
1366 case TargetOpcode::G_ATOMICRMW_XOR:
1367 case TargetOpcode::G_ATOMIC_CMPXCHG:
1368 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1369 auto Status = createAtomicLibcall(MIRBuilder, MI);
1370 if (Status != Legalized)
1371 return Status;
1372 break;
1373 }
1374 case TargetOpcode::G_BZERO:
1375 case TargetOpcode::G_MEMCPY:
1376 case TargetOpcode::G_MEMMOVE:
1377 case TargetOpcode::G_MEMSET: {
1378 LegalizeResult Result =
1379 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1380 if (Result != Legalized)
1381 return Result;
1382 MI.eraseFromParent();
1383 return Result;
1384 }
1385 case TargetOpcode::G_GET_FPENV:
1386 case TargetOpcode::G_GET_FPMODE: {
1387 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1388 if (Result != Legalized)
1389 return Result;
1390 break;
1391 }
1392 case TargetOpcode::G_SET_FPENV:
1393 case TargetOpcode::G_SET_FPMODE: {
1394 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1395 if (Result != Legalized)
1396 return Result;
1397 break;
1398 }
1399 case TargetOpcode::G_RESET_FPENV:
1400 case TargetOpcode::G_RESET_FPMODE: {
1401 LegalizeResult Result =
1402 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1403 if (Result != Legalized)
1404 return Result;
1405 break;
1406 }
1407 }
1408
1409 MI.eraseFromParent();
1410 return Legalized;
1411}
1412
1413 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1414 unsigned TypeIdx,
1415 LLT NarrowTy) {
1416 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1417 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1418
1419 switch (MI.getOpcode()) {
1420 default:
1421 return UnableToLegalize;
1422 case TargetOpcode::G_IMPLICIT_DEF: {
1423 Register DstReg = MI.getOperand(0).getReg();
1424 LLT DstTy = MRI.getType(DstReg);
1425
1426 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1427 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1428 // FIXME: Although this would also be legal for the general case, it causes
1429 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1430 // combines not being hit). This seems to be a problem related to the
1431 // artifact combiner.
1432 if (SizeOp0 % NarrowSize != 0) {
1433 LLT ImplicitTy = NarrowTy;
1434 if (DstTy.isVector())
1435 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
1436
1437 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1438 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1439
1440 MI.eraseFromParent();
1441 return Legalized;
1442 }
1443
1444 int NumParts = SizeOp0 / NarrowSize;
1445
1447 for (int i = 0; i < NumParts; ++i)
1448 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1449
1450 if (DstTy.isVector())
1451 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1452 else
1453 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1454 MI.eraseFromParent();
1455 return Legalized;
1456 }
1457 case TargetOpcode::G_CONSTANT: {
1458 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1459 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1460 unsigned TotalSize = Ty.getSizeInBits();
1461 unsigned NarrowSize = NarrowTy.getSizeInBits();
1462 int NumParts = TotalSize / NarrowSize;
1463
1464 SmallVector<Register, 4> PartRegs;
1465 for (int I = 0; I != NumParts; ++I) {
1466 unsigned Offset = I * NarrowSize;
1467 auto K = MIRBuilder.buildConstant(NarrowTy,
1468 Val.lshr(Offset).trunc(NarrowSize));
1469 PartRegs.push_back(K.getReg(0));
1470 }
1471
1472 LLT LeftoverTy;
1473 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1474 SmallVector<Register, 1> LeftoverRegs;
1475 if (LeftoverBits != 0) {
1476 LeftoverTy = LLT::scalar(LeftoverBits);
1477 auto K = MIRBuilder.buildConstant(
1478 LeftoverTy,
1479 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1480 LeftoverRegs.push_back(K.getReg(0));
1481 }
1482
1483 insertParts(MI.getOperand(0).getReg(),
1484 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1485
1486 MI.eraseFromParent();
1487 return Legalized;
1488 }
1489 case TargetOpcode::G_SEXT:
1490 case TargetOpcode::G_ZEXT:
1491 case TargetOpcode::G_ANYEXT:
1492 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1493 case TargetOpcode::G_TRUNC: {
1494 if (TypeIdx != 1)
1495 return UnableToLegalize;
1496
1497 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1498 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1499 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1500 return UnableToLegalize;
1501 }
1502
1503 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1504 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1505 MI.eraseFromParent();
1506 return Legalized;
1507 }
1508 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1509 case TargetOpcode::G_FREEZE: {
1510 if (TypeIdx != 0)
1511 return UnableToLegalize;
1512
1513 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1514 // Should widen scalar first
1515 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1516 return UnableToLegalize;
1517
1518 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1519 SmallVector<Register, 8> Parts;
1520 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1521 Parts.push_back(
1522 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1523 .getReg(0));
1524 }
1525
1526 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1527 MI.eraseFromParent();
1528 return Legalized;
1529 }
1530 case TargetOpcode::G_ADD:
1531 case TargetOpcode::G_SUB:
1532 case TargetOpcode::G_SADDO:
1533 case TargetOpcode::G_SSUBO:
1534 case TargetOpcode::G_SADDE:
1535 case TargetOpcode::G_SSUBE:
1536 case TargetOpcode::G_UADDO:
1537 case TargetOpcode::G_USUBO:
1538 case TargetOpcode::G_UADDE:
1539 case TargetOpcode::G_USUBE:
1540 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1541 case TargetOpcode::G_MUL:
1542 case TargetOpcode::G_UMULH:
1543 return narrowScalarMul(MI, NarrowTy);
1544 case TargetOpcode::G_EXTRACT:
1545 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1546 case TargetOpcode::G_INSERT:
1547 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1548 case TargetOpcode::G_LOAD: {
1549 auto &LoadMI = cast<GLoad>(MI);
1550 Register DstReg = LoadMI.getDstReg();
1551 LLT DstTy = MRI.getType(DstReg);
1552 if (DstTy.isVector())
1553 return UnableToLegalize;
1554
1555 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1556 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1557 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1558 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1559 LoadMI.eraseFromParent();
1560 return Legalized;
1561 }
1562
1563 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1564 }
1565 case TargetOpcode::G_ZEXTLOAD:
1566 case TargetOpcode::G_SEXTLOAD: {
1567 auto &LoadMI = cast<GExtLoad>(MI);
1568 Register DstReg = LoadMI.getDstReg();
1569 Register PtrReg = LoadMI.getPointerReg();
1570
1571 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1572 auto &MMO = LoadMI.getMMO();
1573 unsigned MemSize = MMO.getSizeInBits().getValue();
1574
1575 if (MemSize == NarrowSize) {
1576 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1577 } else if (MemSize < NarrowSize) {
1578 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1579 } else if (MemSize > NarrowSize) {
1580 // FIXME: Need to split the load.
1581 return UnableToLegalize;
1582 }
1583
1584 if (isa<GZExtLoad>(LoadMI))
1585 MIRBuilder.buildZExt(DstReg, TmpReg);
1586 else
1587 MIRBuilder.buildSExt(DstReg, TmpReg);
1588
1589 LoadMI.eraseFromParent();
1590 return Legalized;
1591 }
1592 case TargetOpcode::G_STORE: {
1593 auto &StoreMI = cast<GStore>(MI);
1594
1595 Register SrcReg = StoreMI.getValueReg();
1596 LLT SrcTy = MRI.getType(SrcReg);
1597 if (SrcTy.isVector())
1598 return UnableToLegalize;
1599
1600 int NumParts = SizeOp0 / NarrowSize;
1601 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1602 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1603 if (SrcTy.isVector() && LeftoverBits != 0)
1604 return UnableToLegalize;
1605
1606 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1607 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1608 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1609 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1610 StoreMI.eraseFromParent();
1611 return Legalized;
1612 }
1613
1614 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1615 }
1616 case TargetOpcode::G_SELECT:
1617 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1618 case TargetOpcode::G_AND:
1619 case TargetOpcode::G_OR:
1620 case TargetOpcode::G_XOR: {
1621 // Legalize bitwise operation:
1622 // A = BinOp<Ty> B, C
1623 // into:
1624 // B1, ..., BN = G_UNMERGE_VALUES B
1625 // C1, ..., CN = G_UNMERGE_VALUES C
1626 // A1 = BinOp<Ty/N> B1, C2
1627 // ...
1628 // AN = BinOp<Ty/N> BN, CN
1629 // A = G_MERGE_VALUES A1, ..., AN
1630 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1631 }
1632 case TargetOpcode::G_SHL:
1633 case TargetOpcode::G_LSHR:
1634 case TargetOpcode::G_ASHR:
1635 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1636 case TargetOpcode::G_CTLZ:
1637 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1638 case TargetOpcode::G_CTTZ:
1639 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1640 case TargetOpcode::G_CTPOP:
1641 if (TypeIdx == 1)
1642 switch (MI.getOpcode()) {
1643 case TargetOpcode::G_CTLZ:
1644 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1645 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1646 case TargetOpcode::G_CTTZ:
1647 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1648 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1649 case TargetOpcode::G_CTPOP:
1650 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1651 default:
1652 return UnableToLegalize;
1653 }
1654
1655 Observer.changingInstr(MI);
1656 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1657 Observer.changedInstr(MI);
1658 return Legalized;
1659 case TargetOpcode::G_INTTOPTR:
1660 if (TypeIdx != 1)
1661 return UnableToLegalize;
1662
1663 Observer.changingInstr(MI);
1664 narrowScalarSrc(MI, NarrowTy, 1);
1665 Observer.changedInstr(MI);
1666 return Legalized;
1667 case TargetOpcode::G_PTRTOINT:
1668 if (TypeIdx != 0)
1669 return UnableToLegalize;
1670
1671 Observer.changingInstr(MI);
1672 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1673 Observer.changedInstr(MI);
1674 return Legalized;
1675 case TargetOpcode::G_PHI: {
1676 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1677 // NarrowSize.
1678 if (SizeOp0 % NarrowSize != 0)
1679 return UnableToLegalize;
1680
1681 unsigned NumParts = SizeOp0 / NarrowSize;
1682 SmallVector<Register, 2> DstRegs(NumParts);
1683 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1684 Observer.changingInstr(MI);
1685 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1686 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1687 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1688 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1689 SrcRegs[i / 2], MIRBuilder, MRI);
1690 }
1691 MachineBasicBlock &MBB = *MI.getParent();
1692 MIRBuilder.setInsertPt(MBB, MI);
1693 for (unsigned i = 0; i < NumParts; ++i) {
1694 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1695 MachineInstrBuilder MIB =
1696 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1697 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1698 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1699 }
1700 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1701 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1702 Observer.changedInstr(MI);
1703 MI.eraseFromParent();
1704 return Legalized;
1705 }
1706 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1707 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1708 if (TypeIdx != 2)
1709 return UnableToLegalize;
1710
1711 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1712 Observer.changingInstr(MI);
1713 narrowScalarSrc(MI, NarrowTy, OpIdx);
1714 Observer.changedInstr(MI);
1715 return Legalized;
1716 }
1717 case TargetOpcode::G_ICMP: {
1718 Register LHS = MI.getOperand(2).getReg();
1719 LLT SrcTy = MRI.getType(LHS);
1720 CmpInst::Predicate Pred =
1721 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1722
1723 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1724 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1725 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1726 LHSLeftoverRegs, MIRBuilder, MRI))
1727 return UnableToLegalize;
1728
1729 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1730 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1731 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1732 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1733 return UnableToLegalize;
1734
1735 // We now have the LHS and RHS of the compare split into narrow-type
1736 // registers, plus potentially some leftover type.
1737 Register Dst = MI.getOperand(0).getReg();
1738 LLT ResTy = MRI.getType(Dst);
1739 if (ICmpInst::isEquality(Pred)) {
1740 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1741 // them together. For each equal part, the result should be all 0s. For
1742 // each non-equal part, we'll get at least one 1.
1743 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1744 SmallVector<Register, 4> Xors;
1745 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1746 auto LHS = std::get<0>(LHSAndRHS);
1747 auto RHS = std::get<1>(LHSAndRHS);
1748 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1749 Xors.push_back(Xor);
1750 }
1751
1752 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1753 // to the desired narrow type so that we can OR them together later.
1754 SmallVector<Register, 4> WidenedXors;
1755 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1756 auto LHS = std::get<0>(LHSAndRHS);
1757 auto RHS = std::get<1>(LHSAndRHS);
1758 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1759 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1760 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1761 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1762 Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
1763 }
1764
1765 // Now, for each part we broke up, we know if they are equal/not equal
1766 // based off the G_XOR. We can OR these all together and compare against
1767 // 0 to get the result.
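      // For instance, a s128 equality compare split into two s64 parts might
      // become (an illustrative sketch; register names are made up):
      //   %zero:_(s64) = G_CONSTANT i64 0
      //   %x0:_(s64) = G_XOR %lhs0, %rhs0
      //   %x1:_(s64) = G_XOR %lhs1, %rhs1
      //   %or:_(s64) = G_OR %x0, %x1
      //   %dst:_(s1) = G_ICMP intpred(eq), %or, %zero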
1768 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1769 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1770 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1771 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1772 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1773 } else {
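      // For non-equality predicates the parts are compared from least to most
      // significant; each lower result is kept only when the higher parts
      // compare equal. A sketch for a s128 G_ICMP intpred(slt) split into two
      // s64 parts (illustrative register names):
      //   %c0:_(s1) = G_ICMP intpred(ult), %lhs0, %rhs0 ; low parts unsigned
      //   %eq:_(s1) = G_ICMP intpred(eq), %lhs1, %rhs1
      //   %c1:_(s1) = G_ICMP intpred(slt), %lhs1, %rhs1 ; top part keeps Pred
      //   %dst:_(s1) = G_SELECT %eq, %c0, %c1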
1774 Register CmpIn;
1775 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1776 Register CmpOut;
1777 CmpInst::Predicate PartPred;
1778
1779 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1780 PartPred = Pred;
1781 CmpOut = Dst;
1782 } else {
1783 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1784 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1785 }
1786
1787 if (!CmpIn) {
1788 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
1789 RHSPartRegs[I]);
1790 } else {
1791 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
1792 RHSPartRegs[I]);
1793 auto CmpEq = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, ResTy,
1794 LHSPartRegs[I], RHSPartRegs[I]);
1795 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1796 }
1797
1798 CmpIn = CmpOut;
1799 }
1800
1801 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1802 Register CmpOut;
1803 CmpInst::Predicate PartPred;
1804
1805 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1806 PartPred = Pred;
1807 CmpOut = Dst;
1808 } else {
1809 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1810 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1811 }
1812
1813 if (!CmpIn) {
1814 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
1815 RHSLeftoverRegs[I]);
1816 } else {
1817 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
1818 RHSLeftoverRegs[I]);
1819 auto CmpEq =
1820 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, ResTy,
1821 LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
1822 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1823 }
1824
1825 CmpIn = CmpOut;
1826 }
1827 }
1828 MI.eraseFromParent();
1829 return Legalized;
1830 }
1831 case TargetOpcode::G_FCMP:
1832 if (TypeIdx != 0)
1833 return UnableToLegalize;
1834
1836 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1838 return Legalized;
1839
1840 case TargetOpcode::G_SEXT_INREG: {
1841 if (TypeIdx != 0)
1842 return UnableToLegalize;
1843
1844 int64_t SizeInBits = MI.getOperand(2).getImm();
1845
1846 // So long as the new type has more bits than the bits we're extending,
1847 // we don't need to break it apart.
1848 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1850 // We don't lose any non-extension bits by truncating the src and
1851 // sign-extending the dst.
1852 MachineOperand &MO1 = MI.getOperand(1);
1853 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1854 MO1.setReg(TruncMIB.getReg(0));
1855
1856 MachineOperand &MO2 = MI.getOperand(0);
1857 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1859 MIRBuilder.buildSExt(MO2, DstExt);
1860 MO2.setReg(DstExt);
1862 return Legalized;
1863 }
1864
1865 // Break it apart. Components below the extension point are unmodified. The
1866 // component containing the extension point becomes a narrower SEXT_INREG.
1867 // Components above it are ashr'd from the component containing the
1868 // extension point.
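    // For instance, G_SEXT_INREG %x:_(s96), 40 narrowed with NarrowTy = s32
    // might become (an illustrative sketch; register names are made up):
    //   %p0:_(s32), %p1:_(s32), %p2:_(s32) = G_UNMERGE_VALUES %x:_(s96)
    //   %e1:_(s32) = G_SEXT_INREG %p1, 8          ; part containing bit 39
    //   %c31:_(s32) = G_CONSTANT i32 31
    //   %e2:_(s32) = G_ASHR %e1, %c31             ; pure sign bits
    //   %dst:_(s96) = G_MERGE_VALUES %p0, %e1, %e2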
1869 if (SizeOp0 % NarrowSize != 0)
1870 return UnableToLegalize;
1871 int NumParts = SizeOp0 / NarrowSize;
1872
1873 // List the registers where the destination will be scattered.
1874 SmallVector<Register, 2> DstRegs;
1875 // List the registers where the source will be split.
1876 SmallVector<Register, 2> SrcRegs;
1877
1878 // Create all the temporary registers.
1879 for (int i = 0; i < NumParts; ++i) {
1880 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1881
1882 SrcRegs.push_back(SrcReg);
1883 }
1884
1885 // Explode the big arguments into smaller chunks.
1886 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1887
1888 Register AshrCstReg =
1889 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1890 .getReg(0);
1891 Register FullExtensionReg;
1892 Register PartialExtensionReg;
1893
1894 // Do the operation on each small part.
1895 for (int i = 0; i < NumParts; ++i) {
1896 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
1897 DstRegs.push_back(SrcRegs[i]);
1898 PartialExtensionReg = DstRegs.back();
1899 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1900 assert(PartialExtensionReg &&
1901 "Expected to visit partial extension before full");
1902 if (FullExtensionReg) {
1903 DstRegs.push_back(FullExtensionReg);
1904 continue;
1905 }
1906 DstRegs.push_back(
1907 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1908 .getReg(0));
1909 FullExtensionReg = DstRegs.back();
1910 } else {
1911 DstRegs.push_back(
1912 MIRBuilder
1913 .buildInstr(
1914 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1915 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1916 .getReg(0));
1917 PartialExtensionReg = DstRegs.back();
1918 }
1919 }
1920
1921 // Gather the destination registers into the final destination.
1922 Register DstReg = MI.getOperand(0).getReg();
1923 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1924 MI.eraseFromParent();
1925 return Legalized;
1926 }
1927 case TargetOpcode::G_BSWAP:
1928 case TargetOpcode::G_BITREVERSE: {
1929 if (SizeOp0 % NarrowSize != 0)
1930 return UnableToLegalize;
1931
1933 SmallVector<Register, 2> SrcRegs, DstRegs;
1934 unsigned NumParts = SizeOp0 / NarrowSize;
1935 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
1936 MIRBuilder, MRI);
1937
1938 for (unsigned i = 0; i < NumParts; ++i) {
1939 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1940 {SrcRegs[NumParts - 1 - i]});
1941 DstRegs.push_back(DstPart.getReg(0));
1942 }
1943
1944 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1945
1947 MI.eraseFromParent();
1948 return Legalized;
1949 }
1950 case TargetOpcode::G_PTR_ADD:
1951 case TargetOpcode::G_PTRMASK: {
1952 if (TypeIdx != 1)
1953 return UnableToLegalize;
1955 narrowScalarSrc(MI, NarrowTy, 2);
1957 return Legalized;
1958 }
1959 case TargetOpcode::G_FPTOUI:
1960 case TargetOpcode::G_FPTOSI:
1961 case TargetOpcode::G_FPTOUI_SAT:
1962 case TargetOpcode::G_FPTOSI_SAT:
1963 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1964 case TargetOpcode::G_FPEXT:
1965 if (TypeIdx != 0)
1966 return UnableToLegalize;
1968 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1970 return Legalized;
1971 case TargetOpcode::G_FLDEXP:
1972 case TargetOpcode::G_STRICT_FLDEXP:
1973 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
1974 case TargetOpcode::G_VSCALE: {
1975 Register Dst = MI.getOperand(0).getReg();
1976 LLT Ty = MRI.getType(Dst);
1977
1978 // Assume VSCALE(1) fits into a legal integer
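    // A sketch for narrowing %dst:_(s64) = G_VSCALE 3 with NarrowTy = s32
    // (illustrative register names):
    //   %v:_(s32) = G_VSCALE i32 1
    //   %z:_(s64) = G_ZEXT %v
    //   %c:_(s64) = G_CONSTANT i64 3
    //   %dst:_(s64) = G_MUL %z, %c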
1979 const APInt One(NarrowTy.getSizeInBits(), 1);
1980 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
1981 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
1982 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
1983 MIRBuilder.buildMul(Dst, ZExt, C);
1984
1985 MI.eraseFromParent();
1986 return Legalized;
1987 }
1988 }
1989}
1990
1991Register LegalizerHelper::coerceToScalar(Register Val) {
1992 LLT Ty = MRI.getType(Val);
1993 if (Ty.isScalar())
1994 return Val;
1995
1996 const DataLayout &DL = MIRBuilder.getDataLayout();
1997 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1998 if (Ty.isPointer()) {
1999 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2000 return Register();
2001 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2002 }
2003
2004 Register NewVal = Val;
2005
2006 assert(Ty.isVector());
2007 if (Ty.isPointerVector())
2008 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2009 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2010}
2011
2012void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
2013 unsigned OpIdx, unsigned ExtOpcode) {
2014 MachineOperand &MO = MI.getOperand(OpIdx);
2015 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2016 MO.setReg(ExtB.getReg(0));
2017}
2018
2019void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
2020 unsigned OpIdx) {
2021 MachineOperand &MO = MI.getOperand(OpIdx);
2022 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
2023 MO.setReg(ExtB.getReg(0));
2024}
2025
2026void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
2027 unsigned OpIdx, unsigned TruncOpcode) {
2028 MachineOperand &MO = MI.getOperand(OpIdx);
2029 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2030 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2031 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2032 MO.setReg(DstExt);
2033}
2034
2035void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
2036 unsigned OpIdx, unsigned ExtOpcode) {
2037 MachineOperand &MO = MI.getOperand(OpIdx);
2038 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2039 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2040 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2041 MO.setReg(DstTrunc);
2042}
2043
2044void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
2045 unsigned OpIdx) {
2046 MachineOperand &MO = MI.getOperand(OpIdx);
2048 Register Dst = MO.getReg();
2049 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2050 MO.setReg(DstExt);
2051 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2052}
2053
2054void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
2055 unsigned OpIdx) {
2056 MachineOperand &MO = MI.getOperand(OpIdx);
2059}
2060
2061void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2062 MachineOperand &Op = MI.getOperand(OpIdx);
2063 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
2064}
2065
2066void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2067 MachineOperand &MO = MI.getOperand(OpIdx);
2068 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2069 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2070 MIRBuilder.buildBitcast(MO, CastDst);
2071 MO.setReg(CastDst);
2072}
2073
2074LegalizerHelper::LegalizeResult
2075LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
2076 LLT WideTy) {
2077 if (TypeIdx != 1)
2078 return UnableToLegalize;
2079
2080 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
2081 if (DstTy.isVector())
2082 return UnableToLegalize;
2083
2084 LLT SrcTy = MRI.getType(Src1Reg);
2085 const int DstSize = DstTy.getSizeInBits();
2086 const int SrcSize = SrcTy.getSizeInBits();
2087 const int WideSize = WideTy.getSizeInBits();
2088 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2089
2090 unsigned NumOps = MI.getNumOperands();
2091 unsigned NumSrc = MI.getNumOperands() - 1;
2092 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2093
2094 if (WideSize >= DstSize) {
2095 // Directly pack the bits in the target type.
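    // e.g. widening %0:_(s16) = G_MERGE_VALUES %a:_(s8), %b:_(s8) with
    // WideTy = s32 might produce (an illustrative sketch; register names are
    // made up):
    //   %za:_(s32) = G_ZEXT %a
    //   %zb:_(s32) = G_ZEXT %b
    //   %k8:_(s32) = G_CONSTANT i32 8
    //   %sh:_(s32) = G_SHL %zb, %k8
    //   %or:_(s32) = G_OR %za, %sh
    //   %0:_(s16) = G_TRUNC %or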
2096 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
2097
2098 for (unsigned I = 2; I != NumOps; ++I) {
2099 const unsigned Offset = (I - 1) * PartSize;
2100
2101 Register SrcReg = MI.getOperand(I).getReg();
2102 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
2103
2104 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
2105
2106 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
2107 MRI.createGenericVirtualRegister(WideTy);
2108
2109 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
2110 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
2111 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
2112 ResultReg = NextResult;
2113 }
2114
2115 if (WideSize > DstSize)
2116 MIRBuilder.buildTrunc(DstReg, ResultReg);
2117 else if (DstTy.isPointer())
2118 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
2119
2120 MI.eraseFromParent();
2121 return Legalized;
2122 }
2123
2124 // Unmerge the original values to the GCD type, and recombine to the next
2125 // multiple greater than the original type.
2126 //
2127 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2128 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2129 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2130 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2131 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2132 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2133 // %12:_(s12) = G_MERGE_VALUES %10, %11
2134 //
2135 // Padding with undef if necessary:
2136 //
2137 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2138 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2139 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2140 // %7:_(s2) = G_IMPLICIT_DEF
2141 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2142 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2143 // %10:_(s12) = G_MERGE_VALUES %8, %9
2144
2145 const int GCD = std::gcd(SrcSize, WideSize);
2146 LLT GCDTy = LLT::scalar(GCD);
2147
2149 SmallVector<Register, 8> NewMergeRegs;
2150 SmallVector<Register, 8> Unmerges;
2151 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
2152
2153 // Decompose the original operands if they don't evenly divide.
2154 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
2155 Register SrcReg = MO.getReg();
2156 if (GCD == SrcSize) {
2157 Unmerges.push_back(SrcReg);
2158 } else {
2159 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2160 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2161 Unmerges.push_back(Unmerge.getReg(J));
2162 }
2163 }
2164
2165 // Pad with undef to the next size that is a multiple of the requested size.
2166 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
2167 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
2168 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
2169 Unmerges.push_back(UndefReg);
2170 }
2171
2172 const int PartsPerGCD = WideSize / GCD;
2173
2174 // Build merges of each piece.
2175 ArrayRef<Register> Slicer(Unmerges);
2176 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2177 auto Merge =
2178 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2179 NewMergeRegs.push_back(Merge.getReg(0));
2180 }
2181
2182 // A truncate may be necessary if the requested type doesn't evenly divide the
2183 // original result type.
2184 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
2185 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2186 } else {
2187 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2188 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2189 }
2190
2191 MI.eraseFromParent();
2192 return Legalized;
2193}
2194
2195LegalizerHelper::LegalizeResult
2196LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
2197 LLT WideTy) {
2198 if (TypeIdx != 0)
2199 return UnableToLegalize;
2200
2201 int NumDst = MI.getNumOperands() - 1;
2202 Register SrcReg = MI.getOperand(NumDst).getReg();
2203 LLT SrcTy = MRI.getType(SrcReg);
2204 if (SrcTy.isVector())
2205 return UnableToLegalize;
2206
2207 Register Dst0Reg = MI.getOperand(0).getReg();
2208 LLT DstTy = MRI.getType(Dst0Reg);
2209 if (!DstTy.isScalar())
2210 return UnableToLegalize;
2211
2212 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
2213 if (SrcTy.isPointer()) {
2214 const DataLayout &DL = MIRBuilder.getDataLayout();
2215 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
2216 LLVM_DEBUG(
2217 dbgs() << "Not casting non-integral address space integer\n");
2218 return UnableToLegalize;
2219 }
2220
2221 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
2222 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2223 }
2224
2225 // Widen SrcTy to WideTy. This does not affect the result, but since the
2226 // user requested this size, it is probably better handled than SrcTy and
2227 // should reduce the total number of legalization artifacts.
2228 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2229 SrcTy = WideTy;
2230 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2231 }
2232
2233 // There's no unmerge type to target. Directly extract the bits from the
2234 // source type.
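    // e.g. widening %1:_(s8), %2:_(s8) = G_UNMERGE_VALUES %0:_(s16) with
    // WideTy = s32 might produce (an illustrative sketch):
    //   %w:_(s32) = G_ANYEXT %0
    //   %1:_(s8) = G_TRUNC %w
    //   %k8:_(s32) = G_CONSTANT i32 8
    //   %sh:_(s32) = G_LSHR %w, %k8
    //   %2:_(s8) = G_TRUNC %sh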
2235 unsigned DstSize = DstTy.getSizeInBits();
2236
2237 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2238 for (int I = 1; I != NumDst; ++I) {
2239 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2240 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2241 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2242 }
2243
2244 MI.eraseFromParent();
2245 return Legalized;
2246 }
2247
2248 // Extend the source to a wider type.
2249 LLT LCMTy = getLCMType(SrcTy, WideTy);
2250
2251 Register WideSrc = SrcReg;
2252 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2253 // TODO: If this is an integral address space, cast to integer and anyext.
2254 if (SrcTy.isPointer()) {
2255 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2256 return UnableToLegalize;
2257 }
2258
2259 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2260 }
2261
2262 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2263
2264 // Create a sequence of unmerges and merges to the original results. Since we
2265 // may have widened the source, we will need to pad the results with dead defs
2266 // to cover the source register.
2267 // e.g. widen s48 to s64:
2268 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2269 //
2270 // =>
2271 // %4:_(s192) = G_ANYEXT %0:_(s96)
2272 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2273 // ; unpack to GCD type, with extra dead defs
2274 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2275 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2276 // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
2277 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2278 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2279 const LLT GCDTy = getGCDType(WideTy, DstTy);
2280 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2281 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2282
2283 // Directly unmerge to the destination without going through a GCD type
2284 // if possible
2285 if (PartsPerRemerge == 1) {
2286 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2287
2288 for (int I = 0; I != NumUnmerge; ++I) {
2289 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2290
2291 for (int J = 0; J != PartsPerUnmerge; ++J) {
2292 int Idx = I * PartsPerUnmerge + J;
2293 if (Idx < NumDst)
2294 MIB.addDef(MI.getOperand(Idx).getReg());
2295 else {
2296 // Create dead def for excess components.
2297 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2298 }
2299 }
2300
2301 MIB.addUse(Unmerge.getReg(I));
2302 }
2303 } else {
2304 SmallVector<Register, 16> Parts;
2305 for (int J = 0; J != NumUnmerge; ++J)
2306 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2307
2308 SmallVector<Register, 8> RemergeParts;
2309 for (int I = 0; I != NumDst; ++I) {
2310 for (int J = 0; J < PartsPerRemerge; ++J) {
2311 const int Idx = I * PartsPerRemerge + J;
2312 RemergeParts.emplace_back(Parts[Idx]);
2313 }
2314
2315 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2316 RemergeParts.clear();
2317 }
2318 }
2319
2320 MI.eraseFromParent();
2321 return Legalized;
2322}
2323
2324LegalizerHelper::LegalizeResult
2325LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2326 LLT WideTy) {
2327 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2328 unsigned Offset = MI.getOperand(2).getImm();
2329
2330 if (TypeIdx == 0) {
2331 if (SrcTy.isVector() || DstTy.isVector())
2332 return UnableToLegalize;
2333
2334 SrcOp Src(SrcReg);
2335 if (SrcTy.isPointer()) {
2336 // Extracts from pointers can be handled only if they are really just
2337 // simple integers.
2338 const DataLayout &DL = MIRBuilder.getDataLayout();
2339 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2340 return UnableToLegalize;
2341
2342 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2343 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2344 SrcTy = SrcAsIntTy;
2345 }
2346
2347 if (DstTy.isPointer())
2348 return UnableToLegalize;
2349
2350 if (Offset == 0) {
2351 // Avoid a shift in the degenerate case.
2352 MIRBuilder.buildTrunc(DstReg,
2353 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2354 MI.eraseFromParent();
2355 return Legalized;
2356 }
2357
2358 // Do a shift in the source type.
2359 LLT ShiftTy = SrcTy;
2360 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2361 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2362 ShiftTy = WideTy;
2363 }
2364
2365 auto LShr = MIRBuilder.buildLShr(
2366 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2367 MIRBuilder.buildTrunc(DstReg, LShr);
2368 MI.eraseFromParent();
2369 return Legalized;
2370 }
2371
2372 if (SrcTy.isScalar()) {
2374 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2376 return Legalized;
2377 }
2378
2379 if (!SrcTy.isVector())
2380 return UnableToLegalize;
2381
2382 if (DstTy != SrcTy.getElementType())
2383 return UnableToLegalize;
2384
2385 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2386 return UnableToLegalize;
2387
2389 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2390
2391 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2392 Offset);
2393 widenScalarDst(MI, WideTy.getScalarType(), 0);
2395 return Legalized;
2396}
2397
2398LegalizerHelper::LegalizeResult
2399LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2400 LLT WideTy) {
2401 if (TypeIdx != 0 || WideTy.isVector())
2402 return UnableToLegalize;
2404 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2405 widenScalarDst(MI, WideTy);
2407 return Legalized;
2408}
2409
2410LegalizerHelper::LegalizeResult
2411LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2412 LLT WideTy) {
2413 unsigned Opcode;
2414 unsigned ExtOpcode;
2415 std::optional<Register> CarryIn;
2416 switch (MI.getOpcode()) {
2417 default:
2418 llvm_unreachable("Unexpected opcode!");
2419 case TargetOpcode::G_SADDO:
2420 Opcode = TargetOpcode::G_ADD;
2421 ExtOpcode = TargetOpcode::G_SEXT;
2422 break;
2423 case TargetOpcode::G_SSUBO:
2424 Opcode = TargetOpcode::G_SUB;
2425 ExtOpcode = TargetOpcode::G_SEXT;
2426 break;
2427 case TargetOpcode::G_UADDO:
2428 Opcode = TargetOpcode::G_ADD;
2429 ExtOpcode = TargetOpcode::G_ZEXT;
2430 break;
2431 case TargetOpcode::G_USUBO:
2432 Opcode = TargetOpcode::G_SUB;
2433 ExtOpcode = TargetOpcode::G_ZEXT;
2434 break;
2435 case TargetOpcode::G_SADDE:
2436 Opcode = TargetOpcode::G_UADDE;
2437 ExtOpcode = TargetOpcode::G_SEXT;
2438 CarryIn = MI.getOperand(4).getReg();
2439 break;
2440 case TargetOpcode::G_SSUBE:
2441 Opcode = TargetOpcode::G_USUBE;
2442 ExtOpcode = TargetOpcode::G_SEXT;
2443 CarryIn = MI.getOperand(4).getReg();
2444 break;
2445 case TargetOpcode::G_UADDE:
2446 Opcode = TargetOpcode::G_UADDE;
2447 ExtOpcode = TargetOpcode::G_ZEXT;
2448 CarryIn = MI.getOperand(4).getReg();
2449 break;
2450 case TargetOpcode::G_USUBE:
2451 Opcode = TargetOpcode::G_USUBE;
2452 ExtOpcode = TargetOpcode::G_ZEXT;
2453 CarryIn = MI.getOperand(4).getReg();
2454 break;
2455 }
2456
2457 if (TypeIdx == 1) {
2458 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2459
2461 if (CarryIn)
2462 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2463 widenScalarDst(MI, WideTy, 1);
2464
2466 return Legalized;
2467 }
2468
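  // Illustratively, for a s8 G_UADDO widened to s32 (a sketch with made-up
  // register names), the overflow bit is recovered by checking that the wide
  // sum round-trips through the narrow type:
  //   %a32:_(s32) = G_ZEXT %a:_(s8)
  //   %b32:_(s32) = G_ZEXT %b:_(s8)
  //   %sum:_(s32) = G_ADD %a32, %b32
  //   %lo:_(s8)   = G_TRUNC %sum
  //   %rt:_(s32)  = G_ZEXT %lo
  //   %ovf:_(s1)  = G_ICMP intpred(ne), %sum, %rt
  //   %res:_(s8)  = G_TRUNC %sum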
2469 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2470 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2471 // Do the arithmetic in the larger type.
2472 Register NewOp;
2473 if (CarryIn) {
2474 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2475 NewOp = MIRBuilder
2476 .buildInstr(Opcode, {WideTy, CarryOutTy},
2477 {LHSExt, RHSExt, *CarryIn})
2478 .getReg(0);
2479 } else {
2480 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2481 }
2482 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2483 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2484 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2485 // There is no overflow if the ExtOp is the same as NewOp.
2486 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2487 // Now trunc the NewOp to the original result.
2488 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2489 MI.eraseFromParent();
2490 return Legalized;
2491}
2492
2493LegalizerHelper::LegalizeResult
2494LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2495 LLT WideTy) {
2496 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2497 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2498 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2499 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2500 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2501 // We can convert this to:
2502 // 1. Any extend iN to iM
2503 // 2. SHL by M-N
2504 // 3. [US][ADD|SUB|SHL]SAT
2505 // 4. L/ASHR by M-N
2506 //
2507 // It may be more efficient to lower this to a min and a max operation in
2508 // the higher precision arithmetic if the promoted operation isn't legal,
2509 // but this decision is up to the target's lowering request.
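  // e.g. a s8 G_UADDSAT widened to s32 might become (an illustrative sketch;
  // register names are made up):
  //   %a32:_(s32) = G_ANYEXT %a
  //   %b32:_(s32) = G_ANYEXT %b
  //   %k:_(s32)   = G_CONSTANT i32 24
  //   %sa:_(s32)  = G_SHL %a32, %k
  //   %sb:_(s32)  = G_SHL %b32, %k
  //   %s:_(s32)   = G_UADDSAT %sa, %sb
  //   %r:_(s32)   = G_LSHR %s, %k
  //   %res:_(s8)  = G_TRUNC %r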
2510 Register DstReg = MI.getOperand(0).getReg();
2511
2512 unsigned NewBits = WideTy.getScalarSizeInBits();
2513 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2514
2515 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2516 // must not left shift the RHS to preserve the shift amount.
2517 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2518 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2519 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2520 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2521 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2522 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2523
2524 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2525 {ShiftL, ShiftR}, MI.getFlags());
2526
2527 // Use a shift that will preserve the number of sign bits when the trunc is
2528 // folded away.
2529 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2530 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2531
2532 MIRBuilder.buildTrunc(DstReg, Result);
2533 MI.eraseFromParent();
2534 return Legalized;
2535}
2536
2537LegalizerHelper::LegalizeResult
2538LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2539 LLT WideTy) {
2540 if (TypeIdx == 1) {
2542 widenScalarDst(MI, WideTy, 1);
2544 return Legalized;
2545 }
2546
2547 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2548 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2549 LLT SrcTy = MRI.getType(LHS);
2550 LLT OverflowTy = MRI.getType(OriginalOverflow);
2551 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2552
2553 // To determine if the result overflowed in the larger type, we extend the
2554 // input to the larger type, do the multiply (checking if it overflows),
2555 // then also check the high bits of the result to see if overflow happened
2556 // there.
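  // e.g. a s8 G_UMULO widened to s32 (a sketch; here the widened multiply
  // itself cannot overflow, so only the high bits of the product are checked):
  //   %a32:_(s32) = G_ZEXT %a
  //   %b32:_(s32) = G_ZEXT %b
  //   %m:_(s32)   = G_MUL %a32, %b32
  //   %res:_(s8)  = G_TRUNC %m
  //   %lo:_(s32)  = G_AND %m, 255            ; what buildZExtInReg emits
  //   %ovf:_(s1)  = G_ICMP intpred(ne), %m, %lo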
2557 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2558 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2559 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2560
2561 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2562 // so we don't need to check the overflow result of larger type Mulo.
2563 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2564
2565 unsigned MulOpc =
2566 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2567
2568 MachineInstrBuilder Mulo;
2569 if (WideMulCanOverflow)
2570 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2571 {LeftOperand, RightOperand});
2572 else
2573 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2574
2575 auto Mul = Mulo->getOperand(0);
2576 MIRBuilder.buildTrunc(Result, Mul);
2577
2578 MachineInstrBuilder ExtResult;
2579 // Overflow occurred if it occurred in the larger type, or if the high part
2580 // of the result does not zero/sign-extend the low part. Check this second
2581 // possibility first.
2582 if (IsSigned) {
2583 // For signed, overflow occurred when the high part does not sign-extend
2584 // the low part.
2585 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2586 } else {
2587 // Unsigned overflow occurred when the high part does not zero-extend the
2588 // low part.
2589 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2590 }
2591
2592 if (WideMulCanOverflow) {
2593 auto Overflow =
2594 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2595 // Finally check if the multiplication in the larger type itself overflowed.
2596 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2597 } else {
2598 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2599 }
2600 MI.eraseFromParent();
2601 return Legalized;
2602}
2603
2604LegalizerHelper::LegalizeResult
2605LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2606 unsigned Opcode = MI.getOpcode();
2607 switch (Opcode) {
2608 default:
2609 return UnableToLegalize;
2610 case TargetOpcode::G_ATOMICRMW_XCHG:
2611 case TargetOpcode::G_ATOMICRMW_ADD:
2612 case TargetOpcode::G_ATOMICRMW_SUB:
2613 case TargetOpcode::G_ATOMICRMW_AND:
2614 case TargetOpcode::G_ATOMICRMW_OR:
2615 case TargetOpcode::G_ATOMICRMW_XOR:
2616 case TargetOpcode::G_ATOMICRMW_MIN:
2617 case TargetOpcode::G_ATOMICRMW_MAX:
2618 case TargetOpcode::G_ATOMICRMW_UMIN:
2619 case TargetOpcode::G_ATOMICRMW_UMAX:
2620 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2622 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2623 widenScalarDst(MI, WideTy, 0);
2625 return Legalized;
2626 case TargetOpcode::G_ATOMIC_CMPXCHG:
2627 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2629 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2630 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2631 widenScalarDst(MI, WideTy, 0);
2633 return Legalized;
2634 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2635 if (TypeIdx == 0) {
2637 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2638 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2639 widenScalarDst(MI, WideTy, 0);
2641 return Legalized;
2642 }
2643 assert(TypeIdx == 1 &&
2644 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2646 widenScalarDst(MI, WideTy, 1);
2648 return Legalized;
2649 case TargetOpcode::G_EXTRACT:
2650 return widenScalarExtract(MI, TypeIdx, WideTy);
2651 case TargetOpcode::G_INSERT:
2652 return widenScalarInsert(MI, TypeIdx, WideTy);
2653 case TargetOpcode::G_MERGE_VALUES:
2654 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2655 case TargetOpcode::G_UNMERGE_VALUES:
2656 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2657 case TargetOpcode::G_SADDO:
2658 case TargetOpcode::G_SSUBO:
2659 case TargetOpcode::G_UADDO:
2660 case TargetOpcode::G_USUBO:
2661 case TargetOpcode::G_SADDE:
2662 case TargetOpcode::G_SSUBE:
2663 case TargetOpcode::G_UADDE:
2664 case TargetOpcode::G_USUBE:
2665 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2666 case TargetOpcode::G_UMULO:
2667 case TargetOpcode::G_SMULO:
2668 return widenScalarMulo(MI, TypeIdx, WideTy);
2669 case TargetOpcode::G_SADDSAT:
2670 case TargetOpcode::G_SSUBSAT:
2671 case TargetOpcode::G_SSHLSAT:
2672 case TargetOpcode::G_UADDSAT:
2673 case TargetOpcode::G_USUBSAT:
2674 case TargetOpcode::G_USHLSAT:
2675 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2676 case TargetOpcode::G_CTTZ:
2677 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2678 case TargetOpcode::G_CTLZ:
2679 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2680 case TargetOpcode::G_CTPOP: {
2681 if (TypeIdx == 0) {
2683 widenScalarDst(MI, WideTy, 0);
2685 return Legalized;
2686 }
2687
2688 Register SrcReg = MI.getOperand(1).getReg();
2689
2690 // First extend the input.
2691 unsigned ExtOpc = Opcode == TargetOpcode::G_CTTZ ||
2692 Opcode == TargetOpcode::G_CTTZ_ZERO_UNDEF
2693 ? TargetOpcode::G_ANYEXT
2694 : TargetOpcode::G_ZEXT;
2695 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2696 LLT CurTy = MRI.getType(SrcReg);
2697 unsigned NewOpc = Opcode;
2698 if (NewOpc == TargetOpcode::G_CTTZ) {
2699 // The count is the same in the larger type except if the original
2700 // value was zero. This can be handled by setting the bit just off
2701 // the top of the original type.
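      // e.g. widening G_CTTZ from s8 to s32 (a sketch): OR-ing in bit 8 makes
      // a zero input count 8 trailing zeros instead of 32, matching the
      // narrow semantics, so the relaxed opcode becomes safe:
      //   %w:_(s32)  = G_ANYEXT %src:_(s8)
      //   %k:_(s32)  = G_CONSTANT i32 256
      //   %or:_(s32) = G_OR %w, %k
      //   %r:_(s32)  = G_CTTZ_ZERO_UNDEF %or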
2702 auto TopBit =
2703 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2704 MIBSrc = MIRBuilder.buildOr(
2705 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2706 // Now we know the operand is non-zero, use the more relaxed opcode.
2707 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2708 }
2709
2710 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2711
2712 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2713 // An optimization where the result is the CTLZ after the left shift by
2714 // (Difference in widety and current ty), that is,
2715 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2716 // Result = ctlz MIBSrc
2717 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2718 MIRBuilder.buildConstant(WideTy, SizeDiff));
2719 }
2720
2721 // Perform the operation at the larger size.
2722 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2723 // This is already the correct result for CTPOP and CTTZs
2724 if (Opcode == TargetOpcode::G_CTLZ) {
2725 // The correct result is NewOp - (Difference in widety and current ty).
2726 MIBNewOp = MIRBuilder.buildSub(
2727 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2728 }
2729
2730 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2731 MI.eraseFromParent();
2732 return Legalized;
2733 }
2734 case TargetOpcode::G_BSWAP: {
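    // e.g. widening a s16 G_BSWAP to s32 (a sketch; register names are made
    // up): the swap is performed at 32 bits, which leaves the interesting
    // bytes in the high half, so they are shifted back down before truncating:
    //   %w:_(s32)   = G_ANYEXT %src:_(s16)
    //   %bs:_(s32)  = G_BSWAP %w
    //   %k:_(s32)   = G_CONSTANT i32 16
    //   %sh:_(s32)  = G_LSHR %bs, %k
    //   %dst:_(s16) = G_TRUNC %sh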
2736 Register DstReg = MI.getOperand(0).getReg();
2737
2738 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2739 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2740 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2741 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2742
2743 MI.getOperand(0).setReg(DstExt);
2744
2746
2747 LLT Ty = MRI.getType(DstReg);
2748 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2749 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2750 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2751
2752 MIRBuilder.buildTrunc(DstReg, ShrReg);
2754 return Legalized;
2755 }
2756 case TargetOpcode::G_BITREVERSE: {
2758
2759 Register DstReg = MI.getOperand(0).getReg();
2760 LLT Ty = MRI.getType(DstReg);
2761 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2762
2763 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2764 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2765 MI.getOperand(0).setReg(DstExt);
2767
2768 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2769 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2770 MIRBuilder.buildTrunc(DstReg, Shift);
2772 return Legalized;
2773 }
2774 case TargetOpcode::G_FREEZE:
2775 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2777 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2778 widenScalarDst(MI, WideTy);
2780 return Legalized;
2781
2782 case TargetOpcode::G_ABS:
2784 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2785 widenScalarDst(MI, WideTy);
2787 return Legalized;
2788
2789 case TargetOpcode::G_ADD:
2790 case TargetOpcode::G_AND:
2791 case TargetOpcode::G_MUL:
2792 case TargetOpcode::G_OR:
2793 case TargetOpcode::G_XOR:
2794 case TargetOpcode::G_SUB:
2795 case TargetOpcode::G_SHUFFLE_VECTOR:
2796 // Perform operation at larger width (any extension is fine here, high bits
2797 // don't affect the result) and then truncate the result back to the
2798 // original type.
2800 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2801 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2802 widenScalarDst(MI, WideTy);
2804 return Legalized;
2805
2806 case TargetOpcode::G_SBFX:
2807 case TargetOpcode::G_UBFX:
2809
2810 if (TypeIdx == 0) {
2811 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2812 widenScalarDst(MI, WideTy);
2813 } else {
2814 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2815 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2816 }
2817
2819 return Legalized;
2820
2821 case TargetOpcode::G_SHL:
2823
2824 if (TypeIdx == 0) {
2825 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2826 widenScalarDst(MI, WideTy);
2827 } else {
2828 assert(TypeIdx == 1);
2829 // The "number of bits to shift" operand must preserve its value as an
2830 // unsigned integer:
2831 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2832 }
2833
2835 return Legalized;
2836
2837 case TargetOpcode::G_ROTR:
2838 case TargetOpcode::G_ROTL:
2839 if (TypeIdx != 1)
2840 return UnableToLegalize;
2841
2843 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2845 return Legalized;
2846
2847 case TargetOpcode::G_SDIV:
2848 case TargetOpcode::G_SREM:
2849 case TargetOpcode::G_SMIN:
2850 case TargetOpcode::G_SMAX:
2852 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2853 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2854 widenScalarDst(MI, WideTy);
2856 return Legalized;
2857
2858 case TargetOpcode::G_SDIVREM:
2860 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2861 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2862 widenScalarDst(MI, WideTy);
2863 widenScalarDst(MI, WideTy, 1);
2865 return Legalized;
2866
2867 case TargetOpcode::G_ASHR:
2868 case TargetOpcode::G_LSHR:
2870
2871 if (TypeIdx == 0) {
2872 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
2873 : TargetOpcode::G_ZEXT;
2874
2875 widenScalarSrc(MI, WideTy, 1, CvtOp);
2876 widenScalarDst(MI, WideTy);
2877 } else {
2878 assert(TypeIdx == 1);
2879 // The "number of bits to shift" operand must preserve its value as an
2880 // unsigned integer:
2881 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2882 }
2883
2885 return Legalized;
2886 case TargetOpcode::G_UDIV:
2887 case TargetOpcode::G_UREM:
2889 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2890 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2891 widenScalarDst(MI, WideTy);
2893 return Legalized;
2894 case TargetOpcode::G_UDIVREM:
2896 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2897 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2898 widenScalarDst(MI, WideTy);
2899 widenScalarDst(MI, WideTy, 1);
2901 return Legalized;
2902 case TargetOpcode::G_UMIN:
2903 case TargetOpcode::G_UMAX: {
2904 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2905
2906 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
2907 unsigned ExtOpc =
2908 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty, Ctx),
2909 getApproximateEVTForLLT(WideTy, Ctx))
2910 ? TargetOpcode::G_SEXT
2911 : TargetOpcode::G_ZEXT;
2912
2914 widenScalarSrc(MI, WideTy, 1, ExtOpc);
2915 widenScalarSrc(MI, WideTy, 2, ExtOpc);
2916 widenScalarDst(MI, WideTy);
2918 return Legalized;
2919 }
2920
2921 case TargetOpcode::G_SELECT:
2923 if (TypeIdx == 0) {
2924 // Perform operation at larger width (any extension is fine here, high
2925 // bits don't affect the result) and then truncate the result back to the
2926 // original type.
2927 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2928 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2929 widenScalarDst(MI, WideTy);
2930 } else {
2931 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
2932 // Explicit extension is required here since high bits affect the result.
2933 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
2934 }
2936 return Legalized;
2937
2938 case TargetOpcode::G_FPTOSI:
2939 case TargetOpcode::G_FPTOUI:
2940 case TargetOpcode::G_INTRINSIC_LRINT:
2941 case TargetOpcode::G_INTRINSIC_LLRINT:
2942 case TargetOpcode::G_IS_FPCLASS:
2944
2945 if (TypeIdx == 0)
2946 widenScalarDst(MI, WideTy);
2947 else
2948 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2949
2951 return Legalized;
2952 case TargetOpcode::G_SITOFP:
2954
2955 if (TypeIdx == 0)
2956 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2957 else
2958 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2959
2961 return Legalized;
2962 case TargetOpcode::G_UITOFP:
2964
2965 if (TypeIdx == 0)
2966 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2967 else
2968 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2969
2971 return Legalized;
2972 case TargetOpcode::G_FPTOSI_SAT:
2973 case TargetOpcode::G_FPTOUI_SAT:
2975
2976 if (TypeIdx == 0) {
2977 Register OldDst = MI.getOperand(0).getReg();
2978 LLT Ty = MRI.getType(OldDst);
2979 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
2980 Register NewDst;
2981 MI.getOperand(0).setReg(ExtReg);
2982 uint64_t ShortBits = Ty.getScalarSizeInBits();
2983 uint64_t WideBits = WideTy.getScalarSizeInBits();
2985 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
2986 // z = i16 fptosi_sat(a)
2987 // ->
2988 // x = i32 fptosi_sat(a)
2989 // y = smin(x, 32767)
2990 // z = smax(y, -32768)
2991 auto MaxVal = MIRBuilder.buildConstant(
2992 WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
2993 auto MinVal = MIRBuilder.buildConstant(
2994 WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
2995 Register MidReg =
2996 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
2997 NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
2998 } else {
2999 // z = i16 fptoui_sat(a)
3000 // ->
3001 // x = i32 fptoui_sat(a)
3002 // y = smin(x, 65535)
3003 auto MaxVal = MIRBuilder.buildConstant(
3004 WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
3005 NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3006 }
3007 MIRBuilder.buildTrunc(OldDst, NewDst);
3008 } else
3009 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3010
3012 return Legalized;
3013 case TargetOpcode::G_LOAD:
3014 case TargetOpcode::G_SEXTLOAD:
3015 case TargetOpcode::G_ZEXTLOAD:
3017 widenScalarDst(MI, WideTy);
3019 return Legalized;
3020
3021 case TargetOpcode::G_STORE: {
3022 if (TypeIdx != 0)
3023 return UnableToLegalize;
3024
3025 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3026 assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
3027 if (!Ty.isScalar()) {
3028 // We need to widen the vector element type.
3030 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
3031 // We also need to adjust the MMO to turn this into a truncating store.
3032 MachineMemOperand &MMO = **MI.memoperands_begin();
3033 MachineFunction &MF = MIRBuilder.getMF();
3034 auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
3035 MI.setMemRefs(MF, {NewMMO});
3037 return Legalized;
3038 }
3039
3041
3042 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3043 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3044 widenScalarSrc(MI, WideTy, 0, ExtType);
3045
3047 return Legalized;
3048 }
3049 case TargetOpcode::G_CONSTANT: {
3050 MachineOperand &SrcMO = MI.getOperand(1);
3051 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3052 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3053 MRI.getType(MI.getOperand(0).getReg()));
3054 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3055 ExtOpc == TargetOpcode::G_ANYEXT) &&
3056 "Illegal Extend");
3057 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3058 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3059 ? SrcVal.sext(WideTy.getSizeInBits())
3060 : SrcVal.zext(WideTy.getSizeInBits());
3062 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3063
3064 widenScalarDst(MI, WideTy);
3066 return Legalized;
3067 }
3068 case TargetOpcode::G_FCONSTANT: {
3069 // To avoid changing the bits of the constant due to extension to a larger
3070 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
3071 MachineOperand &SrcMO = MI.getOperand(1);
3072 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3074 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
3075 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
3076 MI.eraseFromParent();
3077 return Legalized;
3078 }
3079 case TargetOpcode::G_IMPLICIT_DEF: {
3081 widenScalarDst(MI, WideTy);
3083 return Legalized;
3084 }
3085 case TargetOpcode::G_BRCOND:
3087 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
3089 return Legalized;
3090
3091 case TargetOpcode::G_FCMP:
3093 if (TypeIdx == 0)
3094 widenScalarDst(MI, WideTy);
3095 else {
3096 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3097 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
3098 }
3100 return Legalized;
3101
3102 case TargetOpcode::G_ICMP:
3104 if (TypeIdx == 0)
3105 widenScalarDst(MI, WideTy);
3106 else {
3107 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
3108 CmpInst::Predicate Pred =
3109 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3110
3111 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3112 unsigned ExtOpcode =
3113 (CmpInst::isSigned(Pred) ||
3114 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx),
3115 getApproximateEVTForLLT(WideTy, Ctx)))
3116 ? TargetOpcode::G_SEXT
3117 : TargetOpcode::G_ZEXT;
3118 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
3119 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
3120 }
3122 return Legalized;
3123
3124 case TargetOpcode::G_PTR_ADD:
3125 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
3127 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3129 return Legalized;
3130
3131 case TargetOpcode::G_PHI: {
3132 assert(TypeIdx == 0 && "Expecting only Idx 0");
3133
3135 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3136 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3138 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
3139 }
3140
3141 MachineBasicBlock &MBB = *MI.getParent();
3143 widenScalarDst(MI, WideTy);
3145 return Legalized;
3146 }
3147 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3148 if (TypeIdx == 0) {
3149 Register VecReg = MI.getOperand(1).getReg();
3150 LLT VecTy = MRI.getType(VecReg);
3152
3153 widenScalarSrc(
3154 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
3155 TargetOpcode::G_ANYEXT);
3156
3157 widenScalarDst(MI, WideTy, 0);
3159 return Legalized;
3160 }
3161
3162 if (TypeIdx != 2)
3163 return UnableToLegalize;
3165 // TODO: Probably should be zext
3166 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3168 return Legalized;
3169 }
3170 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3171 if (TypeIdx == 0) {
3173 const LLT WideEltTy = WideTy.getElementType();
3174
3175 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3176 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3177 widenScalarDst(MI, WideTy, 0);
3179 return Legalized;
3180 }
3181
3182 if (TypeIdx == 1) {
3184
3185 Register VecReg = MI.getOperand(1).getReg();
3186 LLT VecTy = MRI.getType(VecReg);
3187 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
3188
3189 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3190 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3191 widenScalarDst(MI, WideVecTy, 0);
3193 return Legalized;
3194 }
3195
3196 if (TypeIdx == 2) {
3198 // TODO: Probably should be zext
3199 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
3201 return Legalized;
3202 }
3203
3204 return UnableToLegalize;
3205 }
3206 case TargetOpcode::G_FADD:
3207 case TargetOpcode::G_FMUL:
3208 case TargetOpcode::G_FSUB:
3209 case TargetOpcode::G_FMA:
3210 case TargetOpcode::G_FMAD:
3211 case TargetOpcode::G_FNEG:
3212 case TargetOpcode::G_FABS:
3213 case TargetOpcode::G_FCANONICALIZE:
3214 case TargetOpcode::G_FMINNUM:
3215 case TargetOpcode::G_FMAXNUM:
3216 case TargetOpcode::G_FMINNUM_IEEE:
3217 case TargetOpcode::G_FMAXNUM_IEEE:
3218 case TargetOpcode::G_FMINIMUM:
3219 case TargetOpcode::G_FMAXIMUM:
3220 case TargetOpcode::G_FDIV:
3221 case TargetOpcode::G_FREM:
3222 case TargetOpcode::G_FCEIL:
3223 case TargetOpcode::G_FFLOOR:
3224 case TargetOpcode::G_FCOS:
3225 case TargetOpcode::G_FSIN:
3226 case TargetOpcode::G_FTAN:
3227 case TargetOpcode::G_FACOS:
3228 case TargetOpcode::G_FASIN:
3229 case TargetOpcode::G_FATAN:
3230 case TargetOpcode::G_FATAN2:
3231 case TargetOpcode::G_FCOSH:
3232 case TargetOpcode::G_FSINH:
3233 case TargetOpcode::G_FTANH:
3234 case TargetOpcode::G_FLOG10:
3235 case TargetOpcode::G_FLOG:
3236 case TargetOpcode::G_FLOG2:
3237 case TargetOpcode::G_FRINT:
3238 case TargetOpcode::G_FNEARBYINT:
3239 case TargetOpcode::G_FSQRT:
3240 case TargetOpcode::G_FEXP:
3241 case TargetOpcode::G_FEXP2:
3242 case TargetOpcode::G_FEXP10:
3243 case TargetOpcode::G_FPOW:
3244 case TargetOpcode::G_INTRINSIC_TRUNC:
3245 case TargetOpcode::G_INTRINSIC_ROUND:
3246 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3247 assert(TypeIdx == 0);
3249
3250 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3251 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3252
3253 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3255 return Legalized;
3256 case TargetOpcode::G_FPOWI:
3257 case TargetOpcode::G_FLDEXP:
3258 case TargetOpcode::G_STRICT_FLDEXP: {
3259 if (TypeIdx == 0) {
3260 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3261 return UnableToLegalize;
3262
3264 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3265 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3267 return Legalized;
3268 }
3269
3270 if (TypeIdx == 1) {
3271 // For some reason SelectionDAG tries to promote to a libcall without
3272 // actually changing the integer type for promotion.
3274 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3276 return Legalized;
3277 }
3278
3279 return UnableToLegalize;
3280 }
3281 case TargetOpcode::G_FFREXP: {
3283
3284 if (TypeIdx == 0) {
3285 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3286 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3287 } else {
3288 widenScalarDst(MI, WideTy, 1);
3289 }
3290
3292 return Legalized;
3293 }
3294 case TargetOpcode::G_INTTOPTR:
3295 if (TypeIdx != 1)
3296 return UnableToLegalize;
3297
3299 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3301 return Legalized;
3302 case TargetOpcode::G_PTRTOINT:
3303 if (TypeIdx != 0)
3304 return UnableToLegalize;
3305
3307 widenScalarDst(MI, WideTy, 0);
3309 return Legalized;
3310 case TargetOpcode::G_BUILD_VECTOR: {
3312
3313 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3314 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3315 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3316
3317 // Avoid changing the result vector type if the source element type was
3318 // requested.
3319 if (TypeIdx == 1) {
3320 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3321 } else {
3322 widenScalarDst(MI, WideTy, 0);
3323 }
3324
3326 return Legalized;
3327 }
3328 case TargetOpcode::G_SEXT_INREG:
3329 if (TypeIdx != 0)
3330 return UnableToLegalize;
3331
3333 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3334 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3336 return Legalized;
3337 case TargetOpcode::G_PTRMASK: {
3338 if (TypeIdx != 1)
3339 return UnableToLegalize;
3341 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3343 return Legalized;
3344 }
3345 case TargetOpcode::G_VECREDUCE_FADD:
3346 case TargetOpcode::G_VECREDUCE_FMUL:
3347 case TargetOpcode::G_VECREDUCE_FMIN:
3348 case TargetOpcode::G_VECREDUCE_FMAX:
3349 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3350 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3351 if (TypeIdx != 0)
3352 return UnableToLegalize;
3354 Register VecReg = MI.getOperand(1).getReg();
3355 LLT VecTy = MRI.getType(VecReg);
3356 LLT WideVecTy = VecTy.isVector()
3357 ? LLT::vector(VecTy.getElementCount(), WideTy)
3358 : WideTy;
3359 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3360 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3362 return Legalized;
3363 }
3364 case TargetOpcode::G_VSCALE: {
3365 MachineOperand &SrcMO = MI.getOperand(1);
3366 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3367 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3368 // The CImm is always a signed value
3369 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3371 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3372 widenScalarDst(MI, WideTy);
3374 return Legalized;
3375 }
3376 case TargetOpcode::G_SPLAT_VECTOR: {
3377 if (TypeIdx != 1)
3378 return UnableToLegalize;
3379
3381 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3383 return Legalized;
3384 }
3385 case TargetOpcode::G_INSERT_SUBVECTOR: {
3386 if (TypeIdx != 0)
3387 return UnableToLegalize;
3388
3389 GInsertSubvector &IS = cast<GInsertSubvector>(MI);
3390 Register BigVec = IS.getBigVec();
3391 Register SubVec = IS.getSubVec();
3392
3393 LLT SubVecTy = MRI.getType(SubVec);
3394 LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
3395
3396 // Widen the G_INSERT_SUBVECTOR
3397 auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
3398 auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3399 auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3400 IS.getIndexImm());
3401
3402 // Truncate back down
3403 auto SplatZero = MIRBuilder.buildSplatVector(
3404 WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
3405 MIRBuilder.buildICmp(CmpInst::ICMP_NE, IS.getReg(0), WideInsert,
3406 SplatZero);
3407
3408 MI.eraseFromParent();
3409
3410 return Legalized;
3411 }
3412 }
3413}
3414
3415static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3416 MachineIRBuilder &B, Register Src, LLT Ty) {
3417 auto Unmerge = B.buildUnmerge(Ty, Src);
3418 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3419 Pieces.push_back(Unmerge.getReg(I));
3420}
3421
3422static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3423 MachineIRBuilder &MIRBuilder) {
3424 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3425 MachineFunction &MF = MIRBuilder.getMF();
3426 const DataLayout &DL = MIRBuilder.getDataLayout();
3427 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3428 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3429 LLT DstLLT = MRI.getType(DstReg);
3430
3431 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3432
3433 auto Addr = MIRBuilder.buildConstantPool(
3434 AddrPtrTy,
3435 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3436
3437 MachineMemOperand *MMO =
3438 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3439 MachineMemOperand::MOLoad, DstLLT, Alignment);
3440
3441 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3442}
3443
3444LegalizerHelper::LegalizeResult
3445LegalizerHelper::lowerConstant(MachineInstr &MI) {
3446 const MachineOperand &ConstOperand = MI.getOperand(1);
3447 const Constant *ConstantVal = ConstOperand.getCImm();
3448
3449 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3450 MI.eraseFromParent();
3451
3452 return Legalized;
3453}
3454
3455LegalizerHelper::LegalizeResult
3456LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3457 const MachineOperand &ConstOperand = MI.getOperand(1);
3458 const Constant *ConstantVal = ConstOperand.getFPImm();
3459
3460 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3461 MI.eraseFromParent();
3462
3463 return Legalized;
3464}
3465
3466LegalizerHelper::LegalizeResult
3467LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3468 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3469 if (SrcTy.isVector()) {
3470 LLT SrcEltTy = SrcTy.getElementType();
3471 SmallVector<Register, 8> SrcRegs;
3472
3473 if (DstTy.isVector()) {
3474 int NumDstElt = DstTy.getNumElements();
3475 int NumSrcElt = SrcTy.getNumElements();
3476
3477 LLT DstEltTy = DstTy.getElementType();
3478 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3479 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3480
3481 // If there's an element size mismatch, insert intermediate casts to match
3482 // the result element type.
3483 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3484 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3485 //
3486 // =>
3487 //
3488 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3489 // %4:_(<2 x s8>) = G_BITCAST %2
3490 // %5:_(<2 x s8>) = G_BITCAST %3
3491 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3492 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
3493 SrcPartTy = SrcEltTy;
3494 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3495 //
3496 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3497 //
3498 // =>
3499 //
3500 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3501 // %4:_(s16) = G_BITCAST %2
3502 // %5:_(s16) = G_BITCAST %3
3503 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3504 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
3505 DstCastTy = DstEltTy;
3506 }
3507
3508 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3509 for (Register &SrcReg : SrcRegs)
3510 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3511 } else
3512 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3513
3514 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3515 MI.eraseFromParent();
3516 return Legalized;
3517 }
3518
3519 if (DstTy.isVector()) {
3520 SmallVector<Register, 8> SrcRegs;
3521 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3522 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3523 MI.eraseFromParent();
3524 return Legalized;
3525 }
3526
3527 return UnableToLegalize;
3528}
3529
3530/// Figure out the bit offset into a register when coercing a vector index for
3531 /// the wide element type. This is only for the case when promoting a vector
3532 /// to one with larger elements.
3533 ///
3534 ///
3535/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3536/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3537static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3538 Register Idx,
3539 unsigned NewEltSize,
3540 unsigned OldEltSize) {
3541 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3542 LLT IdxTy = B.getMRI()->getType(Idx);
3543
3544 // Now figure out the amount we need to shift to get the target bits.
3545 auto OffsetMask = B.buildConstant(
3546 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3547 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3548 return B.buildShl(IdxTy, OffsetIdx,
3549 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3550}
3551
3552/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3553/// is casting to a vector with a smaller element size, perform multiple element
3554/// extracts and merge the results. If this is coercing to a vector with larger
3555/// elements, index the bitcasted vector and extract the target element with bit
3556/// operations. This is intended to force the indexing in the native register
3557/// size for architectures that can dynamically index the register file.
3558LegalizerHelper::LegalizeResult
3559LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3560                                         LLT CastTy) {
3561 if (TypeIdx != 1)
3562 return UnableToLegalize;
3563
3564 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3565
3566 LLT SrcEltTy = SrcVecTy.getElementType();
3567 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3568 unsigned OldNumElts = SrcVecTy.getNumElements();
3569
3570 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3571 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3572
3573 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3574 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3575 if (NewNumElts > OldNumElts) {
3576 // Decreasing the vector element size
3577 //
3578 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3579 // =>
3580 // v4i32:castx = bitcast x:v2i64
3581 //
3582 // i64 = bitcast
3583 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3584    //                       (i32 (extract_vector_elt castx, (2 * y + 1))))
3585 //
3586 if (NewNumElts % OldNumElts != 0)
3587 return UnableToLegalize;
3588
3589 // Type of the intermediate result vector.
3590 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3591 LLT MidTy =
3592 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3593
3594 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3595
3596 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3597 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3598
3599 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3600 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3601 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3602 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3603 NewOps[I] = Elt.getReg(0);
3604 }
3605
3606 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3607 MIRBuilder.buildBitcast(Dst, NewVec);
3608 MI.eraseFromParent();
3609 return Legalized;
3610 }
3611
3612 if (NewNumElts < OldNumElts) {
3613 if (NewEltSize % OldEltSize != 0)
3614 return UnableToLegalize;
3615
3616 // This only depends on powers of 2 because we use bit tricks to figure out
3617 // the bit offset we need to shift to get the target element. A general
3618 // expansion could emit division/multiply.
3619 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3620 return UnableToLegalize;
3621
3622 // Increasing the vector element size.
3623 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3624 //
3625 // =>
3626 //
3627 // %cast = G_BITCAST %vec
3628 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3629 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3630 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3631 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3632 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3633 // %elt = G_TRUNC %elt_bits
3634
3635 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3636 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3637
3638 // Divide to get the index in the wider element type.
3639 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3640
3641 Register WideElt = CastVec;
3642 if (CastTy.isVector()) {
3643 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3644 ScaledIdx).getReg(0);
3645 }
3646
3647 // Compute the bit offset into the register of the target element.
3648    Register OffsetBits = getBitcastWiderVectorElementOffset(
3649      MIRBuilder, Idx, NewEltSize, OldEltSize);
3650
3651 // Shift the wide element to get the target element.
3652 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3653 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3654 MI.eraseFromParent();
3655 return Legalized;
3656 }
3657
3658 return UnableToLegalize;
3659}
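// Hand-worked trace of the widening path above (illustrative, types assumed):
// extracting element %idx = 5 of a <8 x s8> vector with CastTy = <2 x s32>:
//   %cast        = G_BITCAST %vec               ; <8 x s8> -> <2 x s32>
//   %scaled_idx  = 5 >> 2 = 1                   ; index into the s32 vector
//   %wide_elt    = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
//   %offset_bits = (5 & 3) << 3 = 8
//   %elt         = G_TRUNC (G_LSHR %wide_elt, 8) ; the requested s8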
3660
3661/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits in \p
3662/// TargetReg, while preserving other bits in \p TargetReg.
3663///
3664/// (InsertReg << Offset) | (TargetReg & ~(((1 << InsertReg.size()) - 1) << Offset))
3665static Register buildBitFieldInsert(MachineIRBuilder &B,
3666                                    Register TargetReg, Register InsertReg,
3667 Register OffsetBits) {
3668 LLT TargetTy = B.getMRI()->getType(TargetReg);
3669 LLT InsertTy = B.getMRI()->getType(InsertReg);
3670 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3671 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3672
3673 // Produce a bitmask of the value to insert
3674 auto EltMask = B.buildConstant(
3675 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3676 InsertTy.getSizeInBits()));
3677 // Shift it into position
3678 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3679 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3680
3681 // Clear out the bits in the wide element
3682 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3683
3684 // The value to insert has all zeros already, so stick it into the masked
3685 // wide element.
3686 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3687}
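// Concrete trace (illustrative values): inserting InsertReg:s8 = 0xEE into
// TargetReg:s32 = 0xAABBCCDD at OffsetBits = 8:
//   ShiftedInsertVal = 0x0000EE00
//   ShiftedMask      = 0x0000FF00, InvShiftedMask = 0xFFFF00FF
//   MaskedOldElt     = 0xAABB00DD
//   result           = 0xAABBEEDD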
3688
3689/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3690/// is increasing the element size, perform the indexing in the target element
3691/// type, and use bit operations to insert at the element position. This is
3692/// intended for architectures that can dynamically index the register file and
3693/// want to force indexing in the native register size.
3694LegalizerHelper::LegalizeResult
3695LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3696                                        LLT CastTy) {
3697 if (TypeIdx != 0)
3698 return UnableToLegalize;
3699
3700 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3701 MI.getFirst4RegLLTs();
3702 LLT VecTy = DstTy;
3703
3704 LLT VecEltTy = VecTy.getElementType();
3705 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3706 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3707 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3708
3709 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3710 unsigned OldNumElts = VecTy.getNumElements();
3711
3712 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3713 if (NewNumElts < OldNumElts) {
3714 if (NewEltSize % OldEltSize != 0)
3715 return UnableToLegalize;
3716
3717 // This only depends on powers of 2 because we use bit tricks to figure out
3718 // the bit offset we need to shift to get the target element. A general
3719 // expansion could emit division/multiply.
3720 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3721 return UnableToLegalize;
3722
3723 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3724 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3725
3726 // Divide to get the index in the wider element type.
3727 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3728
3729 Register ExtractedElt = CastVec;
3730 if (CastTy.isVector()) {
3731 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3732 ScaledIdx).getReg(0);
3733 }
3734
3735 // Compute the bit offset into the register of the target element.
3736    Register OffsetBits = getBitcastWiderVectorElementOffset(
3737      MIRBuilder, Idx, NewEltSize, OldEltSize);
3738
3739 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3740 Val, OffsetBits);
3741 if (CastTy.isVector()) {
3742      InsertedElt = MIRBuilder.buildInsertVectorElement(
3743        CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3744 }
3745
3746 MIRBuilder.buildBitcast(Dst, InsertedElt);
3747 MI.eraseFromParent();
3748 return Legalized;
3749 }
3750
3751 return UnableToLegalize;
3752}
3753
3754// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3755// those whose operands are smaller than the legal vector types.
3756//
3757// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3758//
3759// ===>
3760//
3761// s32 = G_BITCAST <4 x s8>
3762// s32 = G_BITCAST <4 x s8>
3763// s32 = G_BITCAST <4 x s8>
3764// s32 = G_BITCAST <4 x s8>
3765// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3766// <16 x s8> = G_BITCAST <4 x s32>
3767LegalizerHelper::LegalizeResult
3768LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
3769                                     LLT CastTy) {
3770 // Convert it to CONCAT instruction
3771 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3772 if (!ConcatMI) {
3773 return UnableToLegalize;
3774 }
3775
3776 // Check if bitcast is Legal
3777 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3778 LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
3779
3780 // Check if the build vector is Legal
3781 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3782 return UnableToLegalize;
3783 }
3784
3785 // Bitcast the sources
3786 SmallVector<Register> BitcastRegs;
3787 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3788 BitcastRegs.push_back(
3789 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3790 .getReg(0));
3791 }
3792
3793 // Build the scalar values into a vector
3794 Register BuildReg =
3795 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
3796 MIRBuilder.buildBitcast(DstReg, BuildReg);
3797
3798 MI.eraseFromParent();
3799 return Legalized;
3800}
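// For the <16 x s8> example above (an illustrative reading of the code, not a
// statement of any target's policy): each <4 x s8> source gives
// SrcScalTy = s32, so the legality query is
//   {G_BUILD_VECTOR, {CastTy = <4 x s32>, s32}}
// and the transformation only applies when that build_vector is legal.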
3801
3802// This bitcasts a G_SHUFFLE_VECTOR to a different vector type, currently with
3803// the same element size. Mostly used to legalize pointer vectors, where
3804// ptrtoint/inttoptr will be used instead.
3805//
3806// <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
3807// ===>
3808// <4 x s64> = G_PTRTOINT <4 x p0>
3809// <4 x s64> = G_PTRTOINT <4 x p0>
3810// <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
3811// <16 x p0> = G_INTTOPTR <16 x s64>
3812LegalizerHelper::LegalizeResult
3813LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
3814                                      LLT CastTy) {
3815 auto ShuffleMI = cast<GShuffleVector>(&MI);
3816 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
3817 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
3818
3819 // We currently only handle vectors of the same size.
3820 if (TypeIdx != 0 ||
3821 CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
3822 CastTy.getElementCount() != DstTy.getElementCount())
3823 return UnableToLegalize;
3824
3825 LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
3826
3827 auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
3828 auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
3829 auto Shuf =
3830 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
3831 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
3832
3833 MI.eraseFromParent();
3834 return Legalized;
3835}
3836
3837/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
3838///
3839/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
3840///
3841/// ===>
3842///
3843/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
3844/// <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i8>, N / 8
3845/// <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
3846LegalizerHelper::LegalizeResult
3847LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
3848                                         LLT CastTy) {
3849 auto ES = cast<GExtractSubvector>(&MI);
3850
3851 if (!CastTy.isVector())
3852 return UnableToLegalize;
3853
3854 if (TypeIdx != 0)
3855 return UnableToLegalize;
3856
3857 Register Dst = ES->getReg(0);
3858 Register Src = ES->getSrcVec();
3859 uint64_t Idx = ES->getIndexImm();
3860
3862
3863 LLT DstTy = MRI.getType(Dst);
3864 LLT SrcTy = MRI.getType(Src);
3865 ElementCount DstTyEC = DstTy.getElementCount();
3866 ElementCount SrcTyEC = SrcTy.getElementCount();
3867 auto DstTyMinElts = DstTyEC.getKnownMinValue();
3868 auto SrcTyMinElts = SrcTyEC.getKnownMinValue();
3869
3870 if (DstTy == CastTy)
3871 return Legalized;
3872
3873 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
3874 return UnableToLegalize;
3875
3876 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
3877 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
3878 if (CastEltSize < DstEltSize)
3879 return UnableToLegalize;
3880
3881 auto AdjustAmt = CastEltSize / DstEltSize;
3882 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
3883 SrcTyMinElts % AdjustAmt != 0)
3884 return UnableToLegalize;
3885
3886 Idx /= AdjustAmt;
3887 SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
3888 auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
3889 auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
3890 MIRBuilder.buildBitcast(Dst, PromotedES);
3891
3892 ES->eraseFromParent();
3893 return Legalized;
3894}
3895
3896/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
3897///
3898/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
3899/// <vscale x 8 x i1>,
3900/// N
3901///
3902/// ===>
3903///
3904/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
3905/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
3906/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
3907/// <vscale x 1 x i8>, N / 8
3908/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
3909LegalizerHelper::LegalizeResult
3910LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
3911                                        LLT CastTy) {
3912 auto ES = cast<GInsertSubvector>(&MI);
3913
3914 if (!CastTy.isVector())
3915 return UnableToLegalize;
3916
3917 if (TypeIdx != 0)
3918 return UnableToLegalize;
3919
3920 Register Dst = ES->getReg(0);
3921 Register BigVec = ES->getBigVec();
3922 Register SubVec = ES->getSubVec();
3923 uint64_t Idx = ES->getIndexImm();
3924
3926
3927 LLT DstTy = MRI.getType(Dst);
3928 LLT BigVecTy = MRI.getType(BigVec);
3929 LLT SubVecTy = MRI.getType(SubVec);
3930
3931 if (DstTy == CastTy)
3932 return Legalized;
3933
3934 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
3935 return UnableToLegalize;
3936
3937 ElementCount DstTyEC = DstTy.getElementCount();
3938 ElementCount BigVecTyEC = BigVecTy.getElementCount();
3939 ElementCount SubVecTyEC = SubVecTy.getElementCount();
3940 auto DstTyMinElts = DstTyEC.getKnownMinValue();
3941 auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
3942 auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();
3943
3944 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
3945 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
3946 if (CastEltSize < DstEltSize)
3947 return UnableToLegalize;
3948
3949 auto AdjustAmt = CastEltSize / DstEltSize;
3950 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
3951 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
3952 return UnableToLegalize;
3953
3954 Idx /= AdjustAmt;
3955 BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
3956 SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
3957 auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
3958 auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
3959 auto PromotedIS =
3960 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
3961 MIRBuilder.buildBitcast(Dst, PromotedIS);
3962
3963 ES->eraseFromParent();
3964 return Legalized;
3965}
3966
3967LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
3968  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
3969 Register DstReg = LoadMI.getDstReg();
3970 Register PtrReg = LoadMI.getPointerReg();
3971 LLT DstTy = MRI.getType(DstReg);
3972 MachineMemOperand &MMO = LoadMI.getMMO();
3973 LLT MemTy = MMO.getMemoryType();
3975
3976 unsigned MemSizeInBits = MemTy.getSizeInBits();
3977 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
3978
3979 if (MemSizeInBits != MemStoreSizeInBits) {
3980 if (MemTy.isVector())
3981 return UnableToLegalize;
3982
3983 // Promote to a byte-sized load if not loading an integral number of
3984 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
3985 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
3986 MachineMemOperand *NewMMO =
3987 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
3988
3989 Register LoadReg = DstReg;
3990 LLT LoadTy = DstTy;
3991
3992 // If this wasn't already an extending load, we need to widen the result
3993 // register to avoid creating a load with a narrower result than the source.
3994 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
3995 LoadTy = WideMemTy;
3996 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
3997 }
3998
3999 if (isa<GSExtLoad>(LoadMI)) {
4000 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4001 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4002 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
4003 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4004 // The extra bits are guaranteed to be zero, since we stored them that
4005 // way. A zext load from Wide thus automatically gives zext from MemVT.
4006 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4007 } else {
4008 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4009 }
4010
4011 if (DstTy != LoadTy)
4012 MIRBuilder.buildTrunc(DstReg, LoadReg);
4013
4014 LoadMI.eraseFromParent();
4015 return Legalized;
4016 }
4017
4018 // Big endian lowering not implemented.
4019  if (MIRBuilder.getDataLayout().isBigEndian())
4020    return UnableToLegalize;
4021
4022 // This load needs splitting into power of 2 sized loads.
4023 //
4024 // Our strategy here is to generate anyextending loads for the smaller
4025 // types up to next power-2 result type, and then combine the two larger
4026 // result values together, before truncating back down to the non-pow-2
4027 // type.
4028 // E.g. v1 = i24 load =>
4029 // v2 = i32 zextload (2 byte)
4030 // v3 = i32 load (1 byte)
4031 // v4 = i32 shl v3, 16
4032 // v5 = i32 or v4, v2
4033 // v1 = i24 trunc v5
4034 // By doing this we generate the correct truncate which should get
4035 // combined away as an artifact with a matching extend.
4036
4037 uint64_t LargeSplitSize, SmallSplitSize;
4038
4039 if (!isPowerOf2_32(MemSizeInBits)) {
4040 // This load needs splitting into power of 2 sized loads.
4041 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
4042 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4043 } else {
4044 // This is already a power of 2, but we still need to split this in half.
4045 //
4046 // Assume we're being asked to decompose an unaligned load.
4047 // TODO: If this requires multiple splits, handle them all at once.
4048 auto &Ctx = MF.getFunction().getContext();
4049 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4050 return UnableToLegalize;
4051
4052 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4053 }
4054
4055 if (MemTy.isVector()) {
4056 // TODO: Handle vector extloads
4057 if (MemTy != DstTy)
4058 return UnableToLegalize;
4059
4060 // TODO: We can do better than scalarizing the vector and at least split it
4061 // in half.
4062 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
4063 }
4064
4065 MachineMemOperand *LargeMMO =
4066 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4067 MachineMemOperand *SmallMMO =
4068 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4069
4070 LLT PtrTy = MRI.getType(PtrReg);
4071 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
4072 LLT AnyExtTy = LLT::scalar(AnyExtSize);
4073 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4074 PtrReg, *LargeMMO);
4075
4076 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
4077 LargeSplitSize / 8);
4078 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4079 auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
4080 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
4081 SmallPtr, *SmallMMO);
4082
4083 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4084 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4085
4086 if (AnyExtTy == DstTy)
4087 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4088 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
4089 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4090 MIRBuilder.buildTrunc(DstReg, {Or});
4091 } else {
4092 assert(DstTy.isPointer() && "expected pointer");
4093 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4094
4095 // FIXME: We currently consider this to be illegal for non-integral address
4096    // spaces, but we still need a way to reinterpret the bits.
4097 MIRBuilder.buildIntToPtr(DstReg, Or);
4098 }
4099
4100 LoadMI.eraseFromParent();
4101 return Legalized;
4102}
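// Sketch of the power-of-2 branch (illustrative, little-endian, assuming a
// plain G_LOAD that the target rejects as an unaligned access): an unaligned
// s32 load is split into two halves with LargeSplitSize = SmallSplitSize = 16:
//   %lo = G_ZEXTLOAD %ptr        (2 bytes)
//   %hi = G_LOAD     %ptr + 2    (2 bytes, any-extended to s32)
//   %v  = G_OR (G_SHL %hi, 16), %lo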
4103
4104LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
4105  // Lower a non-power of 2 store into multiple pow-2 stores.
4106 // E.g. split an i24 store into an i16 store + i8 store.
4107 // We do this by first extending the stored value to the next largest power
4108 // of 2 type, and then using truncating stores to store the components.
4109  // By doing this, as with G_LOAD, we generate an extend that can be
4110  // artifact-combined away instead of leaving behind extracts.
4111 Register SrcReg = StoreMI.getValueReg();
4112 Register PtrReg = StoreMI.getPointerReg();
4113 LLT SrcTy = MRI.getType(SrcReg);
4115 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4116 LLT MemTy = MMO.getMemoryType();
4117
4118 unsigned StoreWidth = MemTy.getSizeInBits();
4119 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
4120
4121 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4122 // Promote to a byte-sized store with upper bits zero if not
4123 // storing an integral number of bytes. For example, promote
4124 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
4125 LLT WideTy = LLT::scalar(StoreSizeInBits);
4126
4127 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4128 // Avoid creating a store with a narrower source than result.
4129 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4130 SrcTy = WideTy;
4131 }
4132
4133 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4134
4135 MachineMemOperand *NewMMO =
4136 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
4137 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4138 StoreMI.eraseFromParent();
4139 return Legalized;
4140 }
4141
4142 if (MemTy.isVector()) {
4143 if (MemTy != SrcTy)
4144 return scalarizeVectorBooleanStore(StoreMI);
4145
4146 // TODO: We can do better than scalarizing the vector and at least split it
4147 // in half.
4148 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
4149 }
4150
4151 unsigned MemSizeInBits = MemTy.getSizeInBits();
4152 uint64_t LargeSplitSize, SmallSplitSize;
4153
4154 if (!isPowerOf2_32(MemSizeInBits)) {
4155 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
4156 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
4157 } else {
4158 auto &Ctx = MF.getFunction().getContext();
4159 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4160 return UnableToLegalize; // Don't know what we're being asked to do.
4161
4162 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4163 }
4164
4165 // Extend to the next pow-2. If this store was itself the result of lowering,
4166 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
4167 // that's wider than the stored size.
4168 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
4169 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
4170
4171 if (SrcTy.isPointer()) {
4172 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
4173 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4174 }
4175
4176 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4177
4178 // Obtain the smaller value by shifting away the larger value.
4179 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4180 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4181
4182 // Generate the PtrAdd and truncating stores.
4183 LLT PtrTy = MRI.getType(PtrReg);
4184 auto OffsetCst = MIRBuilder.buildConstant(
4185 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
4186 auto SmallPtr =
4187 MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
4188
4189 MachineMemOperand *LargeMMO =
4190 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4191 MachineMemOperand *SmallMMO =
4192 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4193 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4194 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4195 StoreMI.eraseFromParent();
4196 return Legalized;
4197}
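// Worked example of the non-power-of-2 split (illustrative, little-endian):
// storing an s24 %val gives LargeSplitSize = 16 and SmallSplitSize = 8:
//   %ext   = G_ANYEXT %val to s32
//   %small = G_LSHR %ext, 16
//   G_STORE %ext,   %ptr       (2 bytes)
//   G_STORE %small, %ptr + 2   (1 byte)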
4198
4199LegalizerHelper::LegalizeResult
4200LegalizerHelper::scalarizeVectorBooleanStore(GStore &StoreMI) {
4201  Register SrcReg = StoreMI.getValueReg();
4202 Register PtrReg = StoreMI.getPointerReg();
4203 LLT SrcTy = MRI.getType(SrcReg);
4204 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4205 LLT MemTy = MMO.getMemoryType();
4206 LLT MemScalarTy = MemTy.getElementType();
4208
4209 assert(SrcTy.isVector() && "Expect a vector store type");
4210
4211 if (!MemScalarTy.isByteSized()) {
4212 // We need to build an integer scalar of the vector bit pattern.
4213 // It's not legal for us to add padding when storing a vector.
4214 unsigned NumBits = MemTy.getSizeInBits();
4215 LLT IntTy = LLT::scalar(NumBits);
4216 auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
4217 LLT IdxTy = getLLTForMVT(TLI.getVectorIdxTy(MF.getDataLayout()));
4218
4219 for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
4220      auto Elt = MIRBuilder.buildExtractVectorElement(
4221          SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
4222 auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
4223 auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
4224 unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
4225 ? (MemTy.getNumElements() - 1) - I
4226 : I;
4227 auto ShiftAmt = MIRBuilder.buildConstant(
4228 IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
4229 auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4230 CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4231 }
4232 auto PtrInfo = MMO.getPointerInfo();
4233 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
4234 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4235 StoreMI.eraseFromParent();
4236 return Legalized;
4237 }
4238
4239 // TODO: implement simple scalarization.
4240 return UnableToLegalize;
4241}
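// Trace of the bit-packing loop above (illustrative, little-endian): storing a
// <4 x s1> value with MemTy = <4 x s1> builds a single s4 integer:
//   CurrVal = 0
//   for I in 0..3:  CurrVal |= zext(trunc(elt[I]) : s1, s4) << (I * 1)
//   G_STORE CurrVal, %ptr      ; one s4-typed memory operand, no padding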
4242
4244LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
4245 switch (MI.getOpcode()) {
4246 case TargetOpcode::G_LOAD: {
4247 if (TypeIdx != 0)
4248 return UnableToLegalize;
4249 MachineMemOperand &MMO = **MI.memoperands_begin();
4250
4251 // Not sure how to interpret a bitcast of an extending load.
4252 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4253 return UnableToLegalize;
4254
4256 bitcastDst(MI, CastTy, 0);
4257 MMO.setType(CastTy);
4258 // The range metadata is no longer valid when reinterpreted as a different
4259 // type.
4260 MMO.clearRanges();
4262 return Legalized;
4263 }
4264 case TargetOpcode::G_STORE: {
4265 if (TypeIdx != 0)
4266 return UnableToLegalize;
4267
4268 MachineMemOperand &MMO = **MI.memoperands_begin();
4269
4270 // Not sure how to interpret a bitcast of a truncating store.
4271 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4272 return UnableToLegalize;
4273
4275 bitcastSrc(MI, CastTy, 0);
4276 MMO.setType(CastTy);
4278 return Legalized;
4279 }
4280 case TargetOpcode::G_SELECT: {
4281 if (TypeIdx != 0)
4282 return UnableToLegalize;
4283
4284 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
4285 LLVM_DEBUG(
4286 dbgs() << "bitcast action not implemented for vector select\n");
4287 return UnableToLegalize;
4288 }
4289
4291 bitcastSrc(MI, CastTy, 2);
4292 bitcastSrc(MI, CastTy, 3);
4293 bitcastDst(MI, CastTy, 0);
4295 return Legalized;
4296 }
4297 case TargetOpcode::G_AND:
4298 case TargetOpcode::G_OR:
4299 case TargetOpcode::G_XOR: {
4301 bitcastSrc(MI, CastTy, 1);
4302 bitcastSrc(MI, CastTy, 2);
4303 bitcastDst(MI, CastTy, 0);
4305 return Legalized;
4306 }
4307 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4308 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
4309 case TargetOpcode::G_INSERT_VECTOR_ELT:
4310 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
4311 case TargetOpcode::G_CONCAT_VECTORS:
4312 return bitcastConcatVector(MI, TypeIdx, CastTy);
4313 case TargetOpcode::G_SHUFFLE_VECTOR:
4314 return bitcastShuffleVector(MI, TypeIdx, CastTy);
4315 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4316 return bitcastExtractSubvector(MI, TypeIdx, CastTy);
4317 case TargetOpcode::G_INSERT_SUBVECTOR:
4318 return bitcastInsertSubvector(MI, TypeIdx, CastTy);
4319 default:
4320 return UnableToLegalize;
4321 }
4322}
4323
4324// Legalize an instruction by changing the opcode in place.
4325void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
4327 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
4329}
4330
4331LegalizerHelper::LegalizeResult
4332LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4333 using namespace TargetOpcode;
4334
4335 switch(MI.getOpcode()) {
4336 default:
4337 return UnableToLegalize;
4338 case TargetOpcode::G_FCONSTANT:
4339 return lowerFConstant(MI);
4340 case TargetOpcode::G_BITCAST:
4341 return lowerBitcast(MI);
4342 case TargetOpcode::G_SREM:
4343 case TargetOpcode::G_UREM: {
4344 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4345 auto Quot =
4346 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4347 {MI.getOperand(1), MI.getOperand(2)});
4348
4349 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
4350 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
4351 MI.eraseFromParent();
4352 return Legalized;
4353 }
4354 case TargetOpcode::G_SADDO:
4355 case TargetOpcode::G_SSUBO:
4356 return lowerSADDO_SSUBO(MI);
4357 case TargetOpcode::G_UMULH:
4358 case TargetOpcode::G_SMULH:
4359 return lowerSMULH_UMULH(MI);
4360 case TargetOpcode::G_SMULO:
4361 case TargetOpcode::G_UMULO: {
4362 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4363 // result.
4364 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
4365 LLT Ty = MRI.getType(Res);
4366
4367 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4368 ? TargetOpcode::G_SMULH
4369 : TargetOpcode::G_UMULH;
4370
4372 const auto &TII = MIRBuilder.getTII();
4373 MI.setDesc(TII.get(TargetOpcode::G_MUL));
4374 MI.removeOperand(1);
4376
4377 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4378 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4379
4380 // Move insert point forward so we can use the Res register if needed.
4382
4383 // For *signed* multiply, overflow is detected by checking:
4384 // (hi != (lo >> bitwidth-1))
4385 if (Opcode == TargetOpcode::G_SMULH) {
4386 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4387 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4388 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
4389 } else {
4390 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
4391 }
4392 return Legalized;
4393 }
4394 case TargetOpcode::G_FNEG: {
4395 auto [Res, SubByReg] = MI.getFirst2Regs();
4396 LLT Ty = MRI.getType(Res);
4397
4398 auto SignMask = MIRBuilder.buildConstant(
4399        Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
4400    MIRBuilder.buildXor(Res, SubByReg, SignMask);
4401 MI.eraseFromParent();
4402 return Legalized;
4403 }
4404 case TargetOpcode::G_FSUB:
4405 case TargetOpcode::G_STRICT_FSUB: {
4406 auto [Res, LHS, RHS] = MI.getFirst3Regs();
4407 LLT Ty = MRI.getType(Res);
4408
4409 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4410 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4411
4412 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4413 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4414 else
4415 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4416
4417 MI.eraseFromParent();
4418 return Legalized;
4419 }
4420 case TargetOpcode::G_FMAD:
4421 return lowerFMad(MI);
4422 case TargetOpcode::G_FFLOOR:
4423 return lowerFFloor(MI);
4424 case TargetOpcode::G_LROUND:
4425 case TargetOpcode::G_LLROUND: {
4426 Register DstReg = MI.getOperand(0).getReg();
4427 Register SrcReg = MI.getOperand(1).getReg();
4428 LLT SrcTy = MRI.getType(SrcReg);
4429 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4430 {SrcReg});
4431 MIRBuilder.buildFPTOSI(DstReg, Round);
4432 MI.eraseFromParent();
4433 return Legalized;
4434 }
4435 case TargetOpcode::G_INTRINSIC_ROUND:
4436 return lowerIntrinsicRound(MI);
4437 case TargetOpcode::G_FRINT: {
4438 // Since round even is the assumed rounding mode for unconstrained FP
4439 // operations, rint and roundeven are the same operation.
4440 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4441 return Legalized;
4442 }
4443 case TargetOpcode::G_INTRINSIC_LRINT:
4444 case TargetOpcode::G_INTRINSIC_LLRINT: {
4445 Register DstReg = MI.getOperand(0).getReg();
4446 Register SrcReg = MI.getOperand(1).getReg();
4447 LLT SrcTy = MRI.getType(SrcReg);
4448 auto Round =
4449 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4450 MIRBuilder.buildFPTOSI(DstReg, Round);
4451 MI.eraseFromParent();
4452 return Legalized;
4453 }
4454 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4455 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
4456 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4457 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4458 **MI.memoperands_begin());
4459 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4460 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4461 MI.eraseFromParent();
4462 return Legalized;
4463 }
4464 case TargetOpcode::G_LOAD:
4465 case TargetOpcode::G_SEXTLOAD:
4466 case TargetOpcode::G_ZEXTLOAD:
4467 return lowerLoad(cast<GAnyLoad>(MI));
4468 case TargetOpcode::G_STORE:
4469 return lowerStore(cast<GStore>(MI));
4470 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4471 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4472 case TargetOpcode::G_CTLZ:
4473 case TargetOpcode::G_CTTZ:
4474 case TargetOpcode::G_CTPOP:
4475 return lowerBitCount(MI);
4476 case G_UADDO: {
4477 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
4478
4479 Register NewRes = MRI.cloneVirtualRegister(Res);
4480
4481 MIRBuilder.buildAdd(NewRes, LHS, RHS);
4482 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4483
4484 MIRBuilder.buildCopy(Res, NewRes);
4485
4486 MI.eraseFromParent();
4487 return Legalized;
4488 }
4489 case G_UADDE: {
4490 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
4491 const LLT CondTy = MRI.getType(CarryOut);
4492 const LLT Ty = MRI.getType(Res);
4493
4494 Register NewRes = MRI.cloneVirtualRegister(Res);
4495
4496 // Initial add of the two operands.
4497 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
4498
4499 // Initial check for carry.
4500 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4501
4502 // Add the sum and the carry.
4503 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
4504 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4505
4506 // Second check for carry. We can only carry if the initial sum is all 1s
4507 // and the carry is set, resulting in a new sum of 0.
4508 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4509 auto ResEqZero =
4510 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
4511 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4512 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
4513
4514 MIRBuilder.buildCopy(Res, NewRes);
4515
4516 MI.eraseFromParent();
4517 return Legalized;
4518 }
4519 case G_USUBO: {
4520 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
4521
4522 MIRBuilder.buildSub(Res, LHS, RHS);
4523    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
4524
4525 MI.eraseFromParent();
4526 return Legalized;
4527 }
4528 case G_USUBE: {
4529 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
4530 const LLT CondTy = MRI.getType(BorrowOut);
4531 const LLT Ty = MRI.getType(Res);
4532
4533 // Initial subtract of the two operands.
4534 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
4535
4536 // Initial check for borrow.
4537 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4538
4539 // Subtract the borrow from the first subtract.
4540 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
4541 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4542
4543 // Second check for borrow. We can only borrow if the initial difference is
4544 // 0 and the borrow is set, resulting in a new difference of all 1s.
4545 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4546 auto TmpResEqZero =
4547 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4548 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4549 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4550
4551 MI.eraseFromParent();
4552 return Legalized;
4553 }
4554 case G_UITOFP:
4555 return lowerUITOFP(MI);
4556 case G_SITOFP:
4557 return lowerSITOFP(MI);
4558 case G_FPTOUI:
4559 return lowerFPTOUI(MI);
4560 case G_FPTOSI:
4561 return lowerFPTOSI(MI);
4562 case G_FPTOUI_SAT:
4563 case G_FPTOSI_SAT:
4564 return lowerFPTOINT_SAT(MI);
4565 case G_FPTRUNC:
4566 return lowerFPTRUNC(MI);
4567 case G_FPOWI:
4568 return lowerFPOWI(MI);
4569 case G_SMIN:
4570 case G_SMAX:
4571 case G_UMIN:
4572 case G_UMAX:
4573 return lowerMinMax(MI);
4574 case G_SCMP:
4575 case G_UCMP:
4576 return lowerThreewayCompare(MI);
4577 case G_FCOPYSIGN:
4578 return lowerFCopySign(MI);
4579 case G_FMINNUM:
4580 case G_FMAXNUM:
4581 return lowerFMinNumMaxNum(MI);
4582 case G_MERGE_VALUES:
4583 return lowerMergeValues(MI);
4584 case G_UNMERGE_VALUES:
4585 return lowerUnmergeValues(MI);
4586 case TargetOpcode::G_SEXT_INREG: {
4587 assert(MI.getOperand(2).isImm() && "Expected immediate");
4588 int64_t SizeInBits = MI.getOperand(2).getImm();
4589
4590 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4591 LLT DstTy = MRI.getType(DstReg);
4592 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4593
4594 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4595 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4596 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4597 MI.eraseFromParent();
4598 return Legalized;
4599 }
4600 case G_EXTRACT_VECTOR_ELT:
4601 case G_INSERT_VECTOR_ELT:
4602    return lowerExtractInsertVectorElt(MI);
4603  case G_SHUFFLE_VECTOR:
4604 return lowerShuffleVector(MI);
4605 case G_VECTOR_COMPRESS:
4606 return lowerVECTOR_COMPRESS(MI);
4607 case G_DYN_STACKALLOC:
4608 return lowerDynStackAlloc(MI);
4609 case G_STACKSAVE:
4610 return lowerStackSave(MI);
4611 case G_STACKRESTORE:
4612 return lowerStackRestore(MI);
4613 case G_EXTRACT:
4614 return lowerExtract(MI);
4615 case G_INSERT:
4616 return lowerInsert(MI);
4617 case G_BSWAP:
4618 return lowerBswap(MI);
4619 case G_BITREVERSE:
4620 return lowerBitreverse(MI);
4621 case G_READ_REGISTER:
4622 case G_WRITE_REGISTER:
4623 return lowerReadWriteRegister(MI);
4624 case G_UADDSAT:
4625 case G_USUBSAT: {
4626 // Try to make a reasonable guess about which lowering strategy to use. The
4627 // target can override this with custom lowering and calling the
4628 // implementation functions.
4629 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4630 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4631 return lowerAddSubSatToMinMax(MI);
4632    return lowerAddSubSatToAddoSubo(MI);
4633  }
4634 case G_SADDSAT:
4635 case G_SSUBSAT: {
4636 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4637
4638 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4639 // since it's a shorter expansion. However, we would need to figure out the
4640 // preferred boolean type for the carry out for the query.
4641 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4642 return lowerAddSubSatToMinMax(MI);
4643    return lowerAddSubSatToAddoSubo(MI);
4644  }
4645 case G_SSHLSAT:
4646 case G_USHLSAT:
4647 return lowerShlSat(MI);
4648 case G_ABS:
4649 return lowerAbsToAddXor(MI);
4650 case G_FABS:
4651 return lowerFAbs(MI);
4652 case G_SELECT:
4653 return lowerSelect(MI);
4654 case G_IS_FPCLASS:
4655 return lowerISFPCLASS(MI);
4656 case G_SDIVREM:
4657 case G_UDIVREM:
4658 return lowerDIVREM(MI);
4659 case G_FSHL:
4660 case G_FSHR:
4661 return lowerFunnelShift(MI);
4662 case G_ROTL:
4663 case G_ROTR:
4664 return lowerRotate(MI);
4665 case G_MEMSET:
4666 case G_MEMCPY:
4667 case G_MEMMOVE:
4668 return lowerMemCpyFamily(MI);
4669 case G_MEMCPY_INLINE:
4670 return lowerMemcpyInline(MI);
4671 case G_ZEXT:
4672 case G_SEXT:
4673 case G_ANYEXT:
4674 return lowerEXT(MI);
4675 case G_TRUNC:
4676 return lowerTRUNC(MI);
4677  GISEL_VECREDUCE_CASES_NONSEQ
4678    return lowerVectorReduction(MI);
4679 case G_VAARG:
4680 return lowerVAArg(MI);
4681 }
4682}
4683
4684Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4685                                                  Align MinAlign) const {
4686 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4687 // datalayout for the preferred alignment. Also there should be a target hook
4688 // for this to allow targets to reduce the alignment and ignore the
4689 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4690 // the type.
4691 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4692}
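// For example (illustrative, and subject to the FIXME above): an s96 value has
// a 12-byte store size, so this returns max(Align(PowerOf2Ceil(12)), MinAlign)
// = max(Align(16), MinAlign).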
4693
4694MachineInstrBuilder
4695LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4696                                      MachinePointerInfo &PtrInfo) {
4697  MachineFunction &MF = MIRBuilder.getMF();
4698  const DataLayout &DL = MIRBuilder.getDataLayout();
4699  int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4700
4701 unsigned AddrSpace = DL.getAllocaAddrSpace();
4702 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4703
4704 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4705 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4706}
4707
4708MachineInstrBuilder LegalizerHelper::createStackStoreLoad(const DstOp &Res,
4709                                                          const SrcOp &Val) {
4710 LLT SrcTy = Val.getLLTTy(MRI);
4711 Align StackTypeAlign =
4712 std::max(getStackTemporaryAlignment(SrcTy),
4713               getStackTemporaryAlignment(Res.getLLTTy(MRI)));
4714  MachinePointerInfo PtrInfo;
4715 auto StackTemp =
4716 createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo);
4717
4718 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4719 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4720}
4721
4722static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
4723                                 LLT VecTy) {
4724 LLT IdxTy = B.getMRI()->getType(IdxReg);
4725 unsigned NElts = VecTy.getNumElements();
4726
4727 int64_t IdxVal;
4728 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4729 if (IdxVal < VecTy.getNumElements())
4730 return IdxReg;
4731 // If a constant index would be out of bounds, clamp it as well.
4732 }
4733
4734 if (isPowerOf2_32(NElts)) {
4735 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4736 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4737 }
4738
4739 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4740 .getReg(0);
4741}
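// Scalar model of the clamp above (an illustrative sketch, not part of the
// helper itself):
//   unsigned clampIdx(unsigned Idx, unsigned NElts) {
//     return isPowerOf2_32(NElts) ? (Idx & (NElts - 1))      // e.g. NElts = 8
//                                 : std::min(Idx, NElts - 1); // e.g. NElts = 6
//   }
// A constant index that is already in bounds is returned unchanged.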
4742
4743Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4744                                                  Register Index) {
4745 LLT EltTy = VecTy.getElementType();
4746
4747 // Calculate the element offset and add it to the pointer.
4748 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4749 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4750 "Converting bits to bytes lost precision");
4751
4752 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4753
4754 // Convert index to the correct size for the address space.
4755  const DataLayout &DL = MIRBuilder.getDataLayout();
4756  unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4757 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4758 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4759 if (IdxTy != MRI.getType(Index))
4760 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4761
4762 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4763 MIRBuilder.buildConstant(IdxTy, EltSize));
4764
4765 LLT PtrTy = MRI.getType(VecPtr);
4766 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4767}
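// Worked example (illustrative types): addressing element %idx = 3 of a
// <4 x s32> vector in memory, with a 64-bit index size:
//   %clamped = clampVectorIndex(%idx)         ; 3
//   %offset  = G_MUL %clamped, 4              ; EltSize = 4 bytes
//   %eltptr  = G_PTR_ADD %vecptr, %offset     ; %vecptr + 12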
4768
4769#ifndef NDEBUG
4770/// Check that all vector operands have same number of elements. Other operands
4771/// should be listed in NonVecOp.
4772static bool hasSameNumEltsOnAllVectorOperands(
4773    GenericMachineInstr &MI, MachineRegisterInfo &MRI,
4774    std::initializer_list<unsigned> NonVecOpIndices) {
4775 if (MI.getNumMemOperands() != 0)
4776 return false;
4777
4778 LLT VecTy = MRI.getType(MI.getReg(0));
4779 if (!VecTy.isVector())
4780 return false;
4781 unsigned NumElts = VecTy.getNumElements();
4782
4783 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4784 MachineOperand &Op = MI.getOperand(OpIdx);
4785 if (!Op.isReg()) {
4786 if (!is_contained(NonVecOpIndices, OpIdx))
4787 return false;
4788 continue;
4789 }
4790
4791 LLT Ty = MRI.getType(Op.getReg());
4792 if (!Ty.isVector()) {
4793 if (!is_contained(NonVecOpIndices, OpIdx))
4794 return false;
4795 continue;
4796 }
4797
4798 if (Ty.getNumElements() != NumElts)
4799 return false;
4800 }
4801
4802 return true;
4803}
4804#endif
4805
4806/// Fill \p DstOps with DstOps that, combined, have the same number of elements
4807/// as \p Ty. These DstOps are either scalars when \p NumElts = 1 or vectors
4808/// with \p NumElts elements. When Ty.getNumElements() is not a multiple of
4809/// \p NumElts, the last DstOp (the leftover) has fewer than \p NumElts elements.
4810static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
4811 unsigned NumElts) {
4812 LLT LeftoverTy;
4813 assert(Ty.isVector() && "Expected vector type");
4814 LLT EltTy = Ty.getElementType();
4815 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
4816 int NumParts, NumLeftover;
4817 std::tie(NumParts, NumLeftover) =
4818 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
4819
4820 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
4821 for (int i = 0; i < NumParts; ++i) {
4822 DstOps.push_back(NarrowTy);
4823 }
4824
4825 if (LeftoverTy.isValid()) {
4826 assert(NumLeftover == 1 && "expected exactly one leftover");
4827 DstOps.push_back(LeftoverTy);
4828 }
4829}
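// For instance (illustrative): with Ty = <7 x s16> and NumElts = 4,
// getNarrowTypeBreakDown yields one <4 x s16> part plus a <3 x s16> leftover,
// so DstOps = { <4 x s16>, <3 x s16> }.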
4830
4831/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
4832/// made from \p Op depending on operand type.
4833static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
4834 MachineOperand &Op) {
4835 for (unsigned i = 0; i < N; ++i) {
4836 if (Op.isReg())
4837 Ops.push_back(Op.getReg());
4838 else if (Op.isImm())
4839 Ops.push_back(Op.getImm());
4840 else if (Op.isPredicate())
4841 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
4842 else
4843 llvm_unreachable("Unsupported type");
4844 }
4845}
4846
4847// Handle splitting vector operations which need to have the same number of
4848// elements in each type index, but each type index may have a different element
4849// type.
4850//
4851// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
4852// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4853// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4854//
4855// Also handles some irregular breakdown cases, e.g.
4856// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
4857// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4858// s64 = G_SHL s64, s32
4859LegalizerHelper::LegalizeResult
4860LegalizerHelper::fewerElementsVectorMultiEltType(
4861    GenericMachineInstr &MI, unsigned NumElts,
4862 std::initializer_list<unsigned> NonVecOpIndices) {
4863 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
4864 "Non-compatible opcode or not specified non-vector operands");
4865 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4866
4867 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4868 unsigned NumDefs = MI.getNumDefs();
4869
4870 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
4871 // Build instructions with DstOps to use instruction found by CSE directly.
4872 // CSE copies found instruction into given vreg when building with vreg dest.
4873 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
4874 // Output registers will be taken from created instructions.
4875 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
4876 for (unsigned i = 0; i < NumDefs; ++i) {
4877 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
4878 }
4879
4880 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
4881 // Operands listed in NonVecOpIndices will be used as is without splitting;
4882 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
4883 // scalar condition (op 1), immediate in sext_inreg (op 2).
4884 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
4885 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4886 ++UseIdx, ++UseNo) {
4887 if (is_contained(NonVecOpIndices, UseIdx)) {
4888 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
4889 MI.getOperand(UseIdx));
4890 } else {
4891 SmallVector<Register, 8> SplitPieces;
4892 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
4893 MRI);
4894 for (auto Reg : SplitPieces)
4895 InputOpsPieces[UseNo].push_back(Reg);
4896 }
4897 }
4898
4899 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4900
4901 // Take i-th piece of each input operand split and build sub-vector/scalar
4902 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
4903 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4904    SmallVector<DstOp, 2> Defs;
4905    for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4906 Defs.push_back(OutputOpsPieces[DstNo][i]);
4907
4909 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
4910 Uses.push_back(InputOpsPieces[InputNo][i]);
4911
4912 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
4913 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4914 OutputRegs[DstNo].push_back(I.getReg(DstNo));
4915 }
4916
4917 // Merge small outputs into MI's output for each def operand.
4918 if (NumLeftovers) {
4919 for (unsigned i = 0; i < NumDefs; ++i)
4920 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
4921 } else {
4922 for (unsigned i = 0; i < NumDefs; ++i)
4923 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
4924 }
4925
4926 MI.eraseFromParent();
4927 return Legalized;
4928}
4929
4930LegalizerHelper::LegalizeResult
4931LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
4932                                        unsigned NumElts) {
4933 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4934
4935 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4936 unsigned NumDefs = MI.getNumDefs();
4937
4938 SmallVector<DstOp, 8> OutputOpsPieces;
4939 SmallVector<Register, 8> OutputRegs;
4940 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
4941
4942 // Instructions that perform register split will be inserted in basic block
4943 // where register is defined (basic block is in the next operand).
4944 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
4945 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4946 UseIdx += 2, ++UseNo) {
4947 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
4949 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
4950 MIRBuilder, MRI);
4951 }
4952
4953 // Build PHIs with fewer elements.
4954 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4955 MIRBuilder.setInsertPt(*MI.getParent(), MI);
4956 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4957 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
4958 Phi.addDef(
4959 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
4960 OutputRegs.push_back(Phi.getReg(0));
4961
4962 for (unsigned j = 0; j < NumInputs / 2; ++j) {
4963 Phi.addUse(InputOpsPieces[j][i]);
4964 Phi.add(MI.getOperand(1 + j * 2 + 1));
4965 }
4966 }
4967
4968 // Set the insert point after the existing PHIs
4969 MachineBasicBlock &MBB = *MI.getParent();
4970  MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
4971
4972 // Merge small outputs into MI's def.
4973 if (NumLeftovers) {
4974 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
4975 } else {
4976 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
4977 }
4978
4979 MI.eraseFromParent();
4980 return Legalized;
4981}
4982
4983LegalizerHelper::LegalizeResult
4984LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
4985                                                  unsigned TypeIdx,
4986 LLT NarrowTy) {
4987 const int NumDst = MI.getNumOperands() - 1;
4988 const Register SrcReg = MI.getOperand(NumDst).getReg();
4989 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4990 LLT SrcTy = MRI.getType(SrcReg);
4991
4992 if (TypeIdx != 1 || NarrowTy == DstTy)
4993 return UnableToLegalize;
4994
4995  // Requires compatible types. Otherwise SrcReg should have been defined by a
4996  // merge-like instruction that would get artifact-combined. Most likely the
4997  // instruction that defines SrcReg has to perform more/fewer-elements
4998  // legalization compatible with NarrowTy.
4999 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5000 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5001
5002 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5003 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
5004 return UnableToLegalize;
5005
5006  // This is most likely DstTy (smaller than register size) packed in SrcTy
5007  // (larger than register size), and since the unmerge was not combined it will
5008  // be lowered to bit-sequence extracts from a register. Unpack SrcTy to NarrowTy
5009  // (register size) pieces first, then unpack each NarrowTy piece to DstTy.
5010
5011 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
5012 //
5013 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5014 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5015 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5016 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5017 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5018 const int PartsPerUnmerge = NumDst / NumUnmerge;
5019
5020 for (int I = 0; I != NumUnmerge; ++I) {
5021 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5022
5023 for (int J = 0; J != PartsPerUnmerge; ++J)
5024 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
5025 MIB.addUse(Unmerge.getReg(I));
5026 }
5027
5028 MI.eraseFromParent();
5029 return Legalized;
5030}
5031
5032LegalizerHelper::LegalizeResult
5033LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
5034                                          LLT NarrowTy) {
5035 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5036  // Requires compatible types. Otherwise the user of DstReg did not perform an
5037  // unmerge that should have been artifact-combined. Most likely the instruction
5038  // that uses DstReg has to do more/fewer-elements legalization compatible with NarrowTy.
5039 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5040 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5041 if (NarrowTy == SrcTy)
5042 return UnableToLegalize;
5043
5044  // This attempts to lower part of an LCMTy merge/unmerge sequence. Its intended
5045  // use is for old MIR tests. Since the switch to more/fewer-elements
5046  // legalization it should no longer be possible to generate MIR like this when
5047  // starting from llvm-ir, because the LCMTy approach was replaced with merge/unmerge to vector elements.
5048 if (TypeIdx == 1) {
5049 assert(SrcTy.isVector() && "Expected vector types");
5050 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5051 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5052 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
5053 return UnableToLegalize;
5054 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
5055 //
5056 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
5057 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
5058 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
5059 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
5060 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
5061 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
5062
5063    SmallVector<Register, 8> Elts;
5064    LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
5065 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
5066 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
5067 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5068 Elts.push_back(Unmerge.getReg(j));
5069 }
5070
5071 SmallVector<Register, 8> NarrowTyElts;
5072 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5073 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5074 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
5075 ++i, Offset += NumNarrowTyElts) {
5076 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
5077 NarrowTyElts.push_back(
5078 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5079 }
5080
5081 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5082 MI.eraseFromParent();
5083 return Legalized;
5084 }
5085
5086 assert(TypeIdx == 0 && "Bad type index");
5087 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5088 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5089 return UnableToLegalize;
5090
5091  // This is most likely SrcTy (smaller than register size) packed in DstTy
5092  // (larger than register size), and since the merge was not combined it will
5093  // be lowered to bit-sequence packing into a register. Merge SrcTy to NarrowTy
5094  // (register size) pieces first, then merge each NarrowTy piece to DstTy.
5095
5096 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
5097 //
5098 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5099 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5100 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
5101 SmallVector<Register, 8> NarrowTyElts;
5102 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5103 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5104 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
5105 for (unsigned i = 0; i < NumParts; ++i) {
5106    SmallVector<Register, 8> Sources;
5107    for (unsigned j = 0; j < NumElts; ++j)
5108 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
5109 NarrowTyElts.push_back(
5110 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5111 }
5112
5113 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5114 MI.eraseFromParent();
5115 return Legalized;
5116}
5117
5118LegalizerHelper::LegalizeResult
5119LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
5120                                                           unsigned TypeIdx,
5121 LLT NarrowVecTy) {
5122 auto [DstReg, SrcVec] = MI.getFirst2Regs();
5123 Register InsertVal;
5124 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5125
5126 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
5127 if (IsInsert)
5128 InsertVal = MI.getOperand(2).getReg();
5129
5130 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
5131
5132 // TODO: Handle total scalarization case.
5133 if (!NarrowVecTy.isVector())
5134 return UnableToLegalize;
5135
5136 LLT VecTy = MRI.getType(SrcVec);
5137
5138 // If the index is a constant, we can break this down as you would expect,
5139 // indexing into the target-sized pieces.
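 // For example (an illustrative sketch with made-up register names, not taken
 // from a particular test): extracting element 5 from <8 x s16> with
 // NarrowVecTy = <4 x s16> uses piece 5 / 4 = 1 and sub-index 5 - 4 * 1 = 1:
 //
 //   %lo:_(<4 x s16>), %hi:_(<4 x s16>) = G_UNMERGE_VALUES %vec:_(<8 x s16>)
 //   %newidx:_(s64) = G_CONSTANT i64 1
 //   %elt:_(s16) = G_EXTRACT_VECTOR_ELT %hi:_(<4 x s16>), %newidx:_(s64)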
5140 int64_t IdxVal;
5141 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
5142 if (MaybeCst) {
5143 IdxVal = MaybeCst->Value.getSExtValue();
5144 // Avoid out-of-bounds indexing into the pieces.
5145 if (IdxVal >= VecTy.getNumElements()) {
5146 MIRBuilder.buildUndef(DstReg);
5147 MI.eraseFromParent();
5148 return Legalized;
5149 }
5150
5151 SmallVector<Register, 8> VecParts;
5152 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5153
5154 // Build a sequence of NarrowTy pieces in VecParts for this operand.
5155 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5156 TargetOpcode::G_ANYEXT);
5157
5158 unsigned NewNumElts = NarrowVecTy.getNumElements();
5159
5160 LLT IdxTy = MRI.getType(Idx);
5161 int64_t PartIdx = IdxVal / NewNumElts;
5162 auto NewIdx =
5163 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5164
5165 if (IsInsert) {
5166 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5167
5168 // Use the adjusted index to insert into one of the subvectors.
5169 auto InsertPart = MIRBuilder.buildInsertVectorElement(
5170 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5171 VecParts[PartIdx] = InsertPart.getReg(0);
5172
5173 // Recombine the inserted subvector with the others to reform the result
5174 // vector.
5175 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5176 } else {
5177 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5178 }
5179
5180 MI.eraseFromParent();
5181 return Legalized;
5182 }
5183
5184 // With a variable index, we can't perform the operation in a smaller type, so
5185 // we're forced to expand this.
5186 //
5187 // TODO: We could emit a chain of compare/select to figure out which piece to
5188 // index.
5189 return UnableToLegalize;
5190}
5191
5192LegalizerHelper::LegalizeResult
5193LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
5194 LLT NarrowTy) {
5195 // FIXME: Don't know how to handle secondary types yet.
5196 if (TypeIdx != 0)
5197 return UnableToLegalize;
5198
5199 // This implementation doesn't work for atomics. Give up instead of doing
5200 // something invalid.
5201 if (LdStMI.isAtomic())
5202 return UnableToLegalize;
5203
5204 bool IsLoad = isa<GLoad>(LdStMI);
5205 Register ValReg = LdStMI.getReg(0);
5206 Register AddrReg = LdStMI.getPointerReg();
5207 LLT ValTy = MRI.getType(ValReg);
5208
5209 // FIXME: Do we need a distinct NarrowMemory legalize action?
5210 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
5211 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
5212 return UnableToLegalize;
5213 }
5214
5215 int NumParts = -1;
5216 int NumLeftover = -1;
5217 LLT LeftoverTy;
5218 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
5219 if (IsLoad) {
5220 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
5221 } else {
5222 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5223 NarrowLeftoverRegs, MIRBuilder, MRI)) {
5224 NumParts = NarrowRegs.size();
5225 NumLeftover = NarrowLeftoverRegs.size();
5226 }
5227 }
5228
5229 if (NumParts == -1)
5230 return UnableToLegalize;
5231
5232 LLT PtrTy = MRI.getType(AddrReg);
5233 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
5234
5235 unsigned TotalSize = ValTy.getSizeInBits();
5236
5237 // Split the load/store into PartTy sized pieces starting at Offset. If this
5238 // is a load, return the new registers in ValRegs. For a store, each element
5239 // of ValRegs should be PartTy. Returns the next offset that needs to be
5240 // handled.
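 // For instance (an illustrative sketch, not from a particular target): an
 // s96 G_LOAD narrowed with NarrowTy = s32 becomes three s32 loads at byte
 // offsets 0, 4 and 8 from the base pointer (assuming a little-endian layout),
 // and the three results are re-merged into the original s96 value; a G_STORE
 // is split the same way in reverse.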
5241 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
5242 auto MMO = LdStMI.getMMO();
5243 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
5244 unsigned NumParts, unsigned Offset) -> unsigned {
5245 MachineFunction &MF = MIRBuilder.getMF();
5246 unsigned PartSize = PartTy.getSizeInBits();
5247 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
5248 ++Idx) {
5249 unsigned ByteOffset = Offset / 8;
5250 Register NewAddrReg;
5251
5252 MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
5253
5254 MachineMemOperand *NewMMO =
5255 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
5256
5257 if (IsLoad) {
5258 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5259 ValRegs.push_back(Dst);
5260 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5261 } else {
5262 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5263 }
5264 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
5265 }
5266
5267 return Offset;
5268 };
5269
5270 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
5271 unsigned HandledOffset =
5272 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
5273
5274 // Handle the rest of the register if this isn't an even type breakdown.
5275 if (LeftoverTy.isValid())
5276 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5277
5278 if (IsLoad) {
5279 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5280 LeftoverTy, NarrowLeftoverRegs);
5281 }
5282
5283 LdStMI.eraseFromParent();
5284 return Legalized;
5285}
5286
5287LegalizerHelper::LegalizeResult
5288LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
5289 LLT NarrowTy) {
5290 using namespace TargetOpcode;
5291 GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
5292 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5293
5294 switch (MI.getOpcode()) {
5295 case G_IMPLICIT_DEF:
5296 case G_TRUNC:
5297 case G_AND:
5298 case G_OR:
5299 case G_XOR:
5300 case G_ADD:
5301 case G_SUB:
5302 case G_MUL:
5303 case G_PTR_ADD:
5304 case G_SMULH:
5305 case G_UMULH:
5306 case G_FADD:
5307 case G_FMUL:
5308 case G_FSUB:
5309 case G_FNEG:
5310 case G_FABS:
5311 case G_FCANONICALIZE:
5312 case G_FDIV:
5313 case G_FREM:
5314 case G_FMA:
5315 case G_FMAD:
5316 case G_FPOW:
5317 case G_FEXP:
5318 case G_FEXP2:
5319 case G_FEXP10:
5320 case G_FLOG:
5321 case G_FLOG2:
5322 case G_FLOG10:
5323 case G_FLDEXP:
5324 case G_FNEARBYINT:
5325 case G_FCEIL:
5326 case G_FFLOOR:
5327 case G_FRINT:
5328 case G_INTRINSIC_LRINT:
5329 case G_INTRINSIC_LLRINT:
5330 case G_INTRINSIC_ROUND:
5331 case G_INTRINSIC_ROUNDEVEN:
5332 case G_LROUND:
5333 case G_LLROUND:
5334 case G_INTRINSIC_TRUNC:
5335 case G_FCOS:
5336 case G_FSIN:
5337 case G_FTAN:
5338 case G_FACOS:
5339 case G_FASIN:
5340 case G_FATAN:
5341 case G_FATAN2:
5342 case G_FCOSH:
5343 case G_FSINH:
5344 case G_FTANH:
5345 case G_FSQRT:
5346 case G_BSWAP:
5347 case G_BITREVERSE:
5348 case G_SDIV:
5349 case G_UDIV:
5350 case G_SREM:
5351 case G_UREM:
5352 case G_SDIVREM:
5353 case G_UDIVREM:
5354 case G_SMIN:
5355 case G_SMAX:
5356 case G_UMIN:
5357 case G_UMAX:
5358 case G_ABS:
5359 case G_FMINNUM:
5360 case G_FMAXNUM:
5361 case G_FMINNUM_IEEE:
5362 case G_FMAXNUM_IEEE:
5363 case G_FMINIMUM:
5364 case G_FMAXIMUM:
5365 case G_FSHL:
5366 case G_FSHR:
5367 case G_ROTL:
5368 case G_ROTR:
5369 case G_FREEZE:
5370 case G_SADDSAT:
5371 case G_SSUBSAT:
5372 case G_UADDSAT:
5373 case G_USUBSAT:
5374 case G_UMULO:
5375 case G_SMULO:
5376 case G_SHL:
5377 case G_LSHR:
5378 case G_ASHR:
5379 case G_SSHLSAT:
5380 case G_USHLSAT:
5381 case G_CTLZ:
5382 case G_CTLZ_ZERO_UNDEF:
5383 case G_CTTZ:
5384 case G_CTTZ_ZERO_UNDEF:
5385 case G_CTPOP:
5386 case G_FCOPYSIGN:
5387 case G_ZEXT:
5388 case G_SEXT:
5389 case G_ANYEXT:
5390 case G_FPEXT:
5391 case G_FPTRUNC:
5392 case G_SITOFP:
5393 case G_UITOFP:
5394 case G_FPTOSI:
5395 case G_FPTOUI:
5396 case G_FPTOSI_SAT:
5397 case G_FPTOUI_SAT:
5398 case G_INTTOPTR:
5399 case G_PTRTOINT:
5400 case G_ADDRSPACE_CAST:
5401 case G_UADDO:
5402 case G_USUBO:
5403 case G_UADDE:
5404 case G_USUBE:
5405 case G_SADDO:
5406 case G_SSUBO:
5407 case G_SADDE:
5408 case G_SSUBE:
5409 case G_STRICT_FADD:
5410 case G_STRICT_FSUB:
5411 case G_STRICT_FMUL:
5412 case G_STRICT_FMA:
5413 case G_STRICT_FLDEXP:
5414 case G_FFREXP:
5415 return fewerElementsVectorMultiEltType(GMI, NumElts);
5416 case G_ICMP:
5417 case G_FCMP:
5418 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
5419 case G_IS_FPCLASS:
5420 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
5421 case G_SELECT:
5422 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
5423 return fewerElementsVectorMultiEltType(GMI, NumElts);
5424 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
5425 case G_PHI:
5426 return fewerElementsVectorPhi(GMI, NumElts);
5427 case G_UNMERGE_VALUES:
5428 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
5429 case G_BUILD_VECTOR:
5430 assert(TypeIdx == 0 && "not a vector type index");
5431 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5432 case G_CONCAT_VECTORS:
5433 if (TypeIdx != 1) // TODO: This probably does work as expected already.
5434 return UnableToLegalize;
5435 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5436 case G_EXTRACT_VECTOR_ELT:
5437 case G_INSERT_VECTOR_ELT:
5438 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
5439 case G_LOAD:
5440 case G_STORE:
5441 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
5442 case G_SEXT_INREG:
5443 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
5444 GISEL_VECREDUCE_CASES_NONSEQ
5445 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
5446 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5447 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5448 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
5449 case G_SHUFFLE_VECTOR:
5450 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
5451 case G_FPOWI:
5452 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
5453 case G_BITCAST:
5454 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
5455 case G_INTRINSIC_FPTRUNC_ROUND:
5456 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
5457 default:
5458 return UnableToLegalize;
5459 }
5460}
5461
5462LegalizerHelper::LegalizeResult
5463LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned int TypeIdx,
5464 LLT NarrowTy) {
5465 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5466 "Not a bitcast operation");
5467
5468 if (TypeIdx != 0)
5469 return UnableToLegalize;
5470
5471 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5472
5473 unsigned NewElemCount =
5474 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5475 LLT SrcNarrowTy = LLT::fixed_vector(NewElemCount, SrcTy.getElementType());
5476
5477 // Split the Src and Dst Reg into smaller registers
5478 SmallVector<Register> SrcVRegs, BitcastVRegs;
5479 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5480 return UnableToLegalize;
5481
5482 // Build new smaller bitcast instructions
5483 // Leftover types are not supported for now, but will need to be handled.
5484 for (unsigned i = 0; i < SrcVRegs.size(); i++)
5485 BitcastVRegs.push_back(
5486 MIRBuilder.buildBitcast(NarrowTy, SrcVRegs[i]).getReg(0));
5487
5488 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5489 MI.eraseFromParent();
5490 return Legalized;
5491}
5492
5493LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
5494 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5495 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5496 if (TypeIdx != 0)
5497 return UnableToLegalize;
5498
5499 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5500 MI.getFirst3RegLLTs();
5501 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5502 // The shuffle should be canonicalized by now.
5503 if (DstTy != Src1Ty)
5504 return UnableToLegalize;
5505 if (DstTy != Src2Ty)
5506 return UnableToLegalize;
5507
5508 if (!isPowerOf2_32(DstTy.getNumElements()))
5509 return UnableToLegalize;
5510
5511 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5512 // Further legalization attempts will be needed to split it further.
5513 NarrowTy =
5514 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
5515 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5516
5517 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
5518 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
5519 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
5520 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5521 SplitSrc2Regs[1]};
5522
5523 Register Hi, Lo;
5524
5525 // If Lo or Hi uses elements from at most two of the four input vectors, then
5526 // express it as a vector shuffle of those two inputs. Otherwise extract the
5527 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
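 // As an illustration (hypothetical masks, not from a specific test): when
 // splitting an <8 x s32> shuffle into two <4 x s32> halves, a half whose mask
 // entries only touch two of the four <4 x s32> input pieces becomes a new
 // G_SHUFFLE_VECTOR of those two pieces; a half that would need three or more
 // pieces is instead built with per-element G_EXTRACT_VECTOR_ELT plus a
 // G_BUILD_VECTOR.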
5528 SmallVector<int, 16> Ops;
5529 for (unsigned High = 0; High < 2; ++High) {
5530 Register &Output = High ? Hi : Lo;
5531
5532 // Build a shuffle mask for the output, discovering on the fly which
5533 // input vectors to use as shuffle operands (recorded in InputUsed).
5534 // If building a suitable shuffle vector proves too hard, then bail
5535 // out with useBuildVector set.
5536 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
5537 unsigned FirstMaskIdx = High * NewElts;
5538 bool UseBuildVector = false;
5539 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5540 // The mask element. This indexes into the input.
5541 int Idx = Mask[FirstMaskIdx + MaskOffset];
5542
5543 // The input vector this mask element indexes into.
5544 unsigned Input = (unsigned)Idx / NewElts;
5545
5546 if (Input >= std::size(Inputs)) {
5547 // The mask element does not index into any input vector.
5548 Ops.push_back(-1);
5549 continue;
5550 }
5551
5552 // Turn the index into an offset from the start of the input vector.
5553 Idx -= Input * NewElts;
5554
5555 // Find or create a shuffle vector operand to hold this input.
5556 unsigned OpNo;
5557 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5558 if (InputUsed[OpNo] == Input) {
5559 // This input vector is already an operand.
5560 break;
5561 } else if (InputUsed[OpNo] == -1U) {
5562 // Create a new operand for this input vector.
5563 InputUsed[OpNo] = Input;
5564 break;
5565 }
5566 }
5567
5568 if (OpNo >= std::size(InputUsed)) {
5569 // More than two input vectors used! Give up on trying to create a
5570 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5571 UseBuildVector = true;
5572 break;
5573 }
5574
5575 // Add the mask index for the new shuffle vector.
5576 Ops.push_back(Idx + OpNo * NewElts);
5577 }
5578
5579 if (UseBuildVector) {
5580 LLT EltTy = NarrowTy.getElementType();
5581 SmallVector<Register, 16> SVOps;
5582
5583 // Extract the input elements by hand.
5584 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5585 // The mask element. This indexes into the input.
5586 int Idx = Mask[FirstMaskIdx + MaskOffset];
5587
5588 // The input vector this mask element indexes into.
5589 unsigned Input = (unsigned)Idx / NewElts;
5590
5591 if (Input >= std::size(Inputs)) {
5592 // The mask element is "undef" or indexes off the end of the input.
5593 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5594 continue;
5595 }
5596
5597 // Turn the index into an offset from the start of the input vector.
5598 Idx -= Input * NewElts;
5599
5600 // Extract the vector element by hand.
5601 SVOps.push_back(MIRBuilder
5602 .buildExtractVectorElement(
5603 EltTy, Inputs[Input],
5604 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5605 .getReg(0));
5606 }
5607
5608 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5609 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5610 } else if (InputUsed[0] == -1U) {
5611 // No input vectors were used! The result is undefined.
5612 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5613 } else {
5614 Register Op0 = Inputs[InputUsed[0]];
5615 // If only one input was used, use an undefined vector for the other.
5616 Register Op1 = InputUsed[1] == -1U
5617 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5618 : Inputs[InputUsed[1]];
5619 // At least one input vector was used. Create a new shuffle vector.
5620 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5621 }
5622
5623 Ops.clear();
5624 }
5625
5626 MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
5627 MI.eraseFromParent();
5628 return Legalized;
5629}
5630
5631LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
5632 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5633 auto &RdxMI = cast<GVecReduce>(MI);
5634
5635 if (TypeIdx != 1)
5636 return UnableToLegalize;
5637
5638 // The semantics of the normal non-sequential reductions allow us to freely
5639 // re-associate the operation.
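 // For example (an illustrative sketch): a G_VECREDUCE_ADD of <8 x s32>
 // narrowed with NarrowTy = <4 x s32> can be rewritten as two partial
 // reductions of the low and high halves combined with a scalar G_ADD; when
 // the element counts are powers of two, tryNarrowPow2Reduction instead
 // combines the pieces with intermediate vector ops first.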
5640 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5641
5642 if (NarrowTy.isVector() &&
5643 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5644 return UnableToLegalize;
5645
5646 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5647 SmallVector<Register> SplitSrcs;
5648 // If NarrowTy is a scalar then we're being asked to scalarize.
5649 const unsigned NumParts =
5650 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5651 : SrcTy.getNumElements();
5652
5653 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5654 if (NarrowTy.isScalar()) {
5655 if (DstTy != NarrowTy)
5656 return UnableToLegalize; // FIXME: handle implicit extensions.
5657
5658 if (isPowerOf2_32(NumParts)) {
5659 // Generate a tree of scalar operations to reduce the critical path.
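 // E.g. with four scalar pieces a, b, c, d (illustrative), this emits
 // (a op b) and (c op d) in one level and combines the two partial results in
 // a second level, giving depth 2 instead of the depth 3 of a purely
 // sequential chain.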
5660 SmallVector<Register> PartialResults;
5661 unsigned NumPartsLeft = NumParts;
5662 while (NumPartsLeft > 1) {
5663 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5664 PartialResults.emplace_back(
5665 MIRBuilder
5666 .buildInstr(ScalarOpc, {NarrowTy},
5667 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5668 .getReg(0));
5669 }
5670 SplitSrcs = PartialResults;
5671 PartialResults.clear();
5672 NumPartsLeft = SplitSrcs.size();
5673 }
5674 assert(SplitSrcs.size() == 1);
5675 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5676 MI.eraseFromParent();
5677 return Legalized;
5678 }
5679 // If we can't generate a tree, then just do sequential operations.
5680 Register Acc = SplitSrcs[0];
5681 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5682 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5683 .getReg(0);
5684 MIRBuilder.buildCopy(DstReg, Acc);
5685 MI.eraseFromParent();
5686 return Legalized;
5687 }
5688 SmallVector<Register> PartialReductions;
5689 for (unsigned Part = 0; Part < NumParts; ++Part) {
5690 PartialReductions.push_back(
5691 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5692 .getReg(0));
5693 }
5694
5695 // If the types involved are powers of 2, we can generate intermediate vector
5696 // ops, before generating a final reduction operation.
5697 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5698 isPowerOf2_32(NarrowTy.getNumElements())) {
5699 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5700 }
5701
5702 Register Acc = PartialReductions[0];
5703 for (unsigned Part = 1; Part < NumParts; ++Part) {
5704 if (Part == NumParts - 1) {
5705 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5706 {Acc, PartialReductions[Part]});
5707 } else {
5708 Acc = MIRBuilder
5709 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5710 .getReg(0);
5711 }
5712 }
5713 MI.eraseFromParent();
5714 return Legalized;
5715}
5716
5717LegalizerHelper::LegalizeResult
5718LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
5719 unsigned int TypeIdx,
5720 LLT NarrowTy) {
5721 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5722 MI.getFirst3RegLLTs();
5723 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5724 DstTy != NarrowTy)
5725 return UnableToLegalize;
5726
5727 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5728 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5729 "Unexpected vecreduce opcode");
5730 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5731 ? TargetOpcode::G_FADD
5732 : TargetOpcode::G_FMUL;
5733
5734 SmallVector<Register> SplitSrcs;
5735 unsigned NumParts = SrcTy.getNumElements();
5736 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5737 Register Acc = ScalarReg;
5738 for (unsigned i = 0; i < NumParts; i++)
5739 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5740 .getReg(0);
5741
5742 MIRBuilder.buildCopy(DstReg, Acc);
5743 MI.eraseFromParent();
5744 return Legalized;
5745}
5746
5747LegalizerHelper::LegalizeResult
5748LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5749 LLT SrcTy, LLT NarrowTy,
5750 unsigned ScalarOpc) {
5751 SmallVector<Register> SplitSrcs;
5752 // Split the sources into NarrowTy size pieces.
5753 extractParts(SrcReg, NarrowTy,
5754 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5755 MIRBuilder, MRI);
5756 // We're going to do a tree reduction using vector operations until we have
5757 // one NarrowTy size value left.
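 // For example (illustrative): a <16 x s32> source with NarrowTy = <4 x s32>
 // yields four pieces; two vector ops reduce them to two, one more reduces
 // them to one, and the requested G_VECREDUCE_* is finally applied to that
 // last <4 x s32> value.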
5758 while (SplitSrcs.size() > 1) {
5759 SmallVector<Register> PartialRdxs;
5760 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5761 Register LHS = SplitSrcs[Idx];
5762 Register RHS = SplitSrcs[Idx + 1];
5763 // Create the intermediate vector op.
5764 Register Res =
5765 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
5766 PartialRdxs.push_back(Res);
5767 }
5768 SplitSrcs = std::move(PartialRdxs);
5769 }
5770 // Finally generate the requested NarrowTy based reduction.
5771 Observer.changingInstr(MI);
5772 MI.getOperand(1).setReg(SplitSrcs[0]);
5773 Observer.changedInstr(MI);
5774 return Legalized;
5775}
5776
5777LegalizerHelper::LegalizeResult
5778LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
5779 const LLT HalfTy, const LLT AmtTy) {
5780
5781 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5782 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5783 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5784
5785 if (Amt.isZero()) {
5786 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
5787 MI.eraseFromParent();
5788 return Legalized;
5789 }
5790
5791 LLT NVT = HalfTy;
5792 unsigned NVTBits = HalfTy.getSizeInBits();
5793 unsigned VTBits = 2 * NVTBits;
5794
5795 SrcOp Lo(Register(0)), Hi(Register(0));
5796 if (MI.getOpcode() == TargetOpcode::G_SHL) {
5797 if (Amt.ugt(VTBits)) {
5798 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5799 } else if (Amt.ugt(NVTBits)) {
5800 Lo = MIRBuilder.buildConstant(NVT, 0);
5801 Hi = MIRBuilder.buildShl(NVT, InL,
5802 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5803 } else if (Amt == NVTBits) {
5804 Lo = MIRBuilder.buildConstant(NVT, 0);
5805 Hi = InL;
5806 } else {
5807 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
5808 auto OrLHS =
5809 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
5810 auto OrRHS = MIRBuilder.buildLShr(
5811 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5812 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5813 }
5814 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5815 if (Amt.ugt(VTBits)) {
5816 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5817 } else if (Amt.ugt(NVTBits)) {
5818 Lo = MIRBuilder.buildLShr(NVT, InH,
5819 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5820 Hi = MIRBuilder.buildConstant(NVT, 0);
5821 } else if (Amt == NVTBits) {
5822 Lo = InH;
5823 Hi = MIRBuilder.buildConstant(NVT, 0);
5824 } else {
5825 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5826
5827 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5828 auto OrRHS = MIRBuilder.buildShl(
5829 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5830
5831 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5832 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
5833 }
5834 } else {
5835 if (Amt.ugt(VTBits)) {
5836 Lo = Hi = MIRBuilder.buildAShr(
5837 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5838 } else if (Amt.ugt(NVTBits)) {
5839 Lo = MIRBuilder.buildAShr(NVT, InH,
5840 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5841 Hi = MIRBuilder.buildAShr(NVT, InH,
5842 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5843 } else if (Amt == NVTBits) {
5844 Lo = InH;
5845 Hi = MIRBuilder.buildAShr(NVT, InH,
5846 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5847 } else {
5848 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5849
5850 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5851 auto OrRHS = MIRBuilder.buildShl(
5852 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5853
5854 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5855 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
5856 }
5857 }
5858
5859 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
5860 MI.eraseFromParent();
5861
5862 return Legalized;
5863}
5864
5865// TODO: Optimize if constant shift amount.
5866LegalizerHelper::LegalizeResult
5867LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
5868 LLT RequestedTy) {
5869 if (TypeIdx == 1) {
5870 Observer.changingInstr(MI);
5871 narrowScalarSrc(MI, RequestedTy, 2);
5872 Observer.changedInstr(MI);
5873 return Legalized;
5874 }
5875
5876 Register DstReg = MI.getOperand(0).getReg();
5877 LLT DstTy = MRI.getType(DstReg);
5878 if (DstTy.isVector())
5879 return UnableToLegalize;
5880
5881 Register Amt = MI.getOperand(2).getReg();
5882 LLT ShiftAmtTy = MRI.getType(Amt);
5883 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
5884 if (DstEltSize % 2 != 0)
5885 return UnableToLegalize;
5886
5887 // Ignore the input type. We can only go to exactly half the size of the
5888 // input. If that isn't small enough, the resulting pieces will be further
5889 // legalized.
5890 const unsigned NewBitSize = DstEltSize / 2;
5891 const LLT HalfTy = LLT::scalar(NewBitSize);
5892 const LLT CondTy = LLT::scalar(1);
5893
5894 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
5895 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
5896 ShiftAmtTy);
5897 }
5898
5899 // TODO: Expand with known bits.
5900
5901 // Handle the fully general expansion by an unknown amount.
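 // Sketch of the expansion for G_SHL with the value split into halves
 // (InL, InH) of NewBitSize bits (an informal summary, not MIR):
 //   Lo = Amt < NewBitSize ? (InL << Amt) : 0
 //   Hi = Amt == 0 ? InH
 //      : Amt < NewBitSize ? (InH << Amt) | (InL >> (NewBitSize - Amt))
 //      : InL << (Amt - NewBitSize)
 // G_LSHR and G_ASHR are handled symmetrically with the roles of the halves
 // swapped.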
5902 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
5903
5904 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5905 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5906 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5907
5908 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
5909 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
5910
5911 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
5912 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
5913 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
5914
5915 Register ResultRegs[2];
5916 switch (MI.getOpcode()) {
5917 case TargetOpcode::G_SHL: {
5918 // Short: ShAmt < NewBitSize
5919 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
5920
5921 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
5922 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
5923 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
5924
5925 // Long: ShAmt >= NewBitSize
5926 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
5927 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
5928
5929 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
5930 auto Hi = MIRBuilder.buildSelect(
5931 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
5932
5933 ResultRegs[0] = Lo.getReg(0);
5934 ResultRegs[1] = Hi.getReg(0);
5935 break;
5936 }
5937 case TargetOpcode::G_LSHR:
5938 case TargetOpcode::G_ASHR: {
5939 // Short: ShAmt < NewBitSize
5940 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
5941
5942 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
5943 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
5944 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
5945
5946 // Long: ShAmt >= NewBitSize
5947 MachineInstrBuilder HiL;
5948 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5949 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
5950 } else {
5951 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
5952 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
5953 }
5954 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
5955 {InH, AmtExcess}); // Lo from Hi part.
5956
5957 auto Lo = MIRBuilder.buildSelect(
5958 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
5959
5960 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
5961
5962 ResultRegs[0] = Lo.getReg(0);
5963 ResultRegs[1] = Hi.getReg(0);
5964 break;
5965 }
5966 default:
5967 llvm_unreachable("not a shift");
5968 }
5969
5970 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
5971 MI.eraseFromParent();
5972 return Legalized;
5973}
5974
5975LegalizerHelper::LegalizeResult
5976LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
5977 LLT MoreTy) {
5978 assert(TypeIdx == 0 && "Expecting only Idx 0");
5979
5980 Observer.changingInstr(MI);
5981 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
5982 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
5983 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5984 moreElementsVectorSrc(MI, MoreTy, I);
5985 }
5986
5987 MachineBasicBlock &MBB = *MI.getParent();
5989 moreElementsVectorDst(MI, MoreTy, 0);
5990 Observer.changedInstr(MI);
5991 return Legalized;
5992}
5993
5994MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
5995 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
5996 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
5997
5998 switch (Opcode) {
5999 default:
6000 llvm_unreachable(
6001 "getNeutralElementForVecReduce called with invalid opcode!");
6002 case TargetOpcode::G_VECREDUCE_ADD:
6003 case TargetOpcode::G_VECREDUCE_OR:
6004 case TargetOpcode::G_VECREDUCE_XOR:
6005 case TargetOpcode::G_VECREDUCE_UMAX:
6006 return MIRBuilder.buildConstant(Ty, 0);
6007 case TargetOpcode::G_VECREDUCE_MUL:
6008 return MIRBuilder.buildConstant(Ty, 1);
6009 case TargetOpcode::G_VECREDUCE_AND:
6010 case TargetOpcode::G_VECREDUCE_UMIN:
6011 return MIRBuilder.buildConstant(Ty,
6012 APInt::getAllOnes(Ty.getSizeInBits()));
6013 case TargetOpcode::G_VECREDUCE_SMAX:
6014 return MIRBuilder.buildConstant(Ty,
6015 APInt::getSignedMinValue(Ty.getSizeInBits()));
6016 case TargetOpcode::G_VECREDUCE_SMIN:
6017 return MIRBuilder.buildConstant(Ty,
6018 APInt::getSignedMaxValue(Ty.getSizeInBits()));
6019 case TargetOpcode::G_VECREDUCE_FADD:
6020 return MIRBuilder.buildFConstant(Ty, -0.0);
6021 case TargetOpcode::G_VECREDUCE_FMUL:
6022 return MIRBuilder.buildFConstant(Ty, 1.0);
6023 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6024 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6025 assert(false && "getNeutralElementForVecReduce unimplemented for "
6026 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6027 }
6028 llvm_unreachable("switch expected to return!");
6029}
6030
6031LegalizerHelper::LegalizeResult
6032LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
6033 LLT MoreTy) {
6034 unsigned Opc = MI.getOpcode();
6035 switch (Opc) {
6036 case TargetOpcode::G_IMPLICIT_DEF:
6037 case TargetOpcode::G_LOAD: {
6038 if (TypeIdx != 0)
6039 return UnableToLegalize;
6040 Observer.changingInstr(MI);
6041 moreElementsVectorDst(MI, MoreTy, 0);
6042 Observer.changedInstr(MI);
6043 return Legalized;
6044 }
6045 case TargetOpcode::G_STORE:
6046 if (TypeIdx != 0)
6047 return UnableToLegalize;
6048 Observer.changingInstr(MI);
6049 moreElementsVectorSrc(MI, MoreTy, 0);
6050 Observer.changedInstr(MI);
6051 return Legalized;
6052 case TargetOpcode::G_AND:
6053 case TargetOpcode::G_OR:
6054 case TargetOpcode::G_XOR:
6055 case TargetOpcode::G_ADD:
6056 case TargetOpcode::G_SUB:
6057 case TargetOpcode::G_MUL:
6058 case TargetOpcode::G_FADD:
6059 case TargetOpcode::G_FSUB:
6060 case TargetOpcode::G_FMUL:
6061 case TargetOpcode::G_FDIV:
6062 case TargetOpcode::G_FCOPYSIGN:
6063 case TargetOpcode::G_UADDSAT:
6064 case TargetOpcode::G_USUBSAT:
6065 case TargetOpcode::G_SADDSAT:
6066 case TargetOpcode::G_SSUBSAT:
6067 case TargetOpcode::G_SMIN:
6068 case TargetOpcode::G_SMAX:
6069 case TargetOpcode::G_UMIN:
6070 case TargetOpcode::G_UMAX:
6071 case TargetOpcode::G_FMINNUM:
6072 case TargetOpcode::G_FMAXNUM:
6073 case TargetOpcode::G_FMINNUM_IEEE:
6074 case TargetOpcode::G_FMAXNUM_IEEE:
6075 case TargetOpcode::G_FMINIMUM:
6076 case TargetOpcode::G_FMAXIMUM:
6077 case TargetOpcode::G_STRICT_FADD:
6078 case TargetOpcode::G_STRICT_FSUB:
6079 case TargetOpcode::G_STRICT_FMUL:
6080 case TargetOpcode::G_SHL:
6081 case TargetOpcode::G_ASHR:
6082 case TargetOpcode::G_LSHR: {
6083 Observer.changingInstr(MI);
6084 moreElementsVectorSrc(MI, MoreTy, 1);
6085 moreElementsVectorSrc(MI, MoreTy, 2);
6086 moreElementsVectorDst(MI, MoreTy, 0);
6087 Observer.changedInstr(MI);
6088 return Legalized;
6089 }
6090 case TargetOpcode::G_FMA:
6091 case TargetOpcode::G_STRICT_FMA:
6092 case TargetOpcode::G_FSHR:
6093 case TargetOpcode::G_FSHL: {
6094 Observer.changingInstr(MI);
6095 moreElementsVectorSrc(MI, MoreTy, 1);
6096 moreElementsVectorSrc(MI, MoreTy, 2);
6097 moreElementsVectorSrc(MI, MoreTy, 3);
6098 moreElementsVectorDst(MI, MoreTy, 0);
6099 Observer.changedInstr(MI);
6100 return Legalized;
6101 }
6102 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6103 case TargetOpcode::G_EXTRACT:
6104 if (TypeIdx != 1)
6105 return UnableToLegalize;
6106 Observer.changingInstr(MI);
6107 moreElementsVectorSrc(MI, MoreTy, 1);
6108 Observer.changedInstr(MI);
6109 return Legalized;
6110 case TargetOpcode::G_INSERT:
6111 case TargetOpcode::G_INSERT_VECTOR_ELT:
6112 case TargetOpcode::G_FREEZE:
6113 case TargetOpcode::G_FNEG:
6114 case TargetOpcode::G_FABS:
6115 case TargetOpcode::G_FSQRT:
6116 case TargetOpcode::G_FCEIL:
6117 case TargetOpcode::G_FFLOOR:
6118 case TargetOpcode::G_FNEARBYINT:
6119 case TargetOpcode::G_FRINT:
6120 case TargetOpcode::G_INTRINSIC_ROUND:
6121 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6122 case TargetOpcode::G_INTRINSIC_TRUNC:
6123 case TargetOpcode::G_BSWAP:
6124 case TargetOpcode::G_FCANONICALIZE:
6125 case TargetOpcode::G_SEXT_INREG:
6126 case TargetOpcode::G_ABS:
6127 if (TypeIdx != 0)
6128 return UnableToLegalize;
6129 Observer.changingInstr(MI);
6130 moreElementsVectorSrc(MI, MoreTy, 1);
6131 moreElementsVectorDst(MI, MoreTy, 0);
6132 Observer.changedInstr(MI);
6133 return Legalized;
6134 case TargetOpcode::G_SELECT: {
6135 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
6136 if (TypeIdx == 1) {
6137 if (!CondTy.isScalar() ||
6138 DstTy.getElementCount() != MoreTy.getElementCount())
6139 return UnableToLegalize;
6140
6141 // This is turning a scalar select of vectors into a vector
6142 // select. Broadcast the select condition.
6143 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6144 Observer.changingInstr(MI);
6145 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6146 Observer.changedInstr(MI);
6147 return Legalized;
6148 }
6149
6150 if (CondTy.isVector())
6151 return UnableToLegalize;
6152
6153 Observer.changingInstr(MI);
6154 moreElementsVectorSrc(MI, MoreTy, 2);
6155 moreElementsVectorSrc(MI, MoreTy, 3);
6156 moreElementsVectorDst(MI, MoreTy, 0);
6157 Observer.changedInstr(MI);
6158 return Legalized;
6159 }
6160 case TargetOpcode::G_UNMERGE_VALUES:
6161 return UnableToLegalize;
6162 case TargetOpcode::G_PHI:
6163 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
6164 case TargetOpcode::G_SHUFFLE_VECTOR:
6165 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
6166 case TargetOpcode::G_BUILD_VECTOR: {
6167 SmallVector<SrcOp, 8> Elts;
6168 for (auto Op : MI.uses()) {
6169 Elts.push_back(Op.getReg());
6170 }
6171
6172 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
6173 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
6174 }
6175
6176 MIRBuilder.buildDeleteTrailingVectorElements(
6177 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
6178 MI.eraseFromParent();
6179 return Legalized;
6180 }
6181 case TargetOpcode::G_SEXT:
6182 case TargetOpcode::G_ZEXT:
6183 case TargetOpcode::G_ANYEXT:
6184 case TargetOpcode::G_TRUNC:
6185 case TargetOpcode::G_FPTRUNC:
6186 case TargetOpcode::G_FPEXT:
6187 case TargetOpcode::G_FPTOSI:
6188 case TargetOpcode::G_FPTOUI:
6189 case TargetOpcode::G_FPTOSI_SAT:
6190 case TargetOpcode::G_FPTOUI_SAT:
6191 case TargetOpcode::G_SITOFP:
6192 case TargetOpcode::G_UITOFP: {
6193 Observer.changingInstr(MI);
6194 LLT SrcExtTy;
6195 LLT DstExtTy;
6196 if (TypeIdx == 0) {
6197 DstExtTy = MoreTy;
6198 SrcExtTy = LLT::fixed_vector(
6199 MoreTy.getNumElements(),
6200 MRI.getType(MI.getOperand(1).getReg()).getElementType());
6201 } else {
6202 DstExtTy = LLT::fixed_vector(
6203 MoreTy.getNumElements(),
6204 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6205 SrcExtTy = MoreTy;
6206 }
6207 moreElementsVectorSrc(MI, SrcExtTy, 1);
6208 moreElementsVectorDst(MI, DstExtTy, 0);
6209 Observer.changedInstr(MI);
6210 return Legalized;
6211 }
6212 case TargetOpcode::G_ICMP:
6213 case TargetOpcode::G_FCMP: {
6214 if (TypeIdx != 1)
6215 return UnableToLegalize;
6216
6217 Observer.changingInstr(MI);
6218 moreElementsVectorSrc(MI, MoreTy, 2);
6219 moreElementsVectorSrc(MI, MoreTy, 3);
6220 LLT CondTy = LLT::fixed_vector(
6221 MoreTy.getNumElements(),
6222 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6223 moreElementsVectorDst(MI, CondTy, 0);
6224 Observer.changedInstr(MI);
6225 return Legalized;
6226 }
6227 case TargetOpcode::G_BITCAST: {
6228 if (TypeIdx != 0)
6229 return UnableToLegalize;
6230
6231 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
6232 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6233
6234 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
6235 if (coefficient % DstTy.getNumElements() != 0)
6236 return UnableToLegalize;
6237
6238 coefficient = coefficient / DstTy.getNumElements();
6239
6240 LLT NewTy = SrcTy.changeElementCount(
6241 ElementCount::get(coefficient, MoreTy.isScalable()));
6242 Observer.changingInstr(MI);
6243 moreElementsVectorSrc(MI, NewTy, 1);
6244 moreElementsVectorDst(MI, MoreTy, 0);
6245 Observer.changedInstr(MI);
6246 return Legalized;
6247 }
6248 case TargetOpcode::G_VECREDUCE_FADD:
6249 case TargetOpcode::G_VECREDUCE_FMUL:
6250 case TargetOpcode::G_VECREDUCE_ADD:
6251 case TargetOpcode::G_VECREDUCE_MUL:
6252 case TargetOpcode::G_VECREDUCE_AND:
6253 case TargetOpcode::G_VECREDUCE_OR:
6254 case TargetOpcode::G_VECREDUCE_XOR:
6255 case TargetOpcode::G_VECREDUCE_SMAX:
6256 case TargetOpcode::G_VECREDUCE_SMIN:
6257 case TargetOpcode::G_VECREDUCE_UMAX:
6258 case TargetOpcode::G_VECREDUCE_UMIN: {
6259 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
6260 MachineOperand &MO = MI.getOperand(1);
6261 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6262 auto NeutralElement = getNeutralElementForVecReduce(
6263 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
6264
6266 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
6267 i != e; i++) {
6268 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
6269 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6270 NeutralElement, Idx);
6271 }
6272
6273 Observer.changingInstr(MI);
6274 MO.setReg(NewVec.getReg(0));
6275 Observer.changedInstr(MI);
6276 return Legalized;
6277 }
6278
6279 default:
6280 return UnableToLegalize;
6281 }
6282}
6283
6284LegalizerHelper::LegalizeResult
6285LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
6286 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6287 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6288 unsigned MaskNumElts = Mask.size();
6289 unsigned SrcNumElts = SrcTy.getNumElements();
6290 LLT DestEltTy = DstTy.getElementType();
6291
6292 if (MaskNumElts == SrcNumElts)
6293 return Legalized;
6294
6295 if (MaskNumElts < SrcNumElts) {
6296 // Extend mask to match new destination vector size with
6297 // undef values.
6298 SmallVector<int, 16> NewMask(SrcNumElts, -1);
6299 llvm::copy(Mask, NewMask.begin());
6300
6301 moreElementsVectorDst(MI, SrcTy, 0);
6302 MIRBuilder.setInstrAndDebugLoc(MI);
6303 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6304 MI.getOperand(1).getReg(),
6305 MI.getOperand(2).getReg(), NewMask);
6306 MI.eraseFromParent();
6307
6308 return Legalized;
6309 }
6310
6311 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
6312 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6313 LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
6314
6315 // Create new source vectors by concatenating the initial
6316 // source vectors with undefined vectors of the same size.
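 // Illustrative example: two <2 x s32> sources feeding a <4 x s32> shuffle
 // result are each concatenated with an undef <2 x s32> to form <4 x s32>
 // operands, and mask entries that referred to the second source are shifted
 // up by the amount of padding inserted before it.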
6317 auto Undef = MIRBuilder.buildUndef(SrcTy);
6318 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
6319 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
6320 MOps1[0] = MI.getOperand(1).getReg();
6321 MOps2[0] = MI.getOperand(2).getReg();
6322
6323 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
6324 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
6325
6326 // Readjust mask for new input vector length.
6327 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
6328 for (unsigned I = 0; I != MaskNumElts; ++I) {
6329 int Idx = Mask[I];
6330 if (Idx >= static_cast<int>(SrcNumElts))
6331 Idx += PaddedMaskNumElts - SrcNumElts;
6332 MappedOps[I] = Idx;
6333 }
6334
6335 // If we got more elements than required, extract subvector.
6336 if (MaskNumElts != PaddedMaskNumElts) {
6337 auto Shuffle =
6338 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
6339
6340 SmallVector<Register, 16> Elts(MaskNumElts);
6341 for (unsigned I = 0; I < MaskNumElts; ++I) {
6342 Elts[I] =
6344 .getReg(0);
6345 }
6346 MIRBuilder.buildBuildVector(DstReg, Elts);
6347 } else {
6348 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
6349 }
6350
6351 MI.eraseFromParent();
6352 return Legalized;
6353}
6354
6355LegalizerHelper::LegalizeResult
6356LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
6357 unsigned int TypeIdx, LLT MoreTy) {
6358 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
6359 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6360 unsigned NumElts = DstTy.getNumElements();
6361 unsigned WidenNumElts = MoreTy.getNumElements();
6362
6363 if (DstTy.isVector() && Src1Ty.isVector() &&
6364 DstTy.getNumElements() != Src1Ty.getNumElements()) {
6365 return equalizeVectorShuffleLengths(MI);
6366 }
6367
6368 if (TypeIdx != 0)
6369 return UnableToLegalize;
6370
6371 // Expect a canonicalized shuffle.
6372 if (DstTy != Src1Ty || DstTy != Src2Ty)
6373 return UnableToLegalize;
6374
6375 moreElementsVectorSrc(MI, MoreTy, 1);
6376 moreElementsVectorSrc(MI, MoreTy, 2);
6377
6378 // Adjust mask based on new input vector length.
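 // E.g. (illustrative) widening a <3 x s32> shuffle to <4 x s32>: mask entries
 // below NumElts keep indexing the first operand unchanged, entries referring
 // to the second operand are shifted up by WidenNumElts - NumElts, and the
 // extra trailing lanes stay undef (-1).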
6379 SmallVector<int, 16> NewMask(WidenNumElts, -1);
6380 for (unsigned I = 0; I != NumElts; ++I) {
6381 int Idx = Mask[I];
6382 if (Idx < static_cast<int>(NumElts))
6383 NewMask[I] = Idx;
6384 else
6385 NewMask[I] = Idx - NumElts + WidenNumElts;
6386 }
6387 moreElementsVectorDst(MI, MoreTy, 0);
6388 MIRBuilder.setInstrAndDebugLoc(MI);
6389 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6390 MI.getOperand(1).getReg(),
6391 MI.getOperand(2).getReg(), NewMask);
6392 MI.eraseFromParent();
6393 return Legalized;
6394}
6395
6396void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
6397 ArrayRef<Register> Src1Regs,
6398 ArrayRef<Register> Src2Regs,
6399 LLT NarrowTy) {
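 // Schoolbook long multiplication on NarrowTy digits. As an illustrative
 // sketch for two 2-part operands (a1:a0) * (b1:b0):
 //   DstRegs[0] = mul(a0, b0)
 //   DstRegs[1] = mul(a1, b0) + mul(a0, b1) + umulh(a0, b0)
 // For wider results the carries of these additions are accumulated and
 // folded into the next, more significant, digit.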
6400 MachineIRBuilder &B = MIRBuilder;
6401 unsigned SrcParts = Src1Regs.size();
6402 unsigned DstParts = DstRegs.size();
6403
6404 unsigned DstIdx = 0; // Low bits of the result.
6405 Register FactorSum =
6406 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
6407 DstRegs[DstIdx] = FactorSum;
6408
6409 unsigned CarrySumPrevDstIdx;
6410 SmallVector<Register, 4> Factors;
6411
6412 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
6413 // Collect low parts of muls for DstIdx.
6414 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
6415 i <= std::min(DstIdx, SrcParts - 1); ++i) {
6416 MachineInstrBuilder Mul =
6417 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
6418 Factors.push_back(Mul.getReg(0));
6419 }
6420 // Collect high parts of muls from previous DstIdx.
6421 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
6422 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
6423 MachineInstrBuilder Umulh =
6424 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
6425 Factors.push_back(Umulh.getReg(0));
6426 }
6427 // Add CarrySum from additions calculated for previous DstIdx.
6428 if (DstIdx != 1) {
6429 Factors.push_back(CarrySumPrevDstIdx);
6430 }
6431
6432 Register CarrySum;
6433 // Add all factors and accumulate all carries into CarrySum.
6434 if (DstIdx != DstParts - 1) {
6435 MachineInstrBuilder Uaddo =
6436 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
6437 FactorSum = Uaddo.getReg(0);
6438 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
6439 for (unsigned i = 2; i < Factors.size(); ++i) {
6440 MachineInstrBuilder Uaddo =
6441 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
6442 FactorSum = Uaddo.getReg(0);
6443 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
6444 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
6445 }
6446 } else {
6447 // Since value for the next index is not calculated, neither is CarrySum.
6448 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
6449 for (unsigned i = 2; i < Factors.size(); ++i)
6450 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
6451 }
6452
6453 CarrySumPrevDstIdx = CarrySum;
6454 DstRegs[DstIdx] = FactorSum;
6455 Factors.clear();
6456 }
6457}
6458
6459LegalizerHelper::LegalizeResult
6460LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
6461 LLT NarrowTy) {
6462 if (TypeIdx != 0)
6463 return UnableToLegalize;
6464
6465 Register DstReg = MI.getOperand(0).getReg();
6466 LLT DstType = MRI.getType(DstReg);
6467 // FIXME: add support for vector types
6468 if (DstType.isVector())
6469 return UnableToLegalize;
6470
6471 unsigned Opcode = MI.getOpcode();
6472 unsigned OpO, OpE, OpF;
6473 switch (Opcode) {
6474 case TargetOpcode::G_SADDO:
6475 case TargetOpcode::G_SADDE:
6476 case TargetOpcode::G_UADDO:
6477 case TargetOpcode::G_UADDE:
6478 case TargetOpcode::G_ADD:
6479 OpO = TargetOpcode::G_UADDO;
6480 OpE = TargetOpcode::G_UADDE;
6481 OpF = TargetOpcode::G_UADDE;
6482 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
6483 OpF = TargetOpcode::G_SADDE;
6484 break;
6485 case TargetOpcode::G_SSUBO:
6486 case TargetOpcode::G_SSUBE:
6487 case TargetOpcode::G_USUBO:
6488 case TargetOpcode::G_USUBE:
6489 case TargetOpcode::G_SUB:
6490 OpO = TargetOpcode::G_USUBO;
6491 OpE = TargetOpcode::G_USUBE;
6492 OpF = TargetOpcode::G_USUBE;
6493 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
6494 OpF = TargetOpcode::G_SSUBE;
6495 break;
6496 default:
6497 llvm_unreachable("Unexpected add/sub opcode!");
6498 }
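 // Illustrative sketch (made-up register names): an s128 G_UADDO narrowed to
 // s32 parts becomes a carry chain
 //   %d0:_(s32), %c0:_(s1) = G_UADDO %a0, %b0
 //   %d1:_(s32), %c1:_(s1) = G_UADDE %a1, %b1, %c0
 //   %d2:_(s32), %c2:_(s1) = G_UADDE %a2, %b2, %c1
 //   %d3:_(s32), %c3:_(s1) = G_UADDE %a3, %b3, %c2
 // with %c3 forwarded to the original carry-out and %d0..%d3 re-merged into
 // the s128 result.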
6499
6500 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
6501 unsigned NumDefs = MI.getNumExplicitDefs();
6502 Register Src1 = MI.getOperand(NumDefs).getReg();
6503 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
6504 Register CarryDst, CarryIn;
6505 if (NumDefs == 2)
6506 CarryDst = MI.getOperand(1).getReg();
6507 if (MI.getNumOperands() == NumDefs + 3)
6508 CarryIn = MI.getOperand(NumDefs + 2).getReg();
6509
6510 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
6511 LLT LeftoverTy, DummyTy;
6512 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
6513 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
6514 MIRBuilder, MRI);
6515 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
6516 MRI);
6517
6518 int NarrowParts = Src1Regs.size();
6519 Src1Regs.append(Src1Left);
6520 Src2Regs.append(Src2Left);
6521 DstRegs.reserve(Src1Regs.size());
6522
6523 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
6524 Register DstReg =
6525 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
6526 Register CarryOut;
6527 // Forward the final carry-out to the destination register
6528 if (i == e - 1 && CarryDst)
6529 CarryOut = CarryDst;
6530 else
6531 CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
6532
6533 if (!CarryIn) {
6534 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
6535 {Src1Regs[i], Src2Regs[i]});
6536 } else if (i == e - 1) {
6537 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
6538 {Src1Regs[i], Src2Regs[i], CarryIn});
6539 } else {
6540 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
6541 {Src1Regs[i], Src2Regs[i], CarryIn});
6542 }
6543
6544 DstRegs.push_back(DstReg);
6545 CarryIn = CarryOut;
6546 }
6547 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
6548 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
6549 ArrayRef(DstRegs).drop_front(NarrowParts));
6550
6551 MI.eraseFromParent();
6552 return Legalized;
6553}
6554
6555LegalizerHelper::LegalizeResult
6556LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
6557 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
6558
6559 LLT Ty = MRI.getType(DstReg);
6560 if (Ty.isVector())
6561 return UnableToLegalize;
6562
6563 unsigned Size = Ty.getSizeInBits();
6564 unsigned NarrowSize = NarrowTy.getSizeInBits();
6565 if (Size % NarrowSize != 0)
6566 return UnableToLegalize;
6567
6568 unsigned NumParts = Size / NarrowSize;
6569 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
6570 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
6571
6572 SmallVector<Register, 2> Src1Parts, Src2Parts;
6573 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
6574 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
6575 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
6576 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
6577
6578 // Take only high half of registers if this is high mul.
6579 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
6580 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6581 MI.eraseFromParent();
6582 return Legalized;
6583}
6584
6585LegalizerHelper::LegalizeResult
6586LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
6587 LLT NarrowTy) {
6588 if (TypeIdx != 0)
6589 return UnableToLegalize;
6590
6591 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
6592
6593 Register Src = MI.getOperand(1).getReg();
6594 LLT SrcTy = MRI.getType(Src);
6595
6596 // If all finite floats fit into the narrowed integer type, we can just swap
6597 // out the result type. This is practically only useful for conversions from
6598 // half to at least 16-bits, so just handle the one case.
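 // E.g. (illustrative) a G_FPTOSI from s16 (half) to s64 narrowed with
 // NarrowTy = s32: every finite half-precision value fits in a signed 32-bit
 // integer (17 bits are enough), so the conversion can produce s32 and the
 // result is then sign-extended to s64.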
6599 if (SrcTy.getScalarType() != LLT::scalar(16) ||
6600 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
6601 return UnableToLegalize;
6602
6603 Observer.changingInstr(MI);
6604 narrowScalarDst(MI, NarrowTy, 0,
6605 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
6606 Observer.changedInstr(MI);
6607 return Legalized;
6608}
6609
6610LegalizerHelper::LegalizeResult
6611LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
6612 LLT NarrowTy) {
6613 if (TypeIdx != 1)
6614 return UnableToLegalize;
6615
6616 uint64_t NarrowSize = NarrowTy.getSizeInBits();
6617
6618 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
6619 // FIXME: add support for when SizeOp1 isn't an exact multiple of
6620 // NarrowSize.
6621 if (SizeOp1 % NarrowSize != 0)
6622 return UnableToLegalize;
6623 int NumParts = SizeOp1 / NarrowSize;
6624
6625 SmallVector<Register, 2> SrcRegs, DstRegs;
6627 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
6628 MIRBuilder, MRI);
6629
6630 Register OpReg = MI.getOperand(0).getReg();
6631 uint64_t OpStart = MI.getOperand(2).getImm();
6632 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
6633 for (int i = 0; i < NumParts; ++i) {
6634 unsigned SrcStart = i * NarrowSize;
6635
6636 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
6637 // No part of the extract uses this subregister, ignore it.
6638 continue;
6639 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6640 // The entire subregister is extracted, forward the value.
6641 DstRegs.push_back(SrcRegs[i]);
6642 continue;
6643 }
6644
6645 // OpSegStart is where this destination segment would start in OpReg if it
6646 // extended infinitely in both directions.
6647 int64_t ExtractOffset;
6648 uint64_t SegSize;
6649 if (OpStart < SrcStart) {
6650 ExtractOffset = 0;
6651 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
6652 } else {
6653 ExtractOffset = OpStart - SrcStart;
6654 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
6655 }
6656
6657 Register SegReg = SrcRegs[i];
6658 if (ExtractOffset != 0 || SegSize != NarrowSize) {
6659 // A genuine extract is needed.
6660 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
6661 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
6662 }
6663
6664 DstRegs.push_back(SegReg);
6665 }
6666
6667 Register DstReg = MI.getOperand(0).getReg();
6668 if (MRI.getType(DstReg).isVector())
6669 MIRBuilder.buildBuildVector(DstReg, DstRegs);
6670 else if (DstRegs.size() > 1)
6671 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6672 else
6673 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
6674 MI.eraseFromParent();
6675 return Legalized;
6676}
6677
6678LegalizerHelper::LegalizeResult
6679LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
6680 LLT NarrowTy) {
6681 // FIXME: Don't know how to handle secondary types yet.
6682 if (TypeIdx != 0)
6683 return UnableToLegalize;
6684
6685 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
6687 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
6688 LLT LeftoverTy;
6689 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
6690 LeftoverRegs, MIRBuilder, MRI);
6691
6692 SrcRegs.append(LeftoverRegs);
6693
6694 uint64_t NarrowSize = NarrowTy.getSizeInBits();
6695 Register OpReg = MI.getOperand(2).getReg();
6696 uint64_t OpStart = MI.getOperand(3).getImm();
6697 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
6698 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
6699 unsigned DstStart = I * NarrowSize;
6700
6701 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6702 // The entire subregister is defined by this insert, forward the new
6703 // value.
6704 DstRegs.push_back(OpReg);
6705 continue;
6706 }
6707
6708 Register SrcReg = SrcRegs[I];
6709 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
6710 // The leftover reg is smaller than NarrowTy, so we need to extend it.
6711 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
6712 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
6713 }
6714
6715 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
6716 // No part of the insert affects this subregister, forward the original.
6717 DstRegs.push_back(SrcReg);
6718 continue;
6719 }
6720
6721 // OpSegStart is where this destination segment would start in OpReg if it
6722 // extended infinitely in both directions.
6723 int64_t ExtractOffset, InsertOffset;
6724 uint64_t SegSize;
6725 if (OpStart < DstStart) {
6726 InsertOffset = 0;
6727 ExtractOffset = DstStart - OpStart;
6728 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
6729 } else {
6730 InsertOffset = OpStart - DstStart;
6731 ExtractOffset = 0;
6732 SegSize =
6733 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
6734 }
6735
6736 Register SegReg = OpReg;
6737 if (ExtractOffset != 0 || SegSize != OpSize) {
6738 // A genuine extract is needed.
6739 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
6740 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
6741 }
6742
6743 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
6744 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
6745 DstRegs.push_back(DstReg);
6746 }
6747
6748 uint64_t WideSize = DstRegs.size() * NarrowSize;
6749 Register DstReg = MI.getOperand(0).getReg();
6750 if (WideSize > RegTy.getSizeInBits()) {
6751 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
6752 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
6753 MIRBuilder.buildTrunc(DstReg, MergeReg);
6754 } else
6755 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6756
6757 MI.eraseFromParent();
6758 return Legalized;
6759}
6760
6761LegalizerHelper::LegalizeResult
6762LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
6763 LLT NarrowTy) {
6764 Register DstReg = MI.getOperand(0).getReg();
6765 LLT DstTy = MRI.getType(DstReg);
6766
6767 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
6768
6769 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6770 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
6771 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6772 LLT LeftoverTy;
6773 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
6774 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
6775 return UnableToLegalize;
6776
6777 LLT Unused;
6778 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
6779 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6780 llvm_unreachable("inconsistent extractParts result");
6781
6782 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6783 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
6784 {Src0Regs[I], Src1Regs[I]});
6785 DstRegs.push_back(Inst.getReg(0));
6786 }
6787
6788 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6789 auto Inst = MIRBuilder.buildInstr(
6790 MI.getOpcode(),
6791 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
6792 DstLeftoverRegs.push_back(Inst.getReg(0));
6793 }
6794
6795 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6796 LeftoverTy, DstLeftoverRegs);
6797
6798 MI.eraseFromParent();
6799 return Legalized;
6800}
6801
6802LegalizerHelper::LegalizeResult
6803LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
6804 LLT NarrowTy) {
6805 if (TypeIdx != 0)
6806 return UnableToLegalize;
6807
6808 auto [DstReg, SrcReg] = MI.getFirst2Regs();
6809
6810 LLT DstTy = MRI.getType(DstReg);
6811 if (DstTy.isVector())
6812 return UnableToLegalize;
6813
6814 SmallVector<Register, 8> Parts;
6815 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
6816 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
6817 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
6818
6819 MI.eraseFromParent();
6820 return Legalized;
6821}
6822
6823LegalizerHelper::LegalizeResult
6824LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
6825 LLT NarrowTy) {
6826 if (TypeIdx != 0)
6827 return UnableToLegalize;
6828
6829 Register CondReg = MI.getOperand(1).getReg();
6830 LLT CondTy = MRI.getType(CondReg);
6831 if (CondTy.isVector()) // TODO: Handle vselect
6832 return UnableToLegalize;
6833
6834 Register DstReg = MI.getOperand(0).getReg();
6835 LLT DstTy = MRI.getType(DstReg);
6836
6837 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6838 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6839 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
6840 LLT LeftoverTy;
6841 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
6842 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6843 return UnableToLegalize;
6844
6845 LLT Unused;
6846 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
6847 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
6848 llvm_unreachable("inconsistent extractParts result");
6849
6850 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6851 auto Select = MIRBuilder.buildSelect(NarrowTy,
6852 CondReg, Src1Regs[I], Src2Regs[I]);
6853 DstRegs.push_back(Select.getReg(0));
6854 }
6855
6856 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6857 auto Select = MIRBuilder.buildSelect(
6858 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
6859 DstLeftoverRegs.push_back(Select.getReg(0));
6860 }
6861
6862 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6863 LeftoverTy, DstLeftoverRegs);
6864
6865 MI.eraseFromParent();
6866 return Legalized;
6867}
6868
6869LegalizerHelper::LegalizeResult
6870LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
6871 LLT NarrowTy) {
6872 if (TypeIdx != 1)
6873 return UnableToLegalize;
6874
6875 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6876 unsigned NarrowSize = NarrowTy.getSizeInBits();
6877
6878 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6879 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
6880
6881 MachineIRBuilder &B = MIRBuilder;
6882 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6883 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
6884 auto C_0 = B.buildConstant(NarrowTy, 0);
6885 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
6886 UnmergeSrc.getReg(1), C_0);
6887 auto LoCTLZ = IsUndef ?
6888 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
6889 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
6890 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
6891 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
6892 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
6893 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
6894
6895 MI.eraseFromParent();
6896 return Legalized;
6897 }
6898
6899 return UnableToLegalize;
6900}
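// Worked example of the split above (illustrative only, 64-bit source
// narrowed to 32-bit halves): for X = 0x0000'0000'0000'F000, Hi == 0, so the
// result is NarrowSize + ctlz32(Lo) = 32 + 16 = 48, matching ctlz64(X).
// For X = 0x0000'0001'0000'0000, Hi != 0, so the result is ctlz32(Hi) = 31.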
6901
6904 LLT NarrowTy) {
6905 if (TypeIdx != 1)
6906 return UnableToLegalize;
6907
6908 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6909 unsigned NarrowSize = NarrowTy.getSizeInBits();
6910
6911 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6912 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
6913
6914 MachineIRBuilder &B = MIRBuilder;
6915 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6916 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
6917 auto C_0 = B.buildConstant(NarrowTy, 0);
6918 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
6919 UnmergeSrc.getReg(0), C_0);
6920 auto HiCTTZ = IsUndef ?
6921 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
6922 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
6923 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
6924 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
6925 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
6926 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
6927
6928 MI.eraseFromParent();
6929 return Legalized;
6930 }
6931
6932 return UnableToLegalize;
6933}
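// Worked example of the split above (illustrative only, 64-bit source,
// 32-bit halves): X = 0x0000'0001'0000'0000 has Lo == 0, so the result is
// cttz32(Hi) + NarrowSize = 0 + 32 = 32; X = 0x8 has Lo != 0, so the result
// is simply cttz32(Lo) = 3.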
6934
6937 LLT NarrowTy) {
6938 if (TypeIdx != 1)
6939 return UnableToLegalize;
6940
6941 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6942 unsigned NarrowSize = NarrowTy.getSizeInBits();
6943
6944 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6945 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
6946
6947 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
6948 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
6949 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
6950
6951 MI.eraseFromParent();
6952 return Legalized;
6953 }
6954
6955 return UnableToLegalize;
6956}
6957
6960 LLT NarrowTy) {
6961 if (TypeIdx != 1)
6962 return UnableToLegalize;
6963
6964 MachineIRBuilder &B = MIRBuilder;
6965 Register ExpReg = MI.getOperand(2).getReg();
6966 LLT ExpTy = MRI.getType(ExpReg);
6967
6968 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
6969
6970 // Clamp the exponent to the range of the target type.
6971 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
6972 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
6973 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
6974 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
6975
6976 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
6977 Observer.changingInstr(MI);
6978 MI.getOperand(2).setReg(Trunc.getReg(0));
6979 Observer.changedInstr(MI);
6980 return Legalized;
6981}
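// Illustration (not from the upstream source): when the exponent operand is
// narrowed to s16, ClampSize is 16 and the exponent is clamped to
// [-32768, 32767] before the truncation. Exponents of that magnitude already
// drive any supported float type to zero or infinity, so the clamp does not
// change the value G_FLDEXP produces.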
6982
6985 unsigned Opc = MI.getOpcode();
6986 const auto &TII = MIRBuilder.getTII();
6987 auto isSupported = [this](const LegalityQuery &Q) {
6988 auto QAction = LI.getAction(Q).Action;
6989 return QAction == Legal || QAction == Libcall || QAction == Custom;
6990 };
6991 switch (Opc) {
6992 default:
6993 return UnableToLegalize;
6994 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
6995 // This trivially expands to CTLZ.
6996 Observer.changingInstr(MI);
6997 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
6998 Observer.changedInstr(MI);
6999 return Legalized;
7000 }
7001 case TargetOpcode::G_CTLZ: {
7002 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7003 unsigned Len = SrcTy.getSizeInBits();
7004
7005 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7006 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
7007 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7008 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
7009 auto ICmp = MIRBuilder.buildICmp(
7010 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
7011 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7012 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7013 MI.eraseFromParent();
7014 return Legalized;
7015 }
7016 // for now, we do this:
7017 // NewLen = NextPowerOf2(Len);
7018 // x = x | (x >> 1);
7019 // x = x | (x >> 2);
7020 // ...
7021 // x = x | (x >> 16);
7022 // x = x | (x >> 32); // for 64-bit input
7023 // Up to NewLen/2
7024 // return Len - popcount(x);
7025 //
7026 // Ref: "Hacker's Delight" by Henry Warren
7027 Register Op = SrcReg;
7028 unsigned NewLen = PowerOf2Ceil(Len);
7029 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7030 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7031 auto MIBOp = MIRBuilder.buildOr(
7032 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
7033 Op = MIBOp.getReg(0);
7034 }
7035 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
7036 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
7037 MIBPop);
7038 MI.eraseFromParent();
7039 return Legalized;
7040 }
7041 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7042 // This trivially expands to CTTZ.
7043 Observer.changingInstr(MI);
7044 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
7045 Observer.changedInstr(MI);
7046 return Legalized;
7047 }
7048 case TargetOpcode::G_CTTZ: {
7049 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7050
7051 unsigned Len = SrcTy.getSizeInBits();
7052 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7053 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
7054 // zero.
7055 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7056 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
7057 auto ICmp = MIRBuilder.buildICmp(
7058 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
7059 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7060 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7061 MI.eraseFromParent();
7062 return Legalized;
7063 }
7064 // for now, we use: { return popcount(~x & (x - 1)); }
7065 // unless the target has ctlz but not ctpop, in which case we use:
7066 // { return 32 - nlz(~x & (x-1)); }
7067 // Ref: "Hacker's Delight" by Henry Warren
7068 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
7069 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7070 auto MIBTmp = MIRBuilder.buildAnd(
7071 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7072 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7073 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7074 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
7075 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
7076 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
7077 MI.eraseFromParent();
7078 return Legalized;
7079 }
7080 Observer.changingInstr(MI);
7081 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
7082 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7083 Observer.changedInstr(MI);
7084 return Legalized;
7085 }
7086 case TargetOpcode::G_CTPOP: {
7087 Register SrcReg = MI.getOperand(1).getReg();
7088 LLT Ty = MRI.getType(SrcReg);
7089 unsigned Size = Ty.getSizeInBits();
7090 MachineIRBuilder &B = MIRBuilder;
7091
7092 // Count set bits in blocks of 2 bits. The default approach would be
7093 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
7094 // We use the following formula instead:
7095 // B2Count = val - { (val >> 1) & 0x55555555 }
7096 // since it gives the same result for blocks of 2 with one instruction fewer.
7097 auto C_1 = B.buildConstant(Ty, 1);
7098 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
7099 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
7100 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
7101 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7102 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
7103
7104 // To get the count in blocks of 4, add the values from adjacent blocks of 2.
7105 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
7106 auto C_2 = B.buildConstant(Ty, 2);
7107 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
7108 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
7109 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
7110 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7111 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7112 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7113
7114 // For the count in blocks of 8 bits we don't have to mask the high 4 bits
7115 // before the addition since the summed count sits in the range {0,...,8},
7116 // which fits in 4 bits. After the addition the high 4 bits still hold the
7117 // count of set bits in the high 4-bit block; clear them to get the 8-bit result.
7118 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
7119 auto C_4 = B.buildConstant(Ty, 4);
7120 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
7121 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
7122 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
7123 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
7124 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7125
7126 assert(Size <= 128 && "Scalar size is too large for CTPOP lower algorithm");
7127 // 8 bits can hold the CTPOP result of a 128-bit int or smaller. Multiplying by
7128 // this bitmask sets the 8 msb of ResTmp to the sum of the per-byte B8Counts.
7129 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
7130
7131 // Shift count result from 8 high bits to low bits.
7132 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
7133
7134 auto IsMulSupported = [this](const LLT Ty) {
7135 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7136 return Action == Legal || Action == WidenScalar || Action == Custom;
7137 };
7138 if (IsMulSupported(Ty)) {
7139 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
7140 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7141 } else {
7142 auto ResTmp = B8Count;
7143 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7144 auto ShiftC = B.buildConstant(Ty, Shift);
7145 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
7146 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
7147 }
7148 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7149 }
7150 MI.eraseFromParent();
7151 return Legalized;
7152 }
7153 }
7154}
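// The G_CTPOP expansion above is the classic SWAR population count. A minimal
// scalar sketch of the same steps for a 32-bit value (illustrative only):
//   uint32_t ctpop32(uint32_t V) {
//     V = V - ((V >> 1) & 0x55555555);                // counts in 2-bit blocks
//     V = (V & 0x33333333) + ((V >> 2) & 0x33333333); // counts in 4-bit blocks
//     V = (V + (V >> 4)) & 0x0F0F0F0F;                // counts in 8-bit blocks
//     return (V * 0x01010101) >> 24;                  // sum the byte counts
//   }
// e.g. ctpop32(0xF0F0F0F0) == 16.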
7155
7156// Check that (every element of) Reg is undef or not an exact multiple of BW.
7157 static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
7158 Register Reg, unsigned BW) {
7159 return matchUnaryPredicate(
7160 MRI, Reg,
7161 [=](const Constant *C) {
7162 // Null constant here means an undef.
7163 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
7164 return !CI || CI->getValue().urem(BW) != 0;
7165 },
7166 /*AllowUndefs*/ true);
7167}
7168
7171 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7172 LLT Ty = MRI.getType(Dst);
7173 LLT ShTy = MRI.getType(Z);
7174
7175 unsigned BW = Ty.getScalarSizeInBits();
7176
7177 if (!isPowerOf2_32(BW))
7178 return UnableToLegalize;
7179
7180 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7181 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7182
7183 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7184 // fshl X, Y, Z -> fshr X, Y, -Z
7185 // fshr X, Y, Z -> fshl X, Y, -Z
7186 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
7187 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7188 } else {
7189 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7190 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7191 auto One = MIRBuilder.buildConstant(ShTy, 1);
7192 if (IsFSHL) {
7193 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7194 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
7195 } else {
7196 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7197 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
7198 }
7199
7200 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
7201 }
7202
7203 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
7204 MI.eraseFromParent();
7205 return Legalized;
7206}
7207
7210 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7211 LLT Ty = MRI.getType(Dst);
7212 LLT ShTy = MRI.getType(Z);
7213
7214 const unsigned BW = Ty.getScalarSizeInBits();
7215 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7216
7217 Register ShX, ShY;
7218 Register ShAmt, InvShAmt;
7219
7220 // FIXME: Emit optimized urem by constant instead of letting it expand later.
7221 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7222 // fshl: X << C | Y >> (BW - C)
7223 // fshr: X << (BW - C) | Y >> C
7224 // where C = Z % BW is not zero
7225 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7226 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7227 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
7228 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
7229 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
7230 } else {
7231 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7232 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7233 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
7234 if (isPowerOf2_32(BW)) {
7235 // Z % BW -> Z & (BW - 1)
7236 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
7237 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7238 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
7239 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
7240 } else {
7241 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7242 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7243 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
7244 }
7245
7246 auto One = MIRBuilder.buildConstant(ShTy, 1);
7247 if (IsFSHL) {
7248 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
7249 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
7250 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
7251 } else {
7252 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
7253 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
7254 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
7255 }
7256 }
7257
7258 MIRBuilder.buildOr(Dst, ShX, ShY);
7259 MI.eraseFromParent();
7260 return Legalized;
7261}
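// Worked example of the expansion above (illustrative only): for G_FSHL with
// BW = 32, X = 0x12345678, Y = 0x9ABCDEF0 and a shift amount known to be
// 8 (non-zero mod BW), ShAmt = 8 and InvShAmt = 24, so the result is
// (X << 8) | (Y >> 24) = 0x34567800 | 0x0000009A = 0x3456789A.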
7262
7265 // These operations approximately do the following (while avoiding undefined
7266 // shifts by BW):
7267 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
7268 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
7269 Register Dst = MI.getOperand(0).getReg();
7270 LLT Ty = MRI.getType(Dst);
7271 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
7272
7273 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7274 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7275
7276 // TODO: Use smarter heuristic that accounts for vector legalization.
7277 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
7278 return lowerFunnelShiftAsShifts(MI);
7279
7280 // This only works for powers of 2; fall back to shifts if it fails.
7281 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
7282 if (Result == UnableToLegalize)
7283 return lowerFunnelShiftAsShifts(MI);
7284 return Result;
7285}
7286
7288 auto [Dst, Src] = MI.getFirst2Regs();
7289 LLT DstTy = MRI.getType(Dst);
7290 LLT SrcTy = MRI.getType(Src);
7291
7292 uint32_t DstTySize = DstTy.getSizeInBits();
7293 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
7294 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
7295
7296 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
7297 !isPowerOf2_32(SrcTyScalarSize))
7298 return UnableToLegalize;
7299
7300 // The step between the source and destination element sizes is too large;
7301 // split it by creating an intermediate extend instruction.
7302 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
7303 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
7304 // If the destination type is illegal, split it into multiple statements
7305 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
7306 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
7307 // Unmerge the vector
7308 LLT EltTy = MidTy.changeElementCount(
7309 MidTy.getElementCount().divideCoefficientBy(2));
7310 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
7311
7312 // ZExt the vectors
7313 LLT ZExtResTy = DstTy.changeElementCount(
7314 DstTy.getElementCount().divideCoefficientBy(2));
7315 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7316 {UnmergeSrc.getReg(0)});
7317 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7318 {UnmergeSrc.getReg(1)});
7319
7320 // Merge the ending vectors
7321 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
7322
7323 MI.eraseFromParent();
7324 return Legalized;
7325 }
7326 return UnableToLegalize;
7327}
7328
7330 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
7332 // Similar to how operand splitting is done in SelectionDAG, we can handle
7333 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
7334 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
7335 // %lo16(<4 x s16>) = G_TRUNC %inlo
7336 // %hi16(<4 x s16>) = G_TRUNC %inhi
7337 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
7338 // %res(<8 x s8>) = G_TRUNC %in16
7339
7340 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
7341
7342 Register DstReg = MI.getOperand(0).getReg();
7343 Register SrcReg = MI.getOperand(1).getReg();
7344 LLT DstTy = MRI.getType(DstReg);
7345 LLT SrcTy = MRI.getType(SrcReg);
7346
7347 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
7348 isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
7349 isPowerOf2_32(SrcTy.getNumElements()) &&
7350 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
7351 // Split input type.
7352 LLT SplitSrcTy = SrcTy.changeElementCount(
7353 SrcTy.getElementCount().divideCoefficientBy(2));
7354
7355 // First, split the source into two smaller vectors.
7356 SmallVector<Register, 2> SplitSrcs;
7357 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
7358
7359 // Truncate the splits into intermediate narrower elements.
7360 LLT InterTy;
7361 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7362 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
7363 else
7364 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
7365 for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
7366 SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
7367 }
7368
7369 // Combine the new truncates into one vector
7370 auto Merge = MIRBuilder.buildMergeLikeInstr(
7371 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
7372
7373 // Truncate the new vector to the final result type
7374 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7375 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
7376 else
7377 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
7378
7379 MI.eraseFromParent();
7380
7381 return Legalized;
7382 }
7383 return UnableToLegalize;
7384}
7385
7388 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7389 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
7390 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7391 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7392 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
7393 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
7394 MI.eraseFromParent();
7395 return Legalized;
7396}
7397
7399 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7400
7401 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
7402 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7403
7405
7406 // If a rotate in the other direction is supported, use it.
7407 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7408 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
7409 isPowerOf2_32(EltSizeInBits))
7410 return lowerRotateWithReverseRotate(MI);
7411
7412 // If a funnel shift is supported, use it.
7413 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
7414 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
7415 bool IsFShLegal = false;
7416 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
7417 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
7418 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
7419 Register R3) {
7420 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
7421 MI.eraseFromParent();
7422 return Legalized;
7423 };
7424 // If a funnel shift in the other direction is supported, use it.
7425 if (IsFShLegal) {
7426 return buildFunnelShift(FShOpc, Dst, Src, Amt);
7427 } else if (isPowerOf2_32(EltSizeInBits)) {
7428 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
7429 return buildFunnelShift(RevFsh, Dst, Src, Amt);
7430 }
7431 }
7432
7433 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
7434 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
7435 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
7436 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
7437 Register ShVal;
7438 Register RevShiftVal;
7439 if (isPowerOf2_32(EltSizeInBits)) {
7440 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
7441 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
7442 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
7443 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
7444 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
7445 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
7446 RevShiftVal =
7447 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
7448 } else {
7449 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
7450 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
7451 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
7452 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
7453 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
7454 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
7455 auto One = MIRBuilder.buildConstant(AmtTy, 1);
7456 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
7457 RevShiftVal =
7458 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
7459 }
7460 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
7461 MI.eraseFromParent();
7462 return Legalized;
7463}
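// Worked example of the final shift-based form above (illustrative only):
// G_ROTL of the 32-bit value X = 0x80000001 by Amt = 1 computes
// (X << (1 & 31)) | (X >> (-1 & 31)) = 0x00000002 | 0x00000001 = 0x00000003,
// i.e. the top bit wraps around into bit 0 as expected.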
7464
7465// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
7466// representation.
7469 auto [Dst, Src] = MI.getFirst2Regs();
7470 const LLT S64 = LLT::scalar(64);
7471 const LLT S32 = LLT::scalar(32);
7472 const LLT S1 = LLT::scalar(1);
7473
7474 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
7475
7476 // unsigned cul2f(ulong u) {
7477 // uint lz = clz(u);
7478 // uint e = (u != 0) ? 127U + 63U - lz : 0;
7479 // u = (u << lz) & 0x7fffffffffffffffUL;
7480 // ulong t = u & 0xffffffffffUL;
7481 // uint v = (e << 23) | (uint)(u >> 40);
7482 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
7483 // return as_float(v + r);
7484 // }
7485
7486 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
7487 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
7488
7489 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
7490
7491 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
7492 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
7493
7494 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
7495 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
7496
7497 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
7498 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
7499
7500 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
7501
7502 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
7503 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
7504
7505 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
7506 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
7507 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
7508
7509 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
7510 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
7511 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
7512 auto One = MIRBuilder.buildConstant(S32, 1);
7513
7514 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
7515 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
7516 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
7517 MIRBuilder.buildAdd(Dst, V, R);
7518
7519 MI.eraseFromParent();
7520 return Legalized;
7521}
7522
7523// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
7524// operations and G_SITOFP
7527 auto [Dst, Src] = MI.getFirst2Regs();
7528 const LLT S64 = LLT::scalar(64);
7529 const LLT S32 = LLT::scalar(32);
7530 const LLT S1 = LLT::scalar(1);
7531
7532 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
7533
7534 // For values that fit in a signed i64 (top bit clear) we simply reuse SITOFP.
7535 // Otherwise, divide the value by 2, keep the rounding information by ORing in
7536 // the lowest bit saved before the division, convert to float by SITOFP, and
7537 // multiply the result by 2.
7538 auto One = MIRBuilder.buildConstant(S64, 1);
7539 auto Zero = MIRBuilder.buildConstant(S64, 0);
7540 // Result if Src < INT_MAX
7541 auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
7542 // Result if Src >= INT_MAX
7543 auto Halved = MIRBuilder.buildLShr(S64, Src, One);
7544 auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
7545 auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
7546 auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
7547 auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
7548 // Check whether the original value exceeds the signed i64 range by comparing
7549 // it, as a signed value, with zero to pick one of the two conversions.
7550 auto IsLarge =
7551 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, Src, Zero);
7552 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
7553
7554 MI.eraseFromParent();
7555 return Legalized;
7556}
7557
7558// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
7559// IEEE double representation.
7562 auto [Dst, Src] = MI.getFirst2Regs();
7563 const LLT S64 = LLT::scalar(64);
7564 const LLT S32 = LLT::scalar(32);
7565
7566 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
7567
7568 // We build the double value from two 32-bit parts whose exponents differ by 32.
7569 // Note that + and - are float operations that adjust the implicit leading
7570 // one; the bases 2^52 and 2^84 are for illustrative purposes.
7571 //
7572 // X = 2^52 * 1.0...LowBits
7573 // Y = 2^84 * 1.0...HighBits
7574 // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
7575 // = - 2^52 * 1.0...HighBits
7576 // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
7577 auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
7578 auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
7579 auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
7580 auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
7581 auto HalfWidth = MIRBuilder.buildConstant(S64, 32);
7582
7583 auto LowBits = MIRBuilder.buildTrunc(S32, Src);
7584 LowBits = MIRBuilder.buildZExt(S64, LowBits);
7585 auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
7586 auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
7587 auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
7588 auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
7589 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
7590
7591 MI.eraseFromParent();
7592 return Legalized;
7593}
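// Worked example of the trick above (illustrative only): for Src = 2^32 + 5,
// LowBits = 5 and HighBits = 1, so LowBitsFP holds the double 2^52 + 5 and
// HighBitsFP holds 2^84 + 2^32. Scratch = (2^84 + 2^32) - (2^84 + 2^52)
// = 2^32 - 2^52, and the final add gives (2^32 - 2^52) + (2^52 + 5)
// = 2^32 + 5, i.e. the exact value of Src.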
7594
7596 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7597
7598 if (SrcTy == LLT::scalar(1)) {
7599 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
7600 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
7601 MIRBuilder.buildSelect(Dst, Src, True, False);
7602 MI.eraseFromParent();
7603 return Legalized;
7604 }
7605
7606 if (SrcTy != LLT::scalar(64))
7607 return UnableToLegalize;
7608
7609 if (DstTy == LLT::scalar(32))
7610 // TODO: SelectionDAG has several alternative expansions to port which may
7611 // be more reasonable depending on the available instructions. We also need
7612 // a more advanced mechanism to choose an optimal version depending on
7613 // target features such as sitofp or CTLZ availability.
7615
7616 if (DstTy == LLT::scalar(64))
7618
7619 return UnableToLegalize;
7620}
7621
7623 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7624
7625 const LLT S64 = LLT::scalar(64);
7626 const LLT S32 = LLT::scalar(32);
7627 const LLT S1 = LLT::scalar(1);
7628
7629 if (SrcTy == S1) {
7630 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
7631 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
7632 MIRBuilder.buildSelect(Dst, Src, True, False);
7633 MI.eraseFromParent();
7634 return Legalized;
7635 }
7636
7637 if (SrcTy != S64)
7638 return UnableToLegalize;
7639
7640 if (DstTy == S32) {
7641 // signed cl2f(long l) {
7642 // long s = l >> 63;
7643 // float r = cul2f((l + s) ^ s);
7644 // return s ? -r : r;
7645 // }
7646 Register L = Src;
7647 auto SignBit = MIRBuilder.buildConstant(S64, 63);
7648 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
7649
7650 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
7651 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
7652 auto R = MIRBuilder.buildUITOFP(S32, Xor);
7653
7654 auto RNeg = MIRBuilder.buildFNeg(S32, R);
7655 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
7656 MIRBuilder.buildConstant(S64, 0));
7657 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
7658 MI.eraseFromParent();
7659 return Legalized;
7660 }
7661
7662 return UnableToLegalize;
7663}
7664
7666 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7667 const LLT S64 = LLT::scalar(64);
7668 const LLT S32 = LLT::scalar(32);
7669
7670 if (SrcTy != S64 && SrcTy != S32)
7671 return UnableToLegalize;
7672 if (DstTy != S32 && DstTy != S64)
7673 return UnableToLegalize;
7674
7675 // FPTOSI gives the same result as FPTOUI for inputs within the signed range.
7676 // FPTOUI additionally needs to handle fp values that convert to unsigned
7677 // integers of at least 2^31 for a 32-bit result or 2^63 for a 64-bit result (2^Exp for brevity).
7678
7679 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
7680 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
7681 : APFloat::IEEEdouble(),
7682 APInt::getZero(SrcTy.getSizeInBits()));
7683 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
7684
7685 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
7686
7687 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
7688 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
7689 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
7690 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
7691 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
7692 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
7693 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
7694
7695 const LLT S1 = LLT::scalar(1);
7696
7697 MachineInstrBuilder FCMP =
7698 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
7699 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
7700
7701 MI.eraseFromParent();
7702 return Legalized;
7703}
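// Worked example of the selection above (illustrative only, f32 -> i32, so
// Threshold = 2^31): for Src = 3.0e9 the FCMP_ULT check is false, so the
// result is FPTOSI(3.0e9 - 2^31) ^ 0x80000000
// = 852516352 ^ 0x80000000 = 3000000000. For Src below the threshold the
// plain FPTOSI result is selected instead.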
7704
7706 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7707 const LLT S64 = LLT::scalar(64);
7708 const LLT S32 = LLT::scalar(32);
7709
7710 // FIXME: Only f32 to i64 conversions are supported.
7711 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
7712 return UnableToLegalize;
7713
7714 // Expand f32 -> i64 conversion
7715 // This algorithm comes from compiler-rt's implementation of fixsfdi:
7716 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
7717
7718 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
7719
7720 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
7721 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
7722
7723 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
7724 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
7725
7726 auto SignMask = MIRBuilder.buildConstant(SrcTy,
7727 APInt::getSignMask(SrcEltBits));
7728 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
7729 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
7730 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
7731 Sign = MIRBuilder.buildSExt(DstTy, Sign);
7732
7733 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
7734 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
7735 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
7736
7737 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
7738 R = MIRBuilder.buildZExt(DstTy, R);
7739
7740 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
7741 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
7742 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
7743 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
7744
7745 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
7746 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
7747
7748 const LLT S1 = LLT::scalar(1);
7749 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
7750 S1, Exponent, ExponentLoBit);
7751
7752 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
7753
7754 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
7755 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
7756
7757 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
7758
7759 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
7760 S1, Exponent, ZeroSrcTy);
7761
7762 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
7763 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
7764
7765 MI.eraseFromParent();
7766 return Legalized;
7767}
7768
7771 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7772
7773 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
7774 unsigned SatWidth = DstTy.getScalarSizeInBits();
7775
7776 // Determine minimum and maximum integer values and their corresponding
7777 // floating-point values.
7778 APInt MinInt, MaxInt;
7779 if (IsSigned) {
7780 MinInt = APInt::getSignedMinValue(SatWidth);
7781 MaxInt = APInt::getSignedMaxValue(SatWidth);
7782 } else {
7783 MinInt = APInt::getMinValue(SatWidth);
7784 MaxInt = APInt::getMaxValue(SatWidth);
7785 }
7786
7787 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
7788 APFloat MinFloat(Semantics);
7789 APFloat MaxFloat(Semantics);
7790
7791 APFloat::opStatus MinStatus =
7792 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
7793 APFloat::opStatus MaxStatus =
7794 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
7795 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
7796 !(MaxStatus & APFloat::opStatus::opInexact);
7797
7798 // If the integer bounds are exactly representable as floats, emit a
7799 // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
7800 // and selects.
7801 if (AreExactFloatBounds) {
7802 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
7803 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
7805 SrcTy.changeElementSize(1), Src, MaxC);
7806 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
7807 // Clamp by MaxFloat from above. NaN cannot occur.
7808 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
7809 auto MinP =
7812 auto Min =
7813 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
7814 // Convert clamped value to integer. In the unsigned case we're done,
7815 // because we mapped NaN to MinFloat, which will cast to zero.
7816 if (!IsSigned) {
7817 MIRBuilder.buildFPTOUI(Dst, Min);
7818 MI.eraseFromParent();
7819 return Legalized;
7820 }
7821
7822 // Otherwise, select 0 if Src is NaN.
7823 auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
7824 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
7825 DstTy.changeElementSize(1), Src, Src);
7826 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
7827 FpToInt);
7828 MI.eraseFromParent();
7829 return Legalized;
7830 }
7831
7832 // Result of direct conversion. The assumption here is that the operation is
7833 // non-trapping and it's fine to apply it to an out-of-range value if we
7834 // select it away later.
7835 auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
7836 : MIRBuilder.buildFPTOUI(DstTy, Src);
7837
7838 // If Src ULT MinFloat, select MinInt. In particular, this also selects
7839 // MinInt if Src is NaN.
7840 auto ULT =
7841 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
7842 MIRBuilder.buildFConstant(SrcTy, MinFloat));
7843 auto Max = MIRBuilder.buildSelect(
7844 DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
7845 // If Src OGT MaxFloat, select MaxInt.
7846 auto OGT =
7847 MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
7848 MIRBuilder.buildFConstant(SrcTy, MaxFloat));
7849
7850 // In the unsigned case we are done, because we mapped NaN to MinInt, which
7851 // is already zero.
7852 if (!IsSigned) {
7853 MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
7854 Max);
7855 MI.eraseFromParent();
7856 return Legalized;
7857 }
7858
7859 // Otherwise, select 0 if Src is NaN.
7860 auto Min = MIRBuilder.buildSelect(
7861 DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
7862 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
7863 DstTy.changeElementSize(1), Src, Src);
7864 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
7865 MI.eraseFromParent();
7866 return Legalized;
7867}
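// Illustration (not from the upstream source): for G_FPTOSI_SAT from f32 to
// s8 the bounds are MinInt = -128 and MaxInt = 127, both exactly
// representable in f32, so the min/max/fptoi path is used and NaN inputs are
// selected to 0. For an s64 result, MaxInt = 2^63 - 1 is not exactly
// representable in f32, so the compare-and-select path is used instead.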
7868
7869// f64 -> f16 conversion using round-to-nearest-even rounding mode.
7872 const LLT S1 = LLT::scalar(1);
7873 const LLT S32 = LLT::scalar(32);
7874
7875 auto [Dst, Src] = MI.getFirst2Regs();
7876 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
7877 MRI.getType(Src).getScalarType() == LLT::scalar(64));
7878
7879 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
7880 return UnableToLegalize;
7881
7883 unsigned Flags = MI.getFlags();
7884 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
7885 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
7886 MI.eraseFromParent();
7887 return Legalized;
7888 }
7889
7890 const unsigned ExpMask = 0x7ff;
7891 const unsigned ExpBiasf64 = 1023;
7892 const unsigned ExpBiasf16 = 15;
7893
7894 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
7895 Register U = Unmerge.getReg(0);
7896 Register UH = Unmerge.getReg(1);
7897
7898 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
7899 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
7900
7901 // Subtract the fp64 exponent bias (1023) to get the real exponent and
7902 // add the f16 bias (15) to get the biased exponent for the f16 format.
7903 E = MIRBuilder.buildAdd(
7904 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
7905
7908
7909 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
7910 MIRBuilder.buildConstant(S32, 0x1ff));
7911 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
7912
7913 auto Zero = MIRBuilder.buildConstant(S32, 0);
7914 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
7915 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
7916 M = MIRBuilder.buildOr(S32, M, Lo40Set);
7917
7918 // (M != 0 ? 0x0200 : 0) | 0x7c00;
7919 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
7920 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
7921 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
7922
7923 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
7924 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
7925
7926 // N = M | (E << 12);
7927 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
7928 auto N = MIRBuilder.buildOr(S32, M, EShl12);
7929
7930 // B = clamp(1-E, 0, 13);
7931 auto One = MIRBuilder.buildConstant(S32, 1);
7932 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
7933 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
7934 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
7935
7936 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
7937 MIRBuilder.buildConstant(S32, 0x1000));
7938
7939 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
7940 auto D0 = MIRBuilder.buildShl(S32, D, B);
7941
7942 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
7943 D0, SigSetHigh);
7944 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
7945 D = MIRBuilder.buildOr(S32, D, D1);
7946
7947 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
7948 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
7949
7950 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
7952
7953 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
7954 MIRBuilder.buildConstant(S32, 3));
7955 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
7956
7957 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
7958 MIRBuilder.buildConstant(S32, 5));
7959 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
7960
7961 V1 = MIRBuilder.buildOr(S32, V0, V1);
7962 V = MIRBuilder.buildAdd(S32, V, V1);
7963
7964 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
7965 E, MIRBuilder.buildConstant(S32, 30));
7966 V = MIRBuilder.buildSelect(S32, CmpEGt30,
7967 MIRBuilder.buildConstant(S32, 0x7c00), V);
7968
7969 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
7970 E, MIRBuilder.buildConstant(S32, 1039));
7971 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
7972
7973 // Extract the sign bit.
7974 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
7975 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
7976
7977 // Insert the sign bit
7978 V = MIRBuilder.buildOr(S32, Sign, V);
7979
7980 MIRBuilder.buildTrunc(Dst, V);
7981 MI.eraseFromParent();
7982 return Legalized;
7983}
7984
7987 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
7988 const LLT S64 = LLT::scalar(64);
7989 const LLT S16 = LLT::scalar(16);
7990
7991 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
7992 return lowerFPTRUNC_F64_TO_F16(MI);
7993
7994 return UnableToLegalize;
7995}
7996
7998 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7999 LLT Ty = MRI.getType(Dst);
8000
8001 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
8002 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
8003 MI.eraseFromParent();
8004 return Legalized;
8005}
8006
8007 static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
8008 switch (Opc) {
8009 case TargetOpcode::G_SMIN:
8010 return CmpInst::ICMP_SLT;
8011 case TargetOpcode::G_SMAX:
8012 return CmpInst::ICMP_SGT;
8013 case TargetOpcode::G_UMIN:
8014 return CmpInst::ICMP_ULT;
8015 case TargetOpcode::G_UMAX:
8016 return CmpInst::ICMP_UGT;
8017 default:
8018 llvm_unreachable("not in integer min/max");
8019 }
8020}
8021
8023 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8024
8025 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
8026 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
8027
8028 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8029 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8030
8031 MI.eraseFromParent();
8032 return Legalized;
8033}
8034
8037 GSUCmp *Cmp = cast<GSUCmp>(&MI);
8038
8039 Register Dst = Cmp->getReg(0);
8040 LLT DstTy = MRI.getType(Dst);
8041 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8042 LLT CmpTy = DstTy.changeElementSize(1);
8043
8044 CmpInst::Predicate LTPredicate = Cmp->isSigned()
8045 ? CmpInst::ICMP_SLT
8046 : CmpInst::ICMP_ULT;
8047 CmpInst::Predicate GTPredicate = Cmp->isSigned()
8048 ? CmpInst::ICMP_SGT
8049 : CmpInst::ICMP_UGT;
8050
8051 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
8052 auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8053 Cmp->getRHSReg());
8054 auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8055 Cmp->getRHSReg());
8056
8057 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
8058 auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
8061 auto One = MIRBuilder.buildConstant(DstTy, 1);
8062 auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8063
8064 auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
8065 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8066 } else {
8068 std::swap(IsGT, IsLT);
8069 // Extend boolean results to DstTy, which is at least i2, before subtracting
8070 // them.
8071 unsigned BoolExtOp =
8072 MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
8073 IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8074 IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8075 MIRBuilder.buildSub(Dst, IsGT, IsLT);
8076 }
8077
8078 MI.eraseFromParent();
8079 return Legalized;
8080}
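// Worked example of the subtraction form above (illustrative only, assuming
// the booleans extend to 0/1): G_UCMP with LHS = 5 and RHS = 7 gives
// IsGT = 0 and IsLT = 1, so the result is 0 - 1 = -1; swapped operands give
// +1 and equal operands give 0, matching the -1/0/+1 three-way contract.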
8081
8084 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
8085 const int Src0Size = Src0Ty.getScalarSizeInBits();
8086 const int Src1Size = Src1Ty.getScalarSizeInBits();
8087
8088 auto SignBitMask = MIRBuilder.buildConstant(
8089 Src0Ty, APInt::getSignMask(Src0Size));
8090
8091 auto NotSignBitMask = MIRBuilder.buildConstant(
8092 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
8093
8094 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
8095 Register And1;
8096 if (Src0Ty == Src1Ty) {
8097 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
8098 } else if (Src0Size > Src1Size) {
8099 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
8100 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
8101 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
8102 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
8103 } else {
8104 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
8105 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
8106 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
8107 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
8108 }
8109
8110 // Be careful about setting nsz/nnan/ninf on every instruction, since the
8111 // constants are a nan and -0.0, but the final result should preserve
8112 // everything.
8113 unsigned Flags = MI.getFlags();
8114
8115 // We masked the sign bit and the not-sign bit, so these are disjoint.
8116 Flags |= MachineInstr::Disjoint;
8117
8118 MIRBuilder.buildOr(Dst, And0, And1, Flags);
8119
8120 MI.eraseFromParent();
8121 return Legalized;
8122}
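// Worked example of the mask-and-or form above (illustrative only, both
// operands f32): fcopysign(1.0, -2.0) computes
// (0x3F800000 & 0x7FFFFFFF) | (0xC0000000 & 0x80000000)
// = 0x3F800000 | 0x80000000 = 0xBF800000, which is -1.0.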
8123
8126 unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
8127 TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
8128
8129 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8130 LLT Ty = MRI.getType(Dst);
8131
8132 if (!MI.getFlag(MachineInstr::FmNoNans)) {
8133 // Insert canonicalizes if it's possible we need to quiet to get correct
8134 // sNaN behavior.
8135
8136 // Note this must be done here, and not as an optimization combine in the
8137 // absence of a dedicated quiet-sNaN instruction, as we're using an
8138 // omni-purpose G_FCANONICALIZE.
8139 if (!isKnownNeverSNaN(Src0, MRI))
8140 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
8141
8142 if (!isKnownNeverSNaN(Src1, MRI))
8143 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
8144 }
8145
8146 // If there are no nans, it's safe to simply replace this with the non-IEEE
8147 // version.
8148 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
8149 MI.eraseFromParent();
8150 return Legalized;
8151}
8152
8154 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
8155 Register DstReg = MI.getOperand(0).getReg();
8156 LLT Ty = MRI.getType(DstReg);
8157 unsigned Flags = MI.getFlags();
8158
8159 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
8160 Flags);
8161 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
8162 MI.eraseFromParent();
8163 return Legalized;
8164}
8165
8168 auto [DstReg, X] = MI.getFirst2Regs();
8169 const unsigned Flags = MI.getFlags();
8170 const LLT Ty = MRI.getType(DstReg);
8171 const LLT CondTy = Ty.changeElementSize(1);
8172
8173 // round(x) =>
8174 // t = trunc(x);
8175 // d = fabs(x - t);
8176 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
8177 // return t + o;
8178
8179 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
8180
8181 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
8182 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
8183
8184 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
8185 auto Cmp =
8186 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
8187
8188 // Could emit G_UITOFP instead
8189 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
8190 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8191 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
8192 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
8193
8194 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
8195
8196 MI.eraseFromParent();
8197 return Legalized;
8198}
8199
8201 auto [DstReg, SrcReg] = MI.getFirst2Regs();
8202 unsigned Flags = MI.getFlags();
8203 LLT Ty = MRI.getType(DstReg);
8204 const LLT CondTy = Ty.changeElementSize(1);
8205
8206 // result = trunc(src);
8207 // if (src < 0.0 && src != result)
8208 // result += -1.0.
8209
8210 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
8211 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8212
8213 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
8214 SrcReg, Zero, Flags);
8215 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
8216 SrcReg, Trunc, Flags);
8217 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
8218 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
8219
8220 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
8221 MI.eraseFromParent();
8222 return Legalized;
8223}
8224
8227 const unsigned NumOps = MI.getNumOperands();
8228 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
8229 unsigned PartSize = Src0Ty.getSizeInBits();
8230
8231 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
8232 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
8233
8234 for (unsigned I = 2; I != NumOps; ++I) {
8235 const unsigned Offset = (I - 1) * PartSize;
8236
8237 Register SrcReg = MI.getOperand(I).getReg();
8238 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
8239
8240 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
8241 MRI.createGenericVirtualRegister(WideTy);
8242
8243 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
8244 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
8245 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
8246 ResultReg = NextResult;
8247 }
8248
8249 if (DstTy.isPointer()) {
8250 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
8251 DstTy.getAddressSpace())) {
8252 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
8253 return UnableToLegalize;
8254 }
8255
8256 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
8257 }
8258
8259 MI.eraseFromParent();
8260 return Legalized;
8261}
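// Worked example of the zext/shl/or merge above (illustrative only): merging
// two s16 operands Lo = 0x1234 and Hi = 0xABCD into an s32 produces
// zext(Lo) | (zext(Hi) << 16) = 0x00001234 | 0xABCD0000 = 0xABCD1234, with
// the first source operand ending up in the least significant bits.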
8262
8265 const unsigned NumDst = MI.getNumOperands() - 1;
8266 Register SrcReg = MI.getOperand(NumDst).getReg();
8267 Register Dst0Reg = MI.getOperand(0).getReg();
8268 LLT DstTy = MRI.getType(Dst0Reg);
8269 if (DstTy.isPointer())
8270 return UnableToLegalize; // TODO
8271
8272 SrcReg = coerceToScalar(SrcReg);
8273 if (!SrcReg)
8274 return UnableToLegalize;
8275
8276 // Expand scalarizing unmerge as bitcast to integer and shift.
8277 LLT IntTy = MRI.getType(SrcReg);
8278
8279 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
8280
8281 const unsigned DstSize = DstTy.getSizeInBits();
8282 unsigned Offset = DstSize;
8283 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
8284 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
8285 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
8286 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
8287 }
8288
8289 MI.eraseFromParent();
8290 return Legalized;
8291}
8292
8293/// Lower a vector extract or insert by writing the vector to a stack temporary
8294/// and reloading the element or vector.
8295///
8296/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
8297/// =>
8298/// %stack_temp = G_FRAME_INDEX
8299/// G_STORE %vec, %stack_temp
8300/// %idx = clamp(%idx, %vec.getNumElements())
8301/// %element_ptr = G_PTR_ADD %stack_temp, %idx
8302/// %dst = G_LOAD %element_ptr
8305 Register DstReg = MI.getOperand(0).getReg();
8306 Register SrcVec = MI.getOperand(1).getReg();
8307 Register InsertVal;
8308 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
8309 InsertVal = MI.getOperand(2).getReg();
8310
8311 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
8312
8313 LLT VecTy = MRI.getType(SrcVec);
8314 LLT EltTy = VecTy.getElementType();
8315 unsigned NumElts = VecTy.getNumElements();
8316
8317 int64_t IdxVal;
8318 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
8320 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
8321
8322 if (InsertVal) {
8323 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
8324 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
8325 } else {
8326 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
8327 }
8328
8329 MI.eraseFromParent();
8330 return Legalized;
8331 }
8332
8333 if (!EltTy.isByteSized()) { // Not implemented.
8334 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
8335 return UnableToLegalize;
8336 }
8337
8338 unsigned EltBytes = EltTy.getSizeInBytes();
8339 Align VecAlign = getStackTemporaryAlignment(VecTy);
8340 Align EltAlign;
8341
8342 MachinePointerInfo PtrInfo;
8343 auto StackTemp = createStackTemporary(
8344 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
8345 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
8346
8347 // Get the pointer to the element, and be sure not to hit undefined behavior
8348 // if the index is out of bounds.
8349 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
8350
8351 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
8352 int64_t Offset = IdxVal * EltBytes;
8353 PtrInfo = PtrInfo.getWithOffset(Offset);
8354 EltAlign = commonAlignment(VecAlign, Offset);
8355 } else {
8356 // We lose information with a variable offset.
8357 EltAlign = getStackTemporaryAlignment(EltTy);
8358 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
8359 }
8360
8361 if (InsertVal) {
8362 // Write the inserted element
8363 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
8364
8365 // Reload the whole vector.
8366 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
8367 } else {
8368 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
8369 }
8370
8371 MI.eraseFromParent();
8372 return Legalized;
8373}
8374
8377 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
8378 MI.getFirst3RegLLTs();
8379 LLT IdxTy = LLT::scalar(32);
8380
8381 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
8382 Register Undef;
8384 LLT EltTy = DstTy.getScalarType();
8385
8386 for (int Idx : Mask) {
8387 if (Idx < 0) {
8388 if (!Undef.isValid())
8389 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
8390 BuildVec.push_back(Undef);
8391 continue;
8392 }
8393
8394 if (Src0Ty.isScalar()) {
8395 BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
8396 } else {
8397 int NumElts = Src0Ty.getNumElements();
8398 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
8399 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
8400 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
8401 auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
8402 BuildVec.push_back(Extract.getReg(0));
8403 }
8404 }
8405
8406 if (DstTy.isScalar())
8407 MIRBuilder.buildCopy(DstReg, BuildVec[0]);
8408 else
8409 MIRBuilder.buildBuildVector(DstReg, BuildVec);
8410 MI.eraseFromParent();
8411 return Legalized;
8412}
8413
8416 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
8417 MI.getFirst4RegLLTs();
8418
8419 if (VecTy.isScalableVector())
8420 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
8421
8422 Align VecAlign = getStackTemporaryAlignment(VecTy);
8423 MachinePointerInfo PtrInfo;
8424 Register StackPtr =
8425 createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
8426 PtrInfo)
8427 .getReg(0);
8428 MachinePointerInfo ValPtrInfo =
8430
8431 LLT IdxTy = LLT::scalar(32);
8432 LLT ValTy = VecTy.getElementType();
8433 Align ValAlign = getStackTemporaryAlignment(ValTy);
8434
8435 auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
8436
8437 bool HasPassthru =
8438 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
8439
8440 if (HasPassthru)
8441 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
8442
8443 Register LastWriteVal;
8444 std::optional<APInt> PassthruSplatVal =
8445 isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
8446
8447 if (PassthruSplatVal.has_value()) {
8448 LastWriteVal =
8449 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
8450 } else if (HasPassthru) {
8451 auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
8452 Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
8453 {LLT::scalar(32)}, {Popcount});
8454
8455 Register LastElmtPtr =
8456 getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
8457 LastWriteVal =
8458 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
8459 .getReg(0);
8460 }
8461
8462 unsigned NumElmts = VecTy.getNumElements();
8463 for (unsigned I = 0; I < NumElmts; ++I) {
8464 auto Idx = MIRBuilder.buildConstant(IdxTy, I);
8465 auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
8466 Register ElmtPtr =
8467 getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
8468 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
8469
8470 LLT MaskITy = MaskTy.getElementType();
8471 auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
8472 if (MaskITy.getSizeInBits() > 1)
8473 MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
8474
8475 MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
8476 OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
8477
8478 if (HasPassthru && I == NumElmts - 1) {
8479 auto EndOfVector =
8480 MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
8481 auto AllLanesSelected = MIRBuilder.buildICmp(
8482 CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
8483 OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
8484 {OutPos, EndOfVector});
8485 ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
8486
8487 LastWriteVal =
8488 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
8489 .getReg(0);
8490 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
8491 }
8492 }
8493
8494 // TODO: Use StackPtr's FrameIndex alignment.
8495 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
8496
8497 MI.eraseFromParent();
8498 return Legalized;
8499}
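// Worked example (illustrative): for Vec = <4 x s32> {10, 20, 30, 40} and
// Mask = {1, 0, 1, 0} with no passthru, the loop stores 10 at slot 0, 20 at
// slot 1, 30 at slot 1 (overwriting 20) and 40 at slot 2. Since OutPos only
// advances on set mask lanes, the reloaded result begins with the compressed
// elements {10, 30}; the trailing lanes are not defined without a passthru.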
8500
8502 Register AllocSize,
8503 Align Alignment,
8504 LLT PtrTy) {
8505 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
8506
8507 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
8508 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
8509
8510 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
8511 // have to generate an extra instruction to negate the alloc and then use
8512 // G_PTR_ADD to add the negative offset.
8513 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
8514 if (Alignment > Align(1)) {
8515 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
8516 AlignMask.negate();
8517 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
8518 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
8519 }
8520
8521 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
8522}
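// Worked example (illustrative): with SP = 0x1000, AllocSize = 0x2C and a
// 16-byte alignment, Alloc = 0x1000 - 0x2C = 0xFD4, AlignMask = -16 = ...FFF0,
// and Alloc & AlignMask = 0xFD0, i.e. the new stack top is rounded down to the
// requested alignment (the stack grows down here).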
8523
8526 const auto &MF = *MI.getMF();
8527 const auto &TFI = *MF.getSubtarget().getFrameLowering();
8528 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
8529 return UnableToLegalize;
8530
8531 Register Dst = MI.getOperand(0).getReg();
8532 Register AllocSize = MI.getOperand(1).getReg();
8533 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
8534
8535 LLT PtrTy = MRI.getType(Dst);
8536 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
8537 Register SPTmp =
8538 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
8539
8540 MIRBuilder.buildCopy(SPReg, SPTmp);
8541 MIRBuilder.buildCopy(Dst, SPTmp);
8542
8543 MI.eraseFromParent();
8544 return Legalized;
8545}
8546
8550 if (!StackPtr)
8551 return UnableToLegalize;
8552
8553 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
8554 MI.eraseFromParent();
8555 return Legalized;
8556}
8557
8561 if (!StackPtr)
8562 return UnableToLegalize;
8563
8564 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
8565 MI.eraseFromParent();
8566 return Legalized;
8567}
8568
8571 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
8572 unsigned Offset = MI.getOperand(2).getImm();
8573
8574 // Extract sub-vector or one element
8575 if (SrcTy.isVector()) {
8576 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
8577 unsigned DstSize = DstTy.getSizeInBits();
8578
8579 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
8580 (Offset + DstSize <= SrcTy.getSizeInBits())) {
8581 // Unmerge and allow access to each Src element for the artifact combiner.
8582 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
8583
8584 // Take element(s) we need to extract and copy it (merge them).
8585 SmallVector<Register, 8> SubVectorElts;
8586 for (unsigned Idx = Offset / SrcEltSize;
8587 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
8588 SubVectorElts.push_back(Unmerge.getReg(Idx));
8589 }
8590 if (SubVectorElts.size() == 1)
8591 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
8592 else
8593 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
8594
8595 MI.eraseFromParent();
8596 return Legalized;
8597 }
8598 }
8599
8600 if (DstTy.isScalar() &&
8601 (SrcTy.isScalar() ||
8602 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
8603 LLT SrcIntTy = SrcTy;
8604 if (!SrcTy.isScalar()) {
8605 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
8606 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
8607 }
8608
8609 if (Offset == 0)
8610 MIRBuilder.buildTrunc(DstReg, SrcReg);
8611 else {
8612 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
8613 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
8614 MIRBuilder.buildTrunc(DstReg, Shr);
8615 }
8616
8617 MI.eraseFromParent();
8618 return Legalized;
8619 }
8620
8621 return UnableToLegalize;
8622}
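// Worked examples (illustrative): extracting <2 x s32> at bit offset 64 from a
// <4 x s32> source takes the unmerge path and re-merges elements 2 and 3;
// extracting an s16 at bit offset 16 from an s64 takes the scalar path and
// becomes G_LSHR by 16 followed by G_TRUNC to s16.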
8623
8625 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
8626 uint64_t Offset = MI.getOperand(3).getImm();
8627
8628 LLT DstTy = MRI.getType(Src);
8629 LLT InsertTy = MRI.getType(InsertSrc);
8630
8631 // Insert sub-vector or one element
8632 if (DstTy.isVector() && !InsertTy.isPointer()) {
8633 LLT EltTy = DstTy.getElementType();
8634 unsigned EltSize = EltTy.getSizeInBits();
8635 unsigned InsertSize = InsertTy.getSizeInBits();
8636
8637 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
8638 (Offset + InsertSize <= DstTy.getSizeInBits())) {
8639 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
8640 SmallVector<Register, 8> DstElts;
8641 unsigned Idx = 0;
8642 // Elements from Src before the insert offset.
8643 for (; Idx < Offset / EltSize; ++Idx) {
8644 DstElts.push_back(UnmergeSrc.getReg(Idx));
8645 }
8646
8647 // Replace elements in Src with elements from InsertSrc
8648 if (InsertTy.getSizeInBits() > EltSize) {
8649 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
8650 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
8651 ++Idx, ++i) {
8652 DstElts.push_back(UnmergeInsertSrc.getReg(i));
8653 }
8654 } else {
8655 DstElts.push_back(InsertSrc);
8656 ++Idx;
8657 }
8658
8659 // Remaining elements from Src after insert
8660 for (; Idx < DstTy.getNumElements(); ++Idx) {
8661 DstElts.push_back(UnmergeSrc.getReg(Idx));
8662 }
8663
8664 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
8665 MI.eraseFromParent();
8666 return Legalized;
8667 }
8668 }
8669
8670 if (InsertTy.isVector() ||
8671 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
8672 return UnableToLegalize;
8674 const DataLayout &DL = MIRBuilder.getDataLayout();
8675 if ((DstTy.isPointer() &&
8676 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
8677 (InsertTy.isPointer() &&
8678 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
8679 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
8680 return UnableToLegalize;
8681 }
8682
8683 LLT IntDstTy = DstTy;
8684
8685 if (!DstTy.isScalar()) {
8686 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
8687 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
8688 }
8689
8690 if (!InsertTy.isScalar()) {
8691 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
8692 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
8693 }
8694
8695 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
8696 if (Offset != 0) {
8697 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
8698 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
8699 }
8700
8701 APInt MaskVal = APInt::getBitsSetWithWrap(
8702 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
8703
8704 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
8705 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
8706 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
8707
8708 MIRBuilder.buildCast(Dst, Or);
8709 MI.eraseFromParent();
8710 return Legalized;
8711}
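// Worked example (illustrative): inserting an s16 at bit offset 32 into an s64
// on the scalar path zero-extends the value, shifts it left by 32, masks the
// destination with 0xFFFF0000FFFFFFFF (all bits outside [32, 48)), and ORs the
// two together.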
8712
8715 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
8716 MI.getFirst4RegLLTs();
8717 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
8718
8719 LLT Ty = Dst0Ty;
8720 LLT BoolTy = Dst1Ty;
8721
8722 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
8723
8724 if (IsAdd)
8725 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
8726 else
8727 MIRBuilder.buildSub(NewDst0, LHS, RHS);
8728
8729 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
8730
8731 auto Zero = MIRBuilder.buildConstant(Ty, 0);
8732
8733 // For an addition, the result should be less than one of the operands (LHS)
8734 // if and only if the other operand (RHS) is negative, otherwise there will
8735 // be overflow.
8736 // For a subtraction, the result should be less than one of the operands
8737 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
8738 // otherwise there will be overflow.
8739 auto ResultLowerThanLHS =
8740 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
8741 auto ConditionRHS = MIRBuilder.buildICmp(
8742 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
8743
8744 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
8745
8746 MIRBuilder.buildCopy(Dst0, NewDst0);
8747 MI.eraseFromParent();
8748
8749 return Legalized;
8750}
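// Worked example (illustrative, s8): G_SADDO 100, 100 produces -56 after
// wrapping; -56 < 100 is true while 100 < 0 is false, so the XOR reports
// overflow. For G_SADDO 3, 5 both compares are false and no overflow is set.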
8751
8754 auto [Res, LHS, RHS] = MI.getFirst3Regs();
8755 LLT Ty = MRI.getType(Res);
8756 bool IsSigned;
8757 bool IsAdd;
8758 unsigned BaseOp;
8759 switch (MI.getOpcode()) {
8760 default:
8761 llvm_unreachable("unexpected addsat/subsat opcode");
8762 case TargetOpcode::G_UADDSAT:
8763 IsSigned = false;
8764 IsAdd = true;
8765 BaseOp = TargetOpcode::G_ADD;
8766 break;
8767 case TargetOpcode::G_SADDSAT:
8768 IsSigned = true;
8769 IsAdd = true;
8770 BaseOp = TargetOpcode::G_ADD;
8771 break;
8772 case TargetOpcode::G_USUBSAT:
8773 IsSigned = false;
8774 IsAdd = false;
8775 BaseOp = TargetOpcode::G_SUB;
8776 break;
8777 case TargetOpcode::G_SSUBSAT:
8778 IsSigned = true;
8779 IsAdd = false;
8780 BaseOp = TargetOpcode::G_SUB;
8781 break;
8782 }
8783
8784 if (IsSigned) {
8785 // sadd.sat(a, b) ->
8786 // hi = 0x7fffffff - smax(a, 0)
8787 // lo = 0x80000000 - smin(a, 0)
8788 // a + smin(smax(lo, b), hi)
8789 // ssub.sat(a, b) ->
8790 // lo = smax(a, -1) - 0x7fffffff
8791 // hi = smin(a, -1) - 0x80000000
8792 // a - smin(smax(lo, b), hi)
8793 // TODO: AMDGPU can use a "median of 3" instruction here:
8794 // a +/- med3(lo, b, hi)
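// Worked example (illustrative, s8): sadd.sat(100, 100) computes
// hi = 127 - smax(100, 0) = 27 and lo = -128 - smin(100, 0) = -128, clamps
// b to smin(smax(-128, 100), 27) = 27, and returns 100 + 27 = 127.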
8795 uint64_t NumBits = Ty.getScalarSizeInBits();
8796 auto MaxVal =
8797 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
8798 auto MinVal =
8799 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
8800 MachineInstrBuilder Hi, Lo;
8801 if (IsAdd) {
8802 auto Zero = MIRBuilder.buildConstant(Ty, 0);
8803 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
8804 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
8805 } else {
8806 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
8807 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
8808 MaxVal);
8809 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
8810 MinVal);
8811 }
8812 auto RHSClamped =
8813 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
8814 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
8815 } else {
8816 // uadd.sat(a, b) -> a + umin(~a, b)
8817 // usub.sat(a, b) -> a - umin(a, b)
8818 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
8819 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
8820 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
8821 }
8822
8823 MI.eraseFromParent();
8824 return Legalized;
8825}
8826
8829 auto [Res, LHS, RHS] = MI.getFirst3Regs();
8830 LLT Ty = MRI.getType(Res);
8831 LLT BoolTy = Ty.changeElementSize(1);
8832 bool IsSigned;
8833 bool IsAdd;
8834 unsigned OverflowOp;
8835 switch (MI.getOpcode()) {
8836 default:
8837 llvm_unreachable("unexpected addsat/subsat opcode");
8838 case TargetOpcode::G_UADDSAT:
8839 IsSigned = false;
8840 IsAdd = true;
8841 OverflowOp = TargetOpcode::G_UADDO;
8842 break;
8843 case TargetOpcode::G_SADDSAT:
8844 IsSigned = true;
8845 IsAdd = true;
8846 OverflowOp = TargetOpcode::G_SADDO;
8847 break;
8848 case TargetOpcode::G_USUBSAT:
8849 IsSigned = false;
8850 IsAdd = false;
8851 OverflowOp = TargetOpcode::G_USUBO;
8852 break;
8853 case TargetOpcode::G_SSUBSAT:
8854 IsSigned = true;
8855 IsAdd = false;
8856 OverflowOp = TargetOpcode::G_SSUBO;
8857 break;
8858 }
8859
8860 auto OverflowRes =
8861 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
8862 Register Tmp = OverflowRes.getReg(0);
8863 Register Ov = OverflowRes.getReg(1);
8864 MachineInstrBuilder Clamp;
8865 if (IsSigned) {
8866 // sadd.sat(a, b) ->
8867 // {tmp, ov} = saddo(a, b)
8868 // ov ? (tmp >>s 31) + 0x80000000 : r
8869 // ssub.sat(a, b) ->
8870 // {tmp, ov} = ssubo(a, b)
8871 // ov ? (tmp >>s 31) + 0x80000000 : r
8872 uint64_t NumBits = Ty.getScalarSizeInBits();
8873 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
8874 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
8875 auto MinVal =
8876 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
8877 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
8878 } else {
8879 // uadd.sat(a, b) ->
8880 // {tmp, ov} = uaddo(a, b)
8881 // ov ? 0xffffffff : tmp
8882 // usub.sat(a, b) ->
8883 // {tmp, ov} = usubo(a, b)
8884 // ov ? 0 : tmp
8885 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
8886 }
8887 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
8888
8889 MI.eraseFromParent();
8890 return Legalized;
8891}
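// Worked example (illustrative, s8): sadd.sat(100, 100) computes
// {tmp, ov} = saddo(100, 100) = {-56, 1}; Sign = -56 >>s 7 = -1, so
// Clamp = -1 + (-128) wraps to 127 and the select returns 127. Without
// overflow the select simply returns tmp.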
8892
8895 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
8896 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
8897 "Expected shlsat opcode!");
8898 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
8899 auto [Res, LHS, RHS] = MI.getFirst3Regs();
8900 LLT Ty = MRI.getType(Res);
8901 LLT BoolTy = Ty.changeElementSize(1);
8902
8903 unsigned BW = Ty.getScalarSizeInBits();
8904 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
8905 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
8906 : MIRBuilder.buildLShr(Ty, Result, RHS);
8907
8908 MachineInstrBuilder SatVal;
8909 if (IsSigned) {
8910 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
8911 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
8912 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
8913 MIRBuilder.buildConstant(Ty, 0));
8914 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
8915 } else {
8916 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
8917 }
8918 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
8919 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
8920
8921 MI.eraseFromParent();
8922 return Legalized;
8923}
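// Worked example (illustrative, s8): ushlsat(0x40, 2) computes Result = 0x00
// (the shift wraps), shifts back to 0x00 != 0x40, so overflow is detected and
// the saturated value 0xFF is selected instead of the wrapped result.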
8924
8926 auto [Dst, Src] = MI.getFirst2Regs();
8927 const LLT Ty = MRI.getType(Src);
8928 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
8929 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
8930
8931 // Swap most and least significant byte, set remaining bytes in Res to zero.
8932 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
8933 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
8934 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
8935 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
8936
8937 // Set i-th high/low byte in Res to i-th low/high byte from Src.
8938 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
8939 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
8940 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
8941 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
8942 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
8943 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
8944 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
8945 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
8946 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
8947 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
8948 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
8949 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
8950 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
8951 }
8952 Res.getInstr()->getOperand(0).setReg(Dst);
8953
8954 MI.eraseFromParent();
8955 return Legalized;
8956}
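// Worked example (illustrative, s32): for Src = 0xAABBCCDD the first step
// produces 0xDD0000AA; the single loop iteration (i = 1) then moves byte 1
// (0xCC) up and byte 2 (0xBB) down, giving the final result 0xDDCCBBAA.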
8957
8958//{ (Src & Mask) >> N } | { (Src << N) & Mask }
8960 MachineInstrBuilder Src, const APInt &Mask) {
8961 const LLT Ty = Dst.getLLTTy(*B.getMRI());
8962 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
8963 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
8964 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
8965 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
8966 return B.buildOr(Dst, LHS, RHS);
8967}
8968
8971 auto [Dst, Src] = MI.getFirst2Regs();
8972 const LLT Ty = MRI.getType(Src);
8973 unsigned Size = Ty.getScalarSizeInBits();
8974
8975 if (Size >= 8) {
8976 MachineInstrBuilder BSWAP =
8977 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
8978
8979 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
8980 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
8981 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
8982 MachineInstrBuilder Swap4 =
8983 SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
8984
8985 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
8986 // [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
8987 // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
8988 MachineInstrBuilder Swap2 =
8989 SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
8990
8991 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
8992 // 6|7
8993 // [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
8994 // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
8995 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
8996 } else {
8997 // Expand bitreverse for types smaller than 8 bits.
8998 MachineInstrBuilder Tmp;
8999 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
9000 MachineInstrBuilder Tmp2;
9001 if (I < J) {
9002 auto ShAmt = MIRBuilder.buildConstant(Ty, J - I);
9003 Tmp2 = MIRBuilder.buildShl(Ty, Src, ShAmt);
9004 } else {
9005 auto ShAmt = MIRBuilder.buildConstant(Ty, I - J);
9006 Tmp2 = MIRBuilder.buildLShr(Ty, Src, ShAmt);
9007 }
9008
9009 auto Mask = MIRBuilder.buildConstant(Ty, 1ULL << J);
9010 Tmp2 = MIRBuilder.buildAnd(Ty, Tmp2, Mask);
9011 if (I == 0)
9012 Tmp = Tmp2;
9013 else
9014 Tmp = MIRBuilder.buildOr(Ty, Tmp, Tmp2);
9015 }
9016 MIRBuilder.buildCopy(Dst, Tmp);
9017 }
9018
9019 MI.eraseFromParent();
9020 return Legalized;
9021}
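// Worked example (illustrative, s8): 0xB1 (0b10110001) goes through G_BSWAP
// (a no-op for one byte), then the nibble swap gives 0x1B, the two-bit swap
// gives 0x4E, and the final one-bit swap gives 0x8D (0b10001101), which is
// 0xB1 with its bits reversed.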
9022
9026
9027 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
9028 int NameOpIdx = IsRead ? 1 : 0;
9029 int ValRegIndex = IsRead ? 0 : 1;
9030
9031 Register ValReg = MI.getOperand(ValRegIndex).getReg();
9032 const LLT Ty = MRI.getType(ValReg);
9033 const MDString *RegStr = cast<MDString>(
9034 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
9035
9036 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
9037 if (!PhysReg.isValid())
9038 return UnableToLegalize;
9039
9040 if (IsRead)
9041 MIRBuilder.buildCopy(ValReg, PhysReg);
9042 else
9043 MIRBuilder.buildCopy(PhysReg, ValReg);
9044
9045 MI.eraseFromParent();
9046 return Legalized;
9047}
9048
9051 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
9052 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
9053 Register Result = MI.getOperand(0).getReg();
9054 LLT OrigTy = MRI.getType(Result);
9055 auto SizeInBits = OrigTy.getScalarSizeInBits();
9056 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
9057
9058 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
9059 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
9060 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
9061 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
9062
9063 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
9064 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
9065 MIRBuilder.buildTrunc(Result, Shifted);
9066
9067 MI.eraseFromParent();
9068 return Legalized;
9069}
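// Worked example (illustrative, s32 G_SMULH): both operands are sign-extended
// to s64, multiplied, shifted right arithmetically by 32 and truncated; e.g.
// smulh(0x80000000, 2) yields -1, the high half of -2^32.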
9070
9073 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9074 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
9075
9076 if (Mask == fcNone) {
9077 MIRBuilder.buildConstant(DstReg, 0);
9078 MI.eraseFromParent();
9079 return Legalized;
9080 }
9081 if (Mask == fcAllFlags) {
9082 MIRBuilder.buildConstant(DstReg, 1);
9083 MI.eraseFromParent();
9084 return Legalized;
9085 }
9086
9087 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
9088 // version
9089
9090 unsigned BitSize = SrcTy.getScalarSizeInBits();
9091 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
9092
9093 LLT IntTy = LLT::scalar(BitSize);
9094 if (SrcTy.isVector())
9095 IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
9096 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
9097
9098 // Various masks.
9099 APInt SignBit = APInt::getSignMask(BitSize);
9100 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9101 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9102 APInt ExpMask = Inf;
9103 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9104 APInt QNaNBitMask =
9105 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9106 APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
9107
9108 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
9109 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
9110 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
9111 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
9112 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
9113
9114 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
9115 auto Sign =
9116 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
9117
9118 auto Res = MIRBuilder.buildConstant(DstTy, 0);
9119 // Clang doesn't support capture of structured bindings:
9120 LLT DstTyCopy = DstTy;
9121 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
9122 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
9123 };
9124
9125 // Tests that involve more than one class should be processed first.
9126 if ((Mask & fcFinite) == fcFinite) {
9127 // finite(V) ==> abs(V) u< exp_mask
9128 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9129 ExpMaskC));
9130 Mask &= ~fcFinite;
9131 } else if ((Mask & fcFinite) == fcPosFinite) {
9132 // finite(V) && V > 0 ==> V u< exp_mask
9133 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
9134 ExpMaskC));
9135 Mask &= ~fcPosFinite;
9136 } else if ((Mask & fcFinite) == fcNegFinite) {
9137 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
9138 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9139 ExpMaskC);
9140 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
9141 appendToRes(And);
9142 Mask &= ~fcNegFinite;
9143 }
9144
9145 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
9146 // fcZero | fcSubnormal => test all exponent bits are 0
9147 // TODO: Handle sign bit specific cases
9148 // TODO: Handle inverted case
9149 if (PartialCheck == (fcZero | fcSubnormal)) {
9150 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
9151 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9152 ExpBits, ZeroC));
9153 Mask &= ~PartialCheck;
9154 }
9155 }
9156
9157 // Check for individual classes.
9158 if (FPClassTest PartialCheck = Mask & fcZero) {
9159 if (PartialCheck == fcPosZero)
9160 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9161 AsInt, ZeroC));
9162 else if (PartialCheck == fcZero)
9163 appendToRes(
9164 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
9165 else // fcNegZero
9166 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9167 AsInt, SignBitC));
9168 }
9169
9170 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
9171 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
9172 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
9173 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
9174 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
9175 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
9176 auto SubnormalRes =
9177 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
9178 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
9179 if (PartialCheck == fcNegSubnormal)
9180 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
9181 appendToRes(SubnormalRes);
9182 }
9183
9184 if (FPClassTest PartialCheck = Mask & fcInf) {
9185 if (PartialCheck == fcPosInf)
9186 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9187 AsInt, InfC));
9188 else if (PartialCheck == fcInf)
9189 appendToRes(
9190 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
9191 else { // fcNegInf
9192 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9193 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
9194 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9195 AsInt, NegInfC));
9196 }
9197 }
9198
9199 if (FPClassTest PartialCheck = Mask & fcNan) {
9200 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
9201 if (PartialCheck == fcNan) {
9202 // isnan(V) ==> abs(V) u> int(inf)
9203 appendToRes(
9204 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
9205 } else if (PartialCheck == fcQNan) {
9206 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
9207 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
9208 InfWithQnanBitC));
9209 } else { // fcSNan
9210 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
9211 // abs(V) u< (unsigned(Inf) | quiet_bit)
9212 auto IsNan =
9213 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
9214 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
9215 Abs, InfWithQnanBitC);
9216 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
9217 }
9218 }
9219
9220 if (FPClassTest PartialCheck = Mask & fcNormal) {
9221 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
9222 // (max_exp-1))
9223 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9224 auto ExpMinusOne = MIRBuilder.buildSub(
9225 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
9226 APInt MaxExpMinusOne = ExpMask - ExpLSB;
9227 auto NormalRes =
9228 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
9229 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
9230 if (PartialCheck == fcNegNormal)
9231 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
9232 else if (PartialCheck == fcPosNormal) {
9233 auto PosSign = MIRBuilder.buildXor(
9234 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InvertionMask));
9235 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
9236 }
9237 appendToRes(NormalRes);
9238 }
9239
9240 MIRBuilder.buildCopy(DstReg, Res);
9241 MI.eraseFromParent();
9242 return Legalized;
9243}
9244
9246 // Implement G_SELECT in terms of XOR, AND, OR.
9247 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
9248 MI.getFirst4RegLLTs();
9249
9250 bool IsEltPtr = DstTy.isPointerOrPointerVector();
9251 if (IsEltPtr) {
9252 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
9253 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
9254 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
9255 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
9256 DstTy = NewTy;
9257 }
9258
9259 if (MaskTy.isScalar()) {
9260 // Turn the scalar condition into a vector condition mask if needed.
9261
9262 Register MaskElt = MaskReg;
9263
9264 // The condition was potentially zero extended before, but we want a sign
9265 // extended boolean.
9266 if (MaskTy != LLT::scalar(1))
9267 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
9268
9269 // Continue the sign extension (or truncate) to match the data type.
9270 MaskElt =
9271 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
9272
9273 if (DstTy.isVector()) {
9274 // Generate a vector splat idiom.
9275 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
9276 MaskReg = ShufSplat.getReg(0);
9277 } else {
9278 MaskReg = MaskElt;
9279 }
9280 MaskTy = DstTy;
9281 } else if (!DstTy.isVector()) {
9282 // Cannot handle the case that mask is a vector and dst is a scalar.
9283 return UnableToLegalize;
9284 }
9285
9286 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
9287 return UnableToLegalize;
9288 }
9289
9290 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
9291 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
9292 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
9293 if (IsEltPtr) {
9294 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
9295 MIRBuilder.buildIntToPtr(DstReg, Or);
9296 } else {
9297 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
9298 }
9299 MI.eraseFromParent();
9300 return Legalized;
9301}
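// Worked example (illustrative): selecting between two <4 x s32> operands with
// an s1 condition sign-extends the condition to all-ones or all-zeros, splats
// it across the vector, and computes (Op1 & Mask) | (Op2 & ~Mask), so an
// all-ones mask picks Op1 and an all-zeros mask picks Op2.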
9302
9304 // Split DIVREM into individual instructions.
9305 unsigned Opcode = MI.getOpcode();
9307 MIRBuilder.buildInstr(
9308 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
9309 : TargetOpcode::G_UDIV,
9310 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
9311 MIRBuilder.buildInstr(
9312 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
9313 : TargetOpcode::G_UREM,
9314 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
9315 MI.eraseFromParent();
9316 return Legalized;
9317}
9318
9321 // Expand %res = G_ABS %a into:
9322 // %v1 = G_ASHR %a, scalar_size-1
9323 // %v2 = G_ADD %a, %v1
9324 // %res = G_XOR %v2, %v1
9325 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
9326 Register OpReg = MI.getOperand(1).getReg();
9327 auto ShiftAmt =
9328 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
9329 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
9330 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
9331 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
9332 MI.eraseFromParent();
9333 return Legalized;
9334}
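// Worked example (illustrative, s32): for %a = -5 the shift yields -1 (all
// ones), the add gives -6, and the xor with -1 flips it back to +5; for a
// non-negative input the shift yields 0 and both operations are no-ops.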
9335
9338 // Expand %res = G_ABS %a into:
9339 // %v1 = G_CONSTANT 0
9340 // %v2 = G_SUB %v1, %a
9341 // %res = G_SMAX %a, %v2
9342 Register SrcReg = MI.getOperand(1).getReg();
9343 LLT Ty = MRI.getType(SrcReg);
9344 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9345 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
9346 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
9347 MI.eraseFromParent();
9348 return Legalized;
9349}
9350
9353 Register SrcReg = MI.getOperand(1).getReg();
9354 Register DestReg = MI.getOperand(0).getReg();
9355 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
9356 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
9357 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
9358 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
9359 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
9360 MI.eraseFromParent();
9361 return Legalized;
9362}
9363
9365 Register SrcReg = MI.getOperand(1).getReg();
9366 Register DstReg = MI.getOperand(0).getReg();
9367
9368 LLT Ty = MRI.getType(DstReg);
9369
9370 // Reset sign bit
9371 MIRBuilder.buildAnd(
9372 DstReg, SrcReg,
9373 MIRBuilder.buildConstant(
9374 Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits())));
9375
9376 MI.eraseFromParent();
9377 return Legalized;
9378}
9379
9382 Register SrcReg = MI.getOperand(1).getReg();
9383 LLT SrcTy = MRI.getType(SrcReg);
9384 LLT DstTy = MRI.getType(SrcReg);
9385
9386 // The source could be a scalar if the IR type was <1 x sN>.
9387 if (SrcTy.isScalar()) {
9388 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
9389 return UnableToLegalize; // FIXME: handle extension.
9390 // This can be just a plain copy.
9391 Observer.changingInstr(MI);
9392 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
9393 Observer.changedInstr(MI);
9394 return Legalized;
9395 }
9396 return UnableToLegalize;
9397}
9398
9400 MachineFunction &MF = *MI.getMF();
9401 const DataLayout &DL = MIRBuilder.getDataLayout();
9402 LLVMContext &Ctx = MF.getFunction().getContext();
9403 Register ListPtr = MI.getOperand(1).getReg();
9404 LLT PtrTy = MRI.getType(ListPtr);
9405
9406 // ListPtr is a pointer to the head of the va_list. Load the current head of
9407 // the list, i.e. the address of the next argument.
9408 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
9409 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
9410 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
9411 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
9412
9413 const Align A(MI.getOperand(2).getImm());
9414 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
9415 if (A > TLI.getMinStackArgumentAlignment()) {
9416 Register AlignAmt =
9417 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
9418 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
9419 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
9420 VAList = AndDst.getReg(0);
9421 }
9422
9423 // Increment the pointer, VAList, to the next vaarg.
9424 // The list should be bumped by the allocation size of the element type of
9425 // the current argument.
9426 Register Dst = MI.getOperand(0).getReg();
9427 LLT LLTTy = MRI.getType(Dst);
9428 Type *Ty = getTypeForLLT(LLTTy, Ctx);
9429 auto IncAmt =
9430 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
9431 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
9432
9433 // Store the incremented VAList back to the legalized pointer.
9434 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
9435 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
9436 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
9437 // Load the actual argument out of the pointer VAList
9438 Align EltAlignment = DL.getABITypeAlign(Ty);
9439 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
9440 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
9441 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
9442
9443 MI.eraseFromParent();
9444 return Legalized;
9445}
9446
9448 // On Darwin, -Os means optimize for size without hurting performance, so
9449 // only really optimize for size when -Oz (MinSize) is used.
9450 if (MF.getTarget().getTargetTriple().isOSDarwin())
9451 return MF.getFunction().hasMinSize();
9452 return MF.getFunction().hasOptSize();
9453}
9454
9455// Returns a list of types to use for memory op lowering in MemOps. A partial
9456// port of findOptimalMemOpLowering in TargetLowering.
9457static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
9458 unsigned Limit, const MemOp &Op,
9459 unsigned DstAS, unsigned SrcAS,
9460 const AttributeList &FuncAttributes,
9461 const TargetLowering &TLI) {
9462 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
9463 return false;
9464
9465 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
9466
9467 if (Ty == LLT()) {
9468 // Use the largest scalar type whose alignment constraints are satisfied.
9469 // We only need to check DstAlign here as SrcAlign is always greater or
9470 // equal to DstAlign (or zero).
9471 Ty = LLT::scalar(64);
9472 if (Op.isFixedDstAlign())
9473 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
9474 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
9475 Ty = LLT::scalar(Ty.getSizeInBytes());
9476 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
9477 // FIXME: check for the largest legal type we can load/store to.
9478 }
9479
9480 unsigned NumMemOps = 0;
9481 uint64_t Size = Op.size();
9482 while (Size) {
9483 unsigned TySize = Ty.getSizeInBytes();
9484 while (TySize > Size) {
9485 // For now, only use non-vector load / store's for the left-over pieces.
9486 LLT NewTy = Ty;
9487 // FIXME: check for mem op safety and legality of the types. Not all of
9488 // SDAGisms map cleanly to GISel concepts.
9489 if (NewTy.isVector())
9490 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
9491 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
9492 unsigned NewTySize = NewTy.getSizeInBytes();
9493 assert(NewTySize > 0 && "Could not find appropriate type");
9494
9495 // If the new LLT cannot cover all of the remaining bits, then consider
9496 // issuing a (or a pair of) unaligned and overlapping load / store.
9497 unsigned Fast;
9498 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
9499 MVT VT = getMVTForLLT(Ty);
9500 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
9501 TLI.allowsMisalignedMemoryAccesses(
9502 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
9503 MachineMemOperand::MONone, &Fast) &&
9504 Fast)
9505 TySize = Size;
9506 else {
9507 Ty = NewTy;
9508 TySize = NewTySize;
9509 }
9510 }
9511
9512 if (++NumMemOps > Limit)
9513 return false;
9514
9515 MemOps.push_back(Ty);
9516 Size -= TySize;
9517 }
9518
9519 return true;
9520}
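// Worked example (illustrative): for a 13-byte copy where the target prefers
// s64 and allows misaligned accesses, the loop emits {s64, s64}, with the
// second access overlapping the first (the callers later rewind the offset for
// the overlap); if overlap is not allowed it falls back to progressively
// smaller scalars such as {s64, s32, s8}.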
9521
9522// Get a vectorized representation of the memset value operand, GISel edition.
9524 MachineRegisterInfo &MRI = *MIB.getMRI();
9525 unsigned NumBits = Ty.getScalarSizeInBits();
9526 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
9527 if (!Ty.isVector() && ValVRegAndVal) {
9528 APInt Scalar = ValVRegAndVal->Value.trunc(8);
9529 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
9530 return MIB.buildConstant(Ty, SplatVal).getReg(0);
9531 }
9532
9533 // Extend the byte value to the larger type, and then multiply by a magic
9534 // value 0x010101... in order to replicate it across every byte.
9535 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
9536 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
9537 return MIB.buildConstant(Ty, 0).getReg(0);
9538 }
9539
9540 LLT ExtType = Ty.getScalarType();
9541 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
9542 if (NumBits > 8) {
9543 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
9544 auto MagicMI = MIB.buildConstant(ExtType, Magic);
9545 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
9546 }
9547
9548 // For vector types create a G_BUILD_VECTOR.
9549 if (Ty.isVector())
9550 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
9551
9552 return Val;
9553}
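// Worked example (illustrative): a memset value of 0xAB stored as s32 becomes
// the constant 0xABABABAB directly when the value is a known constant;
// otherwise the byte is zero-extended and multiplied by the magic constant
// 0x01010101 to splat it across every byte of the wider type.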
9554
9556LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
9557 uint64_t KnownLen, Align Alignment,
9558 bool IsVolatile) {
9559 auto &MF = *MI.getParent()->getParent();
9560 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9561 auto &DL = MF.getDataLayout();
9562 LLVMContext &C = MF.getFunction().getContext();
9563
9564 assert(KnownLen != 0 && "Have a zero length memset length!");
9565
9566 bool DstAlignCanChange = false;
9567 MachineFrameInfo &MFI = MF.getFrameInfo();
9568 bool OptSize = shouldLowerMemFuncForSize(MF);
9569
9570 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
9571 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
9572 DstAlignCanChange = true;
9573
9574 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
9575 std::vector<LLT> MemOps;
9576
9577 const auto &DstMMO = **MI.memoperands_begin();
9578 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
9579
9580 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
9581 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
9582
9583 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
9584 MemOp::Set(KnownLen, DstAlignCanChange,
9585 Alignment,
9586 /*IsZeroMemset=*/IsZeroVal,
9587 /*IsVolatile=*/IsVolatile),
9588 DstPtrInfo.getAddrSpace(), ~0u,
9589 MF.getFunction().getAttributes(), TLI))
9590 return UnableToLegalize;
9591
9592 if (DstAlignCanChange) {
9593 // Get an estimate of the type from the LLT.
9594 Type *IRTy = getTypeForLLT(MemOps[0], C);
9595 Align NewAlign = DL.getABITypeAlign(IRTy);
9596 if (NewAlign > Alignment) {
9597 Alignment = NewAlign;
9598 unsigned FI = FIDef->getOperand(1).getIndex();
9599 // Give the stack frame object a larger alignment if needed.
9600 if (MFI.getObjectAlign(FI) < Alignment)
9601 MFI.setObjectAlignment(FI, Alignment);
9602 }
9603 }
9604
9605 MachineIRBuilder MIB(MI);
9606 // Find the largest store and generate the bit pattern for it.
9607 LLT LargestTy = MemOps[0];
9608 for (unsigned i = 1; i < MemOps.size(); i++)
9609 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
9610 LargestTy = MemOps[i];
9611
9612 // The memset stored value is always defined as an s8, so in order to make it
9613 // work with larger store types we need to repeat the bit pattern across the
9614 // wider type.
9615 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
9616
9617 if (!MemSetValue)
9618 return UnableToLegalize;
9619
9620 // Generate the stores. For each store type in the list, we generate the
9621 // matching store of that type to the destination address.
9622 LLT PtrTy = MRI.getType(Dst);
9623 unsigned DstOff = 0;
9624 unsigned Size = KnownLen;
9625 for (unsigned I = 0; I < MemOps.size(); I++) {
9626 LLT Ty = MemOps[I];
9627 unsigned TySize = Ty.getSizeInBytes();
9628 if (TySize > Size) {
9629 // Issuing an unaligned load / store pair that overlaps with the previous
9630 // pair. Adjust the offset accordingly.
9631 assert(I == MemOps.size() - 1 && I != 0);
9632 DstOff -= TySize - Size;
9633 }
9634
9635 // If this store is smaller than the largest store see whether we can get
9636 // the smaller value for free with a truncate.
9637 Register Value = MemSetValue;
9638 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
9639 MVT VT = getMVTForLLT(Ty);
9640 MVT LargestVT = getMVTForLLT(LargestTy);
9641 if (!LargestTy.isVector() && !Ty.isVector() &&
9642 TLI.isTruncateFree(LargestVT, VT))
9643 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
9644 else
9645 Value = getMemsetValue(Val, Ty, MIB);
9646 if (!Value)
9647 return UnableToLegalize;
9648 }
9649
9650 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
9651
9652 Register Ptr = Dst;
9653 if (DstOff != 0) {
9654 auto Offset =
9655 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
9656 Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
9657 }
9658
9659 MIB.buildStore(Value, Ptr, *StoreMMO);
9660 DstOff += Ty.getSizeInBytes();
9661 Size -= TySize;
9662 }
9663
9664 MI.eraseFromParent();
9665 return Legalized;
9666}
9667
9669LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
9670 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
9671
9672 auto [Dst, Src, Len] = MI.getFirst3Regs();
9673
9674 const auto *MMOIt = MI.memoperands_begin();
9675 const MachineMemOperand *MemOp = *MMOIt;
9676 bool IsVolatile = MemOp->isVolatile();
9677
9678 // See if this is a constant length copy
9679 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
9680 // FIXME: support dynamically sized G_MEMCPY_INLINE
9681 assert(LenVRegAndVal &&
9682 "inline memcpy with dynamic size is not yet supported");
9683 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
9684 if (KnownLen == 0) {
9685 MI.eraseFromParent();
9686 return Legalized;
9687 }
9688
9689 const auto &DstMMO = **MI.memoperands_begin();
9690 const auto &SrcMMO = **std::next(MI.memoperands_begin());
9691 Align DstAlign = DstMMO.getBaseAlign();
9692 Align SrcAlign = SrcMMO.getBaseAlign();
9693
9694 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
9695 IsVolatile);
9696}
9697
9699LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
9700 uint64_t KnownLen, Align DstAlign,
9701 Align SrcAlign, bool IsVolatile) {
9702 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
9703 return lowerMemcpy(MI, Dst, Src, KnownLen,
9704 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
9705 IsVolatile);
9706}
9707
9709LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
9710 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
9711 Align SrcAlign, bool IsVolatile) {
9712 auto &MF = *MI.getParent()->getParent();
9713 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9714 auto &DL = MF.getDataLayout();
9715 LLVMContext &C = MF.getFunction().getContext();
9716
9717 assert(KnownLen != 0 && "Have a zero length memcpy length!");
9718
9719 bool DstAlignCanChange = false;
9720 MachineFrameInfo &MFI = MF.getFrameInfo();
9721 Align Alignment = std::min(DstAlign, SrcAlign);
9722
9723 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
9724 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
9725 DstAlignCanChange = true;
9726
9727 // FIXME: infer better src pointer alignment like SelectionDAG does here.
9728 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
9729 // if the memcpy is in a tail call position.
9730
9731 std::vector<LLT> MemOps;
9732
9733 const auto &DstMMO = **MI.memoperands_begin();
9734 const auto &SrcMMO = **std::next(MI.memoperands_begin());
9735 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
9736 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
9737
9738 if (!findGISelOptimalMemOpLowering(
9739 MemOps, Limit,
9740 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
9741 IsVolatile),
9742 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
9743 MF.getFunction().getAttributes(), TLI))
9744 return UnableToLegalize;
9745
9746 if (DstAlignCanChange) {
9747 // Get an estimate of the type from the LLT.
9748 Type *IRTy = getTypeForLLT(MemOps[0], C);
9749 Align NewAlign = DL.getABITypeAlign(IRTy);
9750
9751 // Don't promote to an alignment that would require dynamic stack
9752 // realignment.
9753 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
9754 if (!TRI->hasStackRealignment(MF))
9755 if (MaybeAlign StackAlign = DL.getStackAlignment())
9756 NewAlign = std::min(NewAlign, *StackAlign);
9757
9758 if (NewAlign > Alignment) {
9759 Alignment = NewAlign;
9760 unsigned FI = FIDef->getOperand(1).getIndex();
9761 // Give the stack frame object a larger alignment if needed.
9762 if (MFI.getObjectAlign(FI) < Alignment)
9763 MFI.setObjectAlignment(FI, Alignment);
9764 }
9765 }
9766
9767 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
9768
9769 MachineIRBuilder MIB(MI);
9770 // Now we need to emit a pair of loads and stores for each of the types we've
9771 // collected. I.e. for each type, generate a load from the source pointer of
9772 // that type width, and then generate a corresponding store to the dest buffer
9773 // of that value loaded. This can result in a sequence of loads and stores of
9774 // mixed types, depending on what the target specifies as good types to use.
9775 unsigned CurrOffset = 0;
9776 unsigned Size = KnownLen;
9777 for (auto CopyTy : MemOps) {
9778 // Issuing an unaligned load / store pair that overlaps with the previous
9779 // pair. Adjust the offset accordingly.
9780 if (CopyTy.getSizeInBytes() > Size)
9781 CurrOffset -= CopyTy.getSizeInBytes() - Size;
9782
9783 // Construct MMOs for the accesses.
9784 auto *LoadMMO =
9785 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
9786 auto *StoreMMO =
9787 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
9788
9789 // Create the load.
9790 Register LoadPtr = Src;
9791 Register Offset;
9792 if (CurrOffset != 0) {
9793 LLT SrcTy = MRI.getType(Src);
9794 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
9795 .getReg(0);
9796 LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
9797 }
9798 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
9799
9800 // Create the store.
9801 Register StorePtr = Dst;
9802 if (CurrOffset != 0) {
9803 LLT DstTy = MRI.getType(Dst);
9804 StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
9805 }
9806 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
9807 CurrOffset += CopyTy.getSizeInBytes();
9808 Size -= CopyTy.getSizeInBytes();
9809 }
9810
9811 MI.eraseFromParent();
9812 return Legalized;
9813}
9814
9816LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
9817 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
9818 bool IsVolatile) {
9819 auto &MF = *MI.getParent()->getParent();
9820 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9821 auto &DL = MF.getDataLayout();
9822 LLVMContext &C = MF.getFunction().getContext();
9823
9824 assert(KnownLen != 0 && "Have a zero length memmove length!");
9825
9826 bool DstAlignCanChange = false;
9827 MachineFrameInfo &MFI = MF.getFrameInfo();
9828 bool OptSize = shouldLowerMemFuncForSize(MF);
9829 Align Alignment = std::min(DstAlign, SrcAlign);
9830
9831 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
9832 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
9833 DstAlignCanChange = true;
9834
9835 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
9836 std::vector<LLT> MemOps;
9837
9838 const auto &DstMMO = **MI.memoperands_begin();
9839 const auto &SrcMMO = **std::next(MI.memoperands_begin());
9840 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
9841 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
9842
9843 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
9844 // to a bug in its findOptimalMemOpLowering implementation. For now do the
9845 // same thing here.
9846 if (!findGISelOptimalMemOpLowering(
9847 MemOps, Limit,
9848 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
9849 /*IsVolatile*/ true),
9850 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
9851 MF.getFunction().getAttributes(), TLI))
9852 return UnableToLegalize;
9853
9854 if (DstAlignCanChange) {
9855 // Get an estimate of the type from the LLT.
9856 Type *IRTy = getTypeForLLT(MemOps[0], C);
9857 Align NewAlign = DL.getABITypeAlign(IRTy);
9858
9859 // Don't promote to an alignment that would require dynamic stack
9860 // realignment.
9861 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
9862 if (!TRI->hasStackRealignment(MF))
9863 if (MaybeAlign StackAlign = DL.getStackAlignment())
9864 NewAlign = std::min(NewAlign, *StackAlign);
9865
9866 if (NewAlign > Alignment) {
9867 Alignment = NewAlign;
9868 unsigned FI = FIDef->getOperand(1).getIndex();
9869 // Give the stack frame object a larger alignment if needed.
9870 if (MFI.getObjectAlign(FI) < Alignment)
9871 MFI.setObjectAlignment(FI, Alignment);
9872 }
9873 }
9874
9875 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
9876
9877 MachineIRBuilder MIB(MI);
9878 // Memmove requires that we perform the loads first before issuing the stores.
9879 // Apart from that, this loop is pretty much doing the same thing as the
9880 // memcpy codegen function.
9881 unsigned CurrOffset = 0;
9882 SmallVector<Register, 16> LoadVals;
9883 for (auto CopyTy : MemOps) {
9884 // Construct MMO for the load.
9885 auto *LoadMMO =
9886 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
9887
9888 // Create the load.
9889 Register LoadPtr = Src;
9890 if (CurrOffset != 0) {
9891 LLT SrcTy = MRI.getType(Src);
9892 auto Offset =
9893 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
9894 LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
9895 }
9896 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
9897 CurrOffset += CopyTy.getSizeInBytes();
9898 }
9899
9900 CurrOffset = 0;
9901 for (unsigned I = 0; I < MemOps.size(); ++I) {
9902 LLT CopyTy = MemOps[I];
9903 // Now store the values loaded.
9904 auto *StoreMMO =
9905 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
9906
9907 Register StorePtr = Dst;
9908 if (CurrOffset != 0) {
9909 LLT DstTy = MRI.getType(Dst);
9910 auto Offset =
9911 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
9912 StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
9913 }
9914 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
9915 CurrOffset += CopyTy.getSizeInBytes();
9916 }
9917 MI.eraseFromParent();
9918 return Legalized;
9919}
9920
9923 const unsigned Opc = MI.getOpcode();
9924 // This combine is fairly complex so it's not written with a separate
9925 // matcher function.
9926 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
9927 Opc == TargetOpcode::G_MEMSET) &&
9928 "Expected memcpy like instruction");
9929
9930 auto MMOIt = MI.memoperands_begin();
9931 const MachineMemOperand *MemOp = *MMOIt;
9932
9933 Align DstAlign = MemOp->getBaseAlign();
9934 Align SrcAlign;
9935 auto [Dst, Src, Len] = MI.getFirst3Regs();
9936
9937 if (Opc != TargetOpcode::G_MEMSET) {
9938 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
9939 MemOp = *(++MMOIt);
9940 SrcAlign = MemOp->getBaseAlign();
9941 }
9942
9943 // See if this is a constant length copy
9944 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
9945 if (!LenVRegAndVal)
9946 return UnableToLegalize;
9947 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
9948
9949 if (KnownLen == 0) {
9950 MI.eraseFromParent();
9951 return Legalized;
9952 }
9953
9954 bool IsVolatile = MemOp->isVolatile();
9955 if (Opc == TargetOpcode::G_MEMCPY_INLINE)
9956 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
9957 IsVolatile);
9958
9959 // Don't try to optimize volatile.
9960 if (IsVolatile)
9961 return UnableToLegalize;
9962
9963 if (MaxLen && KnownLen > MaxLen)
9964 return UnableToLegalize;
9965
9966 if (Opc == TargetOpcode::G_MEMCPY) {
9967 auto &MF = *MI.getParent()->getParent();
9968 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9969 bool OptSize = shouldLowerMemFuncForSize(MF);
9970 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
9971 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
9972 IsVolatile);
9973 }
9974 if (Opc == TargetOpcode::G_MEMMOVE)
9975 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
9976 if (Opc == TargetOpcode::G_MEMSET)
9977 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
9978 return UnableToLegalize;
9979}
unsigned const MachineRegisterInfo * MRI
#define Success
static const LLT S1
static const LLT S64
static const LLT S32
static const LLT S16
AMDGPU Register Bank Select
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition: Utils.h:74
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Addr
std::string Name
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, const TargetLowering &TLI, bool IsSigned=false)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t High
static constexpr Register SPReg
const SmallVectorImpl< MachineOperand > & Cond
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1329
APInt bitcastToAPInt() const
Definition: APFloat.h:1346
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition: APFloat.h:1135
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition: APFloat.h:1095
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1492
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:910
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:206
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1640
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition: APInt.h:216
void negate()
Negate this APInt in place.
Definition: APInt.h:1450
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:624
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:959
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:873
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:851
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition: APInt.h:270
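To see how a few of the APInt helpers above compose, here is a minimal standalone sketch; the widths and values are arbitrary and chosen only for illustration.

#include "llvm/ADT/APInt.h"
using namespace llvm;

void apintSketch() {
  // A 16-bit value with only the low 8 bits set: 0x00FF.
  APInt LowMask = APInt::getLowBitsSet(16, 8);
  // Zero- and sign-extension to 32 bits both give 0x000000FF here,
  // because bit 15 of the source is clear.
  APInt Z = LowMask.zext(32);
  APInt S = LowMask.sext(32);
  // Broadcast an 8-bit pattern across 32 bits: 0xAAAAAAAA.
  APInt Splat = APInt::getSplat(32, APInt(8, 0xAA));
  // Shifts keep the bit width: 0x55555555.
  APInt Shifted = Splat.lshr(1);
  (void)Z; (void)S; (void)Shifted;
}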
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:157
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
iterator begin() const
Definition: ArrayRef.h:156
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:163
bool hasAttributes() const
Return true if the builder has IR-level attributes.
Definition: Attributes.h:1119
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
bool hasRetAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the return value.
Definition: Attributes.h:844
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:702
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:703
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:679
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:688
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:677
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:678
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:697
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:696
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:700
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:687
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:681
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:684
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:698
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:685
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:680
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:682
@ ICMP_EQ
equal
Definition: InstrTypes.h:694
@ ICMP_NE
not equal
Definition: InstrTypes.h:695
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:701
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:689
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:686
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:683
bool isSigned() const
Definition: InstrTypes.h:928
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:787
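A small sketch of the predicate utilities above in use; the chosen predicate is arbitrary and only meant to show the call pattern.

#include "llvm/IR/InstrTypes.h"
#include <cassert>
using namespace llvm;

void predicateSketch() {
  CmpInst::Predicate P = CmpInst::ICMP_SLT;
  // The inverse of "signed less than" is "signed greater or equal".
  assert(CmpInst::getInversePredicate(P) == CmpInst::ICMP_SGE);
  // SLT is one of the signed integer predicates.
  assert(CmpInst::isSigned(P));
}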
const APFloat & getValueAPF() const
Definition: Constants.h:314
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:148
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
Definition: DataLayout.h:348
bool isBigEndian() const
Definition: DataLayout.h:198
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition: TypeSize.h:317
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:707
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:704
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:221
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Represent a G_FCMP.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Represents a threeway compare.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isTailCall(const MachineInstr &MI) const override
bool isEquality() const
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:264
constexpr bool isScalar() const
Definition: LowLevelType.h:146
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
Definition: LowLevelType.h:211
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:64
constexpr bool isPointerVector() const
Definition: LowLevelType.h:152
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelType.h:57
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:170
constexpr bool isByteSized() const
Definition: LowLevelType.h:260
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:190
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:277
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:183
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelType.h:218
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:270
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:100
constexpr bool isPointerOrPointerVector() const
Definition: LowLevelType.h:153
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
Definition: LowLevelType.h:227
constexpr LLT getScalarType() const
Definition: LowLevelType.h:205
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:200
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
Definition: LowLevelType.h:124
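The LLT factory and query functions above are typically combined like this; a sketch only, and the exact include path can vary between LLVM releases.

#include "llvm/CodeGenTypes/LowLevelType.h"
#include <cassert>
using namespace llvm;

void lltSketch() {
  LLT S32 = LLT::scalar(32);             // plain 32-bit scalar
  LLT V4S32 = LLT::fixed_vector(4, 32);  // <4 x s32>
  LLT P0 = LLT::pointer(0, 64);          // 64-bit pointer, address space 0

  assert(V4S32.isVector() && V4S32.getNumElements() == 4);
  assert(V4S32.getElementType() == S32);
  assert(V4S32.getSizeInBits().getKnownMinValue() == 128);

  // Derive related types instead of spelling them out again.
  LLT V4S16 = V4S32.changeElementSize(16);                         // <4 x s16>
  LLT V2S32 = V4S32.changeElementCount(ElementCount::getFixed(2)); // <2 x s32>
  (void)P0; (void)V4S16; (void)V2S32;
}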
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
LegalizeResult lowerShlSat(MachineInstr &MI)
LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LegalizeResult lowerSITOFP(MachineInstr &MI)
LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LegalizeResult lowerBitCount(MachineInstr &MI)
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LegalizeResult lowerLoad(GAnyLoad &MI)
LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizeResult lowerFConstant(MachineInstr &MI)
LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerBitreverse(MachineInstr &MI)
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LegalizeResult lowerEXT(MachineInstr &MI)
LegalizeResult lowerStore(GStore &MI)
LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LegalizeResult lowerFPTOUI(MachineInstr &MI)
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LegalizeResult lowerBitcast(MachineInstr &MI)
LegalizeResult lowerMinMax(MachineInstr &MI)
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LegalizeResult lowerInsert(MachineInstr &MI)
LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LegalizeResult lowerExtract(MachineInstr &MI)
LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LegalizeResult lowerFPOWI(MachineInstr &MI)
LegalizeResult lowerFAbs(MachineInstr &MI)
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVectorReduction(MachineInstr &MI)
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LegalizeResult lowerFCopySign(MachineInstr &MI)
LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LegalizeResult lowerFunnelShift(MachineInstr &MI)
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LegalizeResult lowerFMad(MachineInstr &MI)
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFFloor(MachineInstr &MI)
LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LegalizeResult lowerFPTOSI(MachineInstr &MI)
LegalizeResult lowerUITOFP(MachineInstr &MI)
LegalizeResult lowerShuffleVector(MachineInstr &MI)
LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerMergeValues(MachineInstr &MI)
LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LegalizeResult lowerRotate(MachineInstr &MI)
LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LegalizeResult lowerDIVREM(MachineInstr &MI)
LegalizeResult lowerSelect(MachineInstr &MI)
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LegalizeResult lowerStackRestore(MachineInstr &MI)
LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerStackSave(MachineInstr &MI)
LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeResult lowerTRUNC(MachineInstr &MI)
LegalizeResult lowerBswap(MachineInstr &MI)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LegalizeResult lowerConstant(MachineInstr &MI)
void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
virtual unsigned getExtOpcodeForWideningConstant(LLT SmallTy) const
Return the opcode (SEXT/ZEXT/ANYEXT) that should be performed while widening a constant of type Small...
bool isLegalOrCustom(const LegalityQuery &Query) const
virtual bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Called for instructions with the Custom LegalizationAction.
bool isLegal(const LegalityQuery &Query) const
virtual bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
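Tying the LegalizerHelper and LegalizerInfo entries together, here is a hedged sketch of what a target's legalizeCustom override might look like. MyTargetLegalizerInfo is a made-up class name, its declaration and includes are omitted, and the G_ABS rewrite is one plausible lowering (abs(x) = smax(x, -x)), not the in-tree implementation.

bool MyTargetLegalizerInfo::legalizeCustom(
    LegalizerHelper &Helper, MachineInstr &MI,
    LostDebugLocObserver &LocObserver) const {
  (void)LocObserver;                        // unused in this sketch
  MachineIRBuilder &B = Helper.MIRBuilder;  // public member, see above
  MachineRegisterInfo &MRI = *B.getMRI();
  B.setInstrAndDebugLoc(MI);                // usually already set by the driver

  switch (MI.getOpcode()) {
  case TargetOpcode::G_ABS: {
    Register Dst = MI.getOperand(0).getReg();
    Register Src = MI.getOperand(1).getReg();
    LLT Ty = MRI.getType(Dst);
    auto Neg = B.buildNeg(Ty, Src);
    B.buildSMax(Dst, Src, Neg);
    MI.eraseFromParent();
    return true;
  }
  default:
    return false;                           // could not legalize
  }
}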
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition: MCInstrInfo.h:70
A single uniqued string.
Definition: Metadata.h:720
StringRef getString() const
Definition: Metadata.cpp:616
Machine Value Type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:237
iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
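As an illustration of how getMachineMemOperand pairs with the GlobalISel builder, a sketch of loading an s32 from a frame index; the pointer width and address space are assumptions of the example.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
using namespace llvm;

// B is positioned where the load should go and FI is an existing frame index.
Register loadS32FromFrameIndex(MachineIRBuilder &B, int FI) {
  MachineFunction &MF = B.getMF();
  LLT S32 = LLT::scalar(32);
  LLT P0 = LLT::pointer(0, 64);  // assumed pointer width / address space

  auto Addr = B.buildFrameIndex(P0, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
      S32, MF.getFrameInfo().getObjectAlign(FI));
  return B.buildLoad(S32, Addr, *MMO).getReg(0);
}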
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FSUB Op0, Op1.
MachineInstrBuilder buildFPTOSI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOSI Src0.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
std::optional< MachineInstrBuilder > materializePtrAdd(Register &Res, Register Op0, const LLT ValueTy, uint64_t Value)
Materialize and insert Res = G_PTR_ADD Op0, (G_CONSTANT Value)
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not: NegOne = G_CONSTANT -1; Res = G_XOR Op0, NegOne.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FABS Op0.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildZExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_AND Op, LowBitsSet(ImmOp). Since there is no G_ZEXT_INREG like G_SEXT_INREG,...
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildInsertSubvector(const DstOp &Res, const SrcOp &Src0, const SrcOp &Src1, unsigned Index)
Build and insert Res = G_INSERT_SUBVECTOR Src0, Src1, Idx.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildFPTOUI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOUI Src0.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FPOW Src0, Src1.
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
MachineInstrBuilder buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_INTRINSIC_TRUNC Src0.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildSExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src)
Build and insert a vector splat of a scalar Src using a G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idio...
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ_ZERO_UNDEF Src0.
MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts)
Build and insert Res = G_VSCALE MinElts.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
unsigned getBoolExtOp(bool IsVec, bool IsFP) const
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation: Zero = G_CONSTANT 0; Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Src0.
MachineInstrBuilder buildSMax(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMAX Op0, Op1.
MachineInstrBuilder buildAssertZExt(const DstOp &Res, const SrcOp &Op, unsigned Size)
Build and insert Res = G_ASSERT_ZEXT Op, Size.
MachineInstrBuilder buildStrictFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_STRICT_FADD Op0, Op1.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildCTTZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ_ZERO_UNDEF Src0.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineInstrBuilder buildPadVectorWithUndefElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x = G_UNMERGE_VALUES Op0; Res = G_BUILD_VECTOR a, b, .....
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildSMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMIN Op0, Op1.
MachineInstrBuilder buildInsert(const DstOp &Res, const SrcOp &Src, const SrcOp &Op, unsigned Index)
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_FCOPYSIGN Op0, Op1.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Elt, const SrcOp &Idx)
Build and insert Res = G_INSERT_VECTOR_ELT Val, Elt, Idx.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineInstrBuilder buildDeleteTrailingVectorElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x, y, z = G_UNMERGE_VALUES Op0; Res = G_BUILD_VECTOR a,...
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildAtomicCmpXchg(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &CmpVal, const SrcOp &NewVal, MachineMemOperand &MMO)
Build and insert OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, MMO.
MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, ArrayRef< int > Mask)
Build and insert Res = G_SHUFFLE_VECTOR Src1, Src2, Mask.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildExtractSubvector(const DstOp &Res, const SrcOp &Src, unsigned Index)
Build and insert Res = G_EXTRACT_SUBVECTOR Src, Idx0.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildSplatVector(const DstOp &Res, const SrcOp &Val)
Build and insert Res = G_SPLAT_VECTOR Val.
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildUMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_UMIN Op0, Op1.
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_FCMP Pred, Op0, Op1.
MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FADD Op0, Op1.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
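Several of the build* calls above compose naturally. Below is a sketch of emitting a signed clamp, once with G_SMIN/G_SMAX and once with explicit compare-plus-select; the helper names are inventions of the example.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

// Clamp Src into [Lo, Hi]; all three registers share the scalar type Ty.
Register emitClamp(MachineIRBuilder &B, LLT Ty, Register Src, Register Lo,
                   Register Hi) {
  auto Max = B.buildSMax(Ty, Src, Lo);   // max(Src, Lo)
  return B.buildSMin(Ty, Max, Hi).getReg(0);
}

// The same result with an explicit compare and select, for targets that
// prefer not to use G_SMIN/G_SMAX.
Register emitClampWithSelect(MachineIRBuilder &B, LLT Ty, Register Src,
                             Register Lo, Register Hi) {
  LLT S1 = LLT::scalar(1);
  auto TooSmall = B.buildICmp(CmpInst::ICMP_SLT, S1, Src, Lo);
  auto ClampedLo = B.buildSelect(Ty, TooSmall, Lo, Src);
  auto TooBig = B.buildICmp(CmpInst::ICMP_SGT, S1, ClampedLo, Hi);
  return B.buildSelect(Ty, TooBig, Hi, ClampedLo).getReg(0);
}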
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:575
bool isReturn(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:946
bool isCopy() const
bool isDebugInstr() const
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:578
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:806
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void reserve(size_type N)
Definition: SmallVector.h:663
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:805
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
LLT getLLTTy(const MachineRegisterInfo &MRI) const
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:144
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:406
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
unsigned getMaxStoresPerMemcpy(bool OptSize) const
Get maximum # of store operations permitted for llvm.memcpy.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
virtual bool shouldExpandCmpUsingSelects(EVT VT) const
Should we expand [US]CMP nodes using two selects and two compares, or by doing arithmetic on boolean ...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getMaxStoresPerMemmove(bool OptSize) const
Get maximum # of store operations permitted for llvm.memmove.
Align getMinStackArgumentAlignment() const
Return the minimum stack alignment of an argument.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
unsigned getMaxStoresPerMemset(bool OptSize) const
Get maximum # of store operations permitted for llvm.memset.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual Register getRegisterByName(const char *RegName, LLT Ty, const MachineFunction &MF) const
Return the register ID of the name passed in.
const Triple & getTargetTriple() const
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:573
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
static Type * getX86_FP80Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static Type * getVoidTy(LLVMContext &C)
static Type * getFP128Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
Definition: LegalizerInfo.h:65
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
Definition: LegalizerInfo.h:83
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
Definition: LegalizerInfo.h:57
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegalizerInfo.h:74
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
Definition: LegalizerInfo.h:52
@ Custom
The target wants to do something special with this combination of operand and type.
Definition: LegalizerInfo.h:87
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
Definition: LegalizerInfo.h:71
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
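A typical mi_match use, sketched as a hypothetical helper that recognizes a shift by a constant zero; the function name and the folding decision are inventions of the example.

#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
using namespace MIPatternMatch;

// Return the unshifted source when Reg is a G_SHL by a constant zero,
// otherwise an invalid Register.
Register matchShlByZero(Register Reg, const MachineRegisterInfo &MRI) {
  Register Src;
  APInt Amt;
  if (mi_match(Reg, MRI, m_GShl(m_Reg(Src), m_ICst(Amt))) && Amt.isZero())
    return Src;
  return Register();
}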
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:854
int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition: MathExtras.h:244
Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition: Utils.cpp:1987
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:645
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1697
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
LLT getLLTForMVT(MVT Ty)
Get a rough equivalent of an LLT for a given MVT.
std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition: Utils.cpp:1523
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition: Utils.cpp:1580
LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:394
LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition: Utils.cpp:1172
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition: MathExtras.h:366
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
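A hedged sketch of calling createLibcall directly for a made-up unary f32 runtime routine; the symbol name my_rt_op_f32 and the use of the C calling convention are placeholders, not part of any real runtime, and whether such a call is ultimately legal still depends on the target's call lowering.

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/IR/Function.h"
using namespace llvm;

LegalizerHelper::LegalizeResult
emitUnaryF32Libcall(MachineIRBuilder &B, Register Dst, Register Src,
                    LostDebugLocObserver &LocObserver) {
  LLVMContext &Ctx = B.getMF().getFunction().getContext();
  Type *F32 = Type::getFloatTy(Ctx);
  // One result register and one argument register, both plain f32.
  CallLowering::ArgInfo Result({Dst}, F32, 0);
  CallLowering::ArgInfo Arg({Src}, F32, 0);
  return createLibcall(B, "my_rt_op_f32", Result, {Arg}, CallingConv::C,
                       LocObserver);
}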
EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition: Utils.cpp:500
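For instance, splitting an s96 register into three s32 pieces with the overload above might look like this; a sketch only, since real callers usually derive the part count from a breakdown query rather than hard-coding it.

#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include <cassert>
using namespace llvm;

// Split an s96 register into three s32 pieces (appended to Parts).
void splitS96(Register Src, MachineIRBuilder &B, MachineRegisterInfo &MRI,
              SmallVectorImpl<Register> &Parts) {
  assert(MRI.getType(Src) == LLT::scalar(96) && "expected an s96 value");
  extractParts(Src, LLT::scalar(32), /*NumParts=*/3, Parts, B, MRI);
}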
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition: MathExtras.h:235
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1841
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:433
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition: Utils.h:342
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition: Alignment.h:111
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition: Utils.cpp:1260
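For scalar types, getGCDType and getLCMType reduce to the GCD and LCM of the bit widths; a quick sketch with hand-checked values.

#include "llvm/CodeGen/GlobalISel/Utils.h"
#include <cassert>
using namespace llvm;

void gcdLcmSketch() {
  LLT S32 = LLT::scalar(32), S64 = LLT::scalar(64);
  assert(getGCDType(S64, S32) == S32); // gcd(64, 32) = 32 bits
  assert(getLCMType(S32, S64) == S64); // lcm(32, 64) = 64 bits
}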
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition: Utils.cpp:603
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:265
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:297
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:301
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:266
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:313
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
SmallVector< ISD::ArgFlagsTy, 4 > Flags
Definition: CallLowering.h:51
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
LegalizeAction Action
The action to take or the final answer.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)