1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
35#include "llvm/Support/Debug.h"
39#include <numeric>
40#include <optional>
41
42#define DEBUG_TYPE "legalizer"
43
44using namespace llvm;
45using namespace LegalizeActions;
46using namespace MIPatternMatch;
47
48/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
49///
50/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
51/// with any leftover piece as type \p LeftoverTy
52///
53/// Returns -1 in the first element of the pair if the breakdown is not
54/// satisfiable.
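///
/// Illustrative example (not part of the original comment): breaking
/// OrigTy = s88 into NarrowTy = s32 pieces gives NumParts = 2 with a 24-bit
/// remainder, so the function returns {2, 1} and sets LeftoverTy = s24. If
/// NarrowTy were a vector, a leftover size that is not a multiple of the
/// original element size would instead yield {-1, -1}.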
55static std::pair<int, int>
56getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
57 assert(!LeftoverTy.isValid() && "this is an out argument");
58
59 unsigned Size = OrigTy.getSizeInBits();
60 unsigned NarrowSize = NarrowTy.getSizeInBits();
61 unsigned NumParts = Size / NarrowSize;
62 unsigned LeftoverSize = Size - NumParts * NarrowSize;
63 assert(Size > NarrowSize);
64
65 if (LeftoverSize == 0)
66 return {NumParts, 0};
67
68 if (NarrowTy.isVector()) {
69 unsigned EltSize = OrigTy.getScalarSizeInBits();
70 if (LeftoverSize % EltSize != 0)
71 return {-1, -1};
72 LeftoverTy =
73 LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
74 OrigTy.getElementType());
75 } else {
76 LeftoverTy = LLT::scalar(LeftoverSize);
77 }
78
79 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
80 return std::make_pair(NumParts, NumLeftover);
81}
82
83static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
84
85 if (!Ty.isScalar())
86 return nullptr;
87
88 switch (Ty.getSizeInBits()) {
89 case 16:
90 return Type::getHalfTy(Ctx);
91 case 32:
92 return Type::getFloatTy(Ctx);
93 case 64:
94 return Type::getDoubleTy(Ctx);
95 case 80:
96 return Type::getX86_FP80Ty(Ctx);
97 case 128:
98 return Type::getFP128Ty(Ctx);
99 default:
100 return nullptr;
101 }
102}
103
104LegalizerHelper::LegalizerHelper(MachineFunction &MF,
105 GISelChangeObserver &Observer,
106 MachineIRBuilder &Builder)
107 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
108 LI(*MF.getSubtarget().getLegalizerInfo()),
109 TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
110
111LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
112 GISelChangeObserver &Observer,
113 MachineIRBuilder &B, GISelKnownBits *KB)
114 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
115 TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
116
117LegalizerHelper::LegalizeResult
118LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
119 LostDebugLocObserver &LocObserver) {
120 LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
121
122 MIRBuilder.setInstrAndDebugLoc(MI);
123
124 if (isa<GIntrinsic>(MI))
125 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
126 auto Step = LI.getAction(MI, MRI);
127 switch (Step.Action) {
128 case Legal:
129 LLVM_DEBUG(dbgs() << ".. Already legal\n");
130 return AlreadyLegal;
131 case Libcall:
132 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
133 return libcall(MI, LocObserver);
134 case NarrowScalar:
135 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
136 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
137 case WidenScalar:
138 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
139 return widenScalar(MI, Step.TypeIdx, Step.NewType);
140 case Bitcast:
141 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
142 return bitcast(MI, Step.TypeIdx, Step.NewType);
143 case Lower:
144 LLVM_DEBUG(dbgs() << ".. Lower\n");
145 return lower(MI, Step.TypeIdx, Step.NewType);
146 case FewerElements:
147 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
148 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
149 case MoreElements:
150 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
151 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
152 case Custom:
153 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
154 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
155 : UnableToLegalize;
156 default:
157 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
158 return UnableToLegalize;
159 }
160}
161
162void LegalizerHelper::insertParts(Register DstReg,
163 LLT ResultTy, LLT PartTy,
164 ArrayRef<Register> PartRegs,
165 LLT LeftoverTy,
166 ArrayRef<Register> LeftoverRegs) {
167 if (!LeftoverTy.isValid()) {
168 assert(LeftoverRegs.empty());
169
170 if (!ResultTy.isVector()) {
171 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
172 return;
173 }
174
175 if (PartTy.isVector())
176 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
177 else
178 MIRBuilder.buildBuildVector(DstReg, PartRegs);
179 return;
180 }
181
182 // Merge sub-vectors with different number of elements and insert into DstReg.
183 if (ResultTy.isVector()) {
184 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
185 SmallVector<Register, 8> AllRegs(PartRegs.begin(), PartRegs.end());
186 AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
187 return mergeMixedSubvectors(DstReg, AllRegs);
188 }
189
190 SmallVector<Register> GCDRegs;
191 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
192 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
193 extractGCDType(GCDRegs, GCDTy, PartReg);
194 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
195 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
196}
197
198void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
199 Register Reg) {
200 LLT Ty = MRI.getType(Reg);
202 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
203 MIRBuilder, MRI);
204 Elts.append(RegElts);
205}
206
207/// Merge \p PartRegs with different types into \p DstReg.
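///
/// Illustrative example (not part of the original comment): with
/// DstReg : <5 x s16> and PartRegs = { <2 x s16>, <2 x s16>, s16 }, the two
/// vector parts are unmerged into scalar elements, the trailing scalar
/// leftover is appended as-is, and all five s16 values are merged back into
/// DstReg.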
208void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
209 ArrayRef<Register> PartRegs) {
211 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
212 appendVectorElts(AllElts, PartRegs[i]);
213
214 Register Leftover = PartRegs[PartRegs.size() - 1];
215 if (!MRI.getType(Leftover).isVector())
216 AllElts.push_back(Leftover);
217 else
218 appendVectorElts(AllElts, Leftover);
219
220 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
221}
222
223/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
225 const MachineInstr &MI) {
226 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
227
228 const int StartIdx = Regs.size();
229 const int NumResults = MI.getNumOperands() - 1;
230 Regs.resize(Regs.size() + NumResults);
231 for (int I = 0; I != NumResults; ++I)
232 Regs[StartIdx + I] = MI.getOperand(I).getReg();
233}
234
235void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
236 LLT GCDTy, Register SrcReg) {
237 LLT SrcTy = MRI.getType(SrcReg);
238 if (SrcTy == GCDTy) {
239 // If the source already evenly divides the result type, we don't need to do
240 // anything.
241 Parts.push_back(SrcReg);
242 } else {
243 // Need to split into common type sized pieces.
244 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
245 getUnmergeResults(Parts, *Unmerge);
246 }
247}
248
249LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
250 LLT NarrowTy, Register SrcReg) {
251 LLT SrcTy = MRI.getType(SrcReg);
252 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
253 extractGCDType(Parts, GCDTy, SrcReg);
254 return GCDTy;
255}
256
257LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
259 unsigned PadStrategy) {
260 LLT LCMTy = getLCMType(DstTy, NarrowTy);
261
262 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
263 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
264 int NumOrigSrc = VRegs.size();
265
266 Register PadReg;
267
268 // Get a value we can use to pad the source value if the sources won't evenly
269 // cover the result type.
270 if (NumOrigSrc < NumParts * NumSubParts) {
271 if (PadStrategy == TargetOpcode::G_ZEXT)
272 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
273 else if (PadStrategy == TargetOpcode::G_ANYEXT)
274 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
275 else {
276 assert(PadStrategy == TargetOpcode::G_SEXT);
277
278 // Shift the sign bit of the low register through the high register.
279 auto ShiftAmt =
281 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
282 }
283 }
284
285 // Registers for the final merge to be produced.
286 SmallVector<Register, 4> Remerge(NumParts);
287
288 // Registers needed for intermediate merges, which will be merged into a
289 // source for Remerge.
290 SmallVector<Register, 4> SubMerge(NumSubParts);
291
292 // Once we've fully read off the end of the original source bits, we can reuse
293 // the same high bits for remaining padding elements.
294 Register AllPadReg;
295
296 // Build merges to the LCM type to cover the original result type.
297 for (int I = 0; I != NumParts; ++I) {
298 bool AllMergePartsArePadding = true;
299
300 // Build the requested merges to the requested type.
301 for (int J = 0; J != NumSubParts; ++J) {
302 int Idx = I * NumSubParts + J;
303 if (Idx >= NumOrigSrc) {
304 SubMerge[J] = PadReg;
305 continue;
306 }
307
308 SubMerge[J] = VRegs[Idx];
309
310 // There are meaningful bits here we can't reuse later.
311 AllMergePartsArePadding = false;
312 }
313
314 // If we've filled up a complete piece with padding bits, we can directly
315 // emit the natural sized constant if applicable, rather than a merge of
316 // smaller constants.
317 if (AllMergePartsArePadding && !AllPadReg) {
318 if (PadStrategy == TargetOpcode::G_ANYEXT)
319 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
320 else if (PadStrategy == TargetOpcode::G_ZEXT)
321 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
322
323 // If this is a sign extension, we can't materialize a trivial constant
324 // with the right type and have to produce a merge.
325 }
326
327 if (AllPadReg) {
328 // Avoid creating additional instructions if we're just adding additional
329 // copies of padding bits.
330 Remerge[I] = AllPadReg;
331 continue;
332 }
333
334 if (NumSubParts == 1)
335 Remerge[I] = SubMerge[0];
336 else
337 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
338
339 // In the sign extend padding case, re-use the first all-signbit merge.
340 if (AllMergePartsArePadding && !AllPadReg)
341 AllPadReg = Remerge[I];
342 }
343
344 VRegs = std::move(Remerge);
345 return LCMTy;
346}
347
348void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
349 ArrayRef<Register> RemergeRegs) {
350 LLT DstTy = MRI.getType(DstReg);
351
352 // Create the merge to the widened source, and extract the relevant bits into
353 // the result.
354
355 if (DstTy == LCMTy) {
356 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
357 return;
358 }
359
360 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
361 if (DstTy.isScalar() && LCMTy.isScalar()) {
362 MIRBuilder.buildTrunc(DstReg, Remerge);
363 return;
364 }
365
366 if (LCMTy.isVector()) {
367 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
368 SmallVector<Register, 8> UnmergeDefs(NumDefs);
369 UnmergeDefs[0] = DstReg;
370 for (unsigned I = 1; I != NumDefs; ++I)
371 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
372
373 MIRBuilder.buildUnmerge(UnmergeDefs,
374 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
375 return;
376 }
377
378 llvm_unreachable("unhandled case");
379}
380
381static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
382#define RTLIBCASE_INT(LibcallPrefix) \
383 do { \
384 switch (Size) { \
385 case 32: \
386 return RTLIB::LibcallPrefix##32; \
387 case 64: \
388 return RTLIB::LibcallPrefix##64; \
389 case 128: \
390 return RTLIB::LibcallPrefix##128; \
391 default: \
392 llvm_unreachable("unexpected size"); \
393 } \
394 } while (0)
395
396#define RTLIBCASE(LibcallPrefix) \
397 do { \
398 switch (Size) { \
399 case 32: \
400 return RTLIB::LibcallPrefix##32; \
401 case 64: \
402 return RTLIB::LibcallPrefix##64; \
403 case 80: \
404 return RTLIB::LibcallPrefix##80; \
405 case 128: \
406 return RTLIB::LibcallPrefix##128; \
407 default: \
408 llvm_unreachable("unexpected size"); \
409 } \
410 } while (0)
411
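// Illustrative expansion (not part of the original source): for G_FSIN with
// Size == 64, RTLIBCASE(SIN_F) below selects RTLIB::SIN_F64, i.e. the
// sin(double) runtime routine.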
412 switch (Opcode) {
413 case TargetOpcode::G_MUL:
414 RTLIBCASE_INT(MUL_I);
415 case TargetOpcode::G_SDIV:
416 RTLIBCASE_INT(SDIV_I);
417 case TargetOpcode::G_UDIV:
418 RTLIBCASE_INT(UDIV_I);
419 case TargetOpcode::G_SREM:
420 RTLIBCASE_INT(SREM_I);
421 case TargetOpcode::G_UREM:
422 RTLIBCASE_INT(UREM_I);
423 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
424 RTLIBCASE_INT(CTLZ_I);
425 case TargetOpcode::G_FADD:
426 RTLIBCASE(ADD_F);
427 case TargetOpcode::G_FSUB:
428 RTLIBCASE(SUB_F);
429 case TargetOpcode::G_FMUL:
430 RTLIBCASE(MUL_F);
431 case TargetOpcode::G_FDIV:
432 RTLIBCASE(DIV_F);
433 case TargetOpcode::G_FEXP:
434 RTLIBCASE(EXP_F);
435 case TargetOpcode::G_FEXP2:
436 RTLIBCASE(EXP2_F);
437 case TargetOpcode::G_FEXP10:
438 RTLIBCASE(EXP10_F);
439 case TargetOpcode::G_FREM:
440 RTLIBCASE(REM_F);
441 case TargetOpcode::G_FPOW:
442 RTLIBCASE(POW_F);
443 case TargetOpcode::G_FPOWI:
444 RTLIBCASE(POWI_F);
445 case TargetOpcode::G_FMA:
446 RTLIBCASE(FMA_F);
447 case TargetOpcode::G_FSIN:
448 RTLIBCASE(SIN_F);
449 case TargetOpcode::G_FCOS:
450 RTLIBCASE(COS_F);
451 case TargetOpcode::G_FTAN:
452 RTLIBCASE(TAN_F);
453 case TargetOpcode::G_FASIN:
454 RTLIBCASE(ASIN_F);
455 case TargetOpcode::G_FACOS:
456 RTLIBCASE(ACOS_F);
457 case TargetOpcode::G_FATAN:
458 RTLIBCASE(ATAN_F);
459 case TargetOpcode::G_FATAN2:
460 RTLIBCASE(ATAN2_F);
461 case TargetOpcode::G_FSINH:
462 RTLIBCASE(SINH_F);
463 case TargetOpcode::G_FCOSH:
464 RTLIBCASE(COSH_F);
465 case TargetOpcode::G_FTANH:
466 RTLIBCASE(TANH_F);
467 case TargetOpcode::G_FLOG10:
468 RTLIBCASE(LOG10_F);
469 case TargetOpcode::G_FLOG:
470 RTLIBCASE(LOG_F);
471 case TargetOpcode::G_FLOG2:
472 RTLIBCASE(LOG2_F);
473 case TargetOpcode::G_FLDEXP:
474 RTLIBCASE(LDEXP_F);
475 case TargetOpcode::G_FCEIL:
476 RTLIBCASE(CEIL_F);
477 case TargetOpcode::G_FFLOOR:
478 RTLIBCASE(FLOOR_F);
479 case TargetOpcode::G_FMINNUM:
480 RTLIBCASE(FMIN_F);
481 case TargetOpcode::G_FMAXNUM:
482 RTLIBCASE(FMAX_F);
483 case TargetOpcode::G_FSQRT:
484 RTLIBCASE(SQRT_F);
485 case TargetOpcode::G_FRINT:
486 RTLIBCASE(RINT_F);
487 case TargetOpcode::G_FNEARBYINT:
488 RTLIBCASE(NEARBYINT_F);
489 case TargetOpcode::G_INTRINSIC_TRUNC:
490 RTLIBCASE(TRUNC_F);
491 case TargetOpcode::G_INTRINSIC_ROUND:
492 RTLIBCASE(ROUND_F);
493 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
494 RTLIBCASE(ROUNDEVEN_F);
495 case TargetOpcode::G_INTRINSIC_LRINT:
496 RTLIBCASE(LRINT_F);
497 case TargetOpcode::G_INTRINSIC_LLRINT:
498 RTLIBCASE(LLRINT_F);
499 }
500 llvm_unreachable("Unknown libcall function");
501#undef RTLIBCASE_INT
502#undef RTLIBCASE
503}
504
505/// True if an instruction is in tail position in its caller. Intended for
506/// legalizing libcalls as tail calls when possible.
509 const TargetInstrInfo &TII,
511 MachineBasicBlock &MBB = *MI.getParent();
512 const Function &F = MBB.getParent()->getFunction();
513
514 // Conservatively require the attributes of the call to match those of
515 // the return. Ignore NoAlias and NonNull because they don't affect the
516 // call sequence.
517 AttributeList CallerAttrs = F.getAttributes();
518 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
519 .removeAttribute(Attribute::NoAlias)
520 .removeAttribute(Attribute::NonNull)
521 .hasAttributes())
522 return false;
523
524 // It's not safe to eliminate the sign / zero extension of the return value.
525 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
526 CallerAttrs.hasRetAttr(Attribute::SExt))
527 return false;
528
529 // Only tail call if the following instruction is a standard return or if we
530 // have a `thisreturn` callee, and a sequence like:
531 //
532 // G_MEMCPY %0, %1, %2
533 // $x0 = COPY %0
534 // RET_ReallyLR implicit $x0
535 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
536 if (Next != MBB.instr_end() && Next->isCopy()) {
537 if (MI.getOpcode() == TargetOpcode::G_BZERO)
538 return false;
539
540 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
541 // memcpy/etc routines return the same parameter. For others it will be the
542 // returned value.
543 Register VReg = MI.getOperand(0).getReg();
544 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
545 return false;
546
547 Register PReg = Next->getOperand(0).getReg();
548 if (!PReg.isPhysical())
549 return false;
550
551 auto Ret = next_nodbg(Next, MBB.instr_end());
552 if (Ret == MBB.instr_end() || !Ret->isReturn())
553 return false;
554
555 if (Ret->getNumImplicitOperands() != 1)
556 return false;
557
558 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
559 return false;
560
561 // Skip over the COPY that we just validated.
562 Next = Ret;
563 }
564
565 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
566 return false;
567
568 return true;
569}
570
573 const CallLowering::ArgInfo &Result,
575 const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
576 MachineInstr *MI) {
577 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
578
580 Info.CallConv = CC;
582 Info.OrigRet = Result;
583 if (MI)
584 Info.IsTailCall =
585 (Result.Ty->isVoidTy() ||
586 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
587 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
588 *MIRBuilder.getMRI());
589
590 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
591 if (!CLI.lowerCall(MIRBuilder, Info))
593
594 if (MI && Info.LoweredTailCall) {
595 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
596
597 // Check debug locations before removing the return.
598 LocObserver.checkpoint(true);
599
600 // We must have a return following the call (or debug insts) to get past
601 // isLibCallInTailPosition.
602 do {
603 MachineInstr *Next = MI->getNextNode();
604 assert(Next &&
605 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
606 "Expected instr following MI to be return or debug inst?");
607 // We lowered a tail call, so the call is now the return from the block.
608 // Delete the old return.
609 Next->eraseFromParent();
610 } while (MI->getNextNode());
611
612 // We expect to lose the debug location from the return.
613 LocObserver.checkpoint(false);
614 }
616}
617
620 const CallLowering::ArgInfo &Result,
622 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
623 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
624 const char *Name = TLI.getLibcallName(Libcall);
625 if (!Name)
627 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
628 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
629}
630
631// Useful for libcalls where all operands have the same type.
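//
// Illustrative example (not part of the original comment): a G_FREM on s64
// operands maps through getRTLibDesc to RTLIB::REM_F64 (fmod), and the
// instruction's source operands are forwarded unchanged as the call
// arguments.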
634 Type *OpType, LostDebugLocObserver &LocObserver) {
635 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
636
637 // FIXME: What does the original arg index mean here?
639 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
640 Args.push_back({MO.getReg(), OpType, 0});
641 return createLibcall(MIRBuilder, Libcall,
642 {MI.getOperand(0).getReg(), OpType, 0}, Args,
643 LocObserver, &MI);
644}
645
648 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
649 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
650
652 // Add all the args, except for the last which is an imm denoting 'tail'.
653 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
654 Register Reg = MI.getOperand(i).getReg();
655
656 // Need to derive an IR type for call lowering.
657 LLT OpLLT = MRI.getType(Reg);
658 Type *OpTy = nullptr;
659 if (OpLLT.isPointer())
660 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
661 else
662 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
663 Args.push_back({Reg, OpTy, 0});
664 }
665
666 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
667 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
668 RTLIB::Libcall RTLibcall;
669 unsigned Opc = MI.getOpcode();
670 switch (Opc) {
671 case TargetOpcode::G_BZERO:
672 RTLibcall = RTLIB::BZERO;
673 break;
674 case TargetOpcode::G_MEMCPY:
675 RTLibcall = RTLIB::MEMCPY;
676 Args[0].Flags[0].setReturned();
677 break;
678 case TargetOpcode::G_MEMMOVE:
679 RTLibcall = RTLIB::MEMMOVE;
680 Args[0].Flags[0].setReturned();
681 break;
682 case TargetOpcode::G_MEMSET:
683 RTLibcall = RTLIB::MEMSET;
684 Args[0].Flags[0].setReturned();
685 break;
686 default:
687 llvm_unreachable("unsupported opcode");
688 }
689 const char *Name = TLI.getLibcallName(RTLibcall);
690
691 // Unsupported libcall on the target.
692 if (!Name) {
693 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
694 << MIRBuilder.getTII().getName(Opc) << "\n");
696 }
697
699 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
701 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
702 Info.IsTailCall =
703 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
704 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
705
706 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
707 if (!CLI.lowerCall(MIRBuilder, Info))
709
710 if (Info.LoweredTailCall) {
711 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
712
713 // Check debug locations before removing the return.
714 LocObserver.checkpoint(true);
715
716 // We must have a return following the call (or debug insts) to get past
717 // isLibCallInTailPosition.
718 do {
719 MachineInstr *Next = MI.getNextNode();
720 assert(Next &&
721 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
722 "Expected instr following MI to be return or debug inst?");
723 // We lowered a tail call, so the call is now the return from the block.
724 // Delete the old return.
725 Next->eraseFromParent();
726 } while (MI.getNextNode());
727
728 // We expect to lose the debug location from the return.
729 LocObserver.checkpoint(false);
730 }
731
733}
734
736 unsigned Opc = MI.getOpcode();
737 auto &AtomicMI = cast<GMemOperation>(MI);
738 auto &MMO = AtomicMI.getMMO();
739 auto Ordering = MMO.getMergedOrdering();
740 LLT MemType = MMO.getMemoryType();
741 uint64_t MemSize = MemType.getSizeInBytes();
742 if (MemType.isVector())
743 return RTLIB::UNKNOWN_LIBCALL;
744
745#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
746#define LCALL5(A) \
747 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
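// Illustrative expansion (not part of the original source):
// LCALL5(RTLIB::OUTLINE_ATOMIC_CAS) builds the 5x4 table
// {CAS1, CAS2, CAS4, CAS8, CAS16} x {RELAX, ACQ, REL, ACQ_REL}, which
// getOutlineAtomicHelper then indexes by memory size and ordering.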
748 switch (Opc) {
749 case TargetOpcode::G_ATOMIC_CMPXCHG:
750 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
751 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
752 return getOutlineAtomicHelper(LC, Ordering, MemSize);
753 }
754 case TargetOpcode::G_ATOMICRMW_XCHG: {
755 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
756 return getOutlineAtomicHelper(LC, Ordering, MemSize);
757 }
758 case TargetOpcode::G_ATOMICRMW_ADD:
759 case TargetOpcode::G_ATOMICRMW_SUB: {
760 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
761 return getOutlineAtomicHelper(LC, Ordering, MemSize);
762 }
763 case TargetOpcode::G_ATOMICRMW_AND: {
764 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
765 return getOutlineAtomicHelper(LC, Ordering, MemSize);
766 }
767 case TargetOpcode::G_ATOMICRMW_OR: {
768 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
769 return getOutlineAtomicHelper(LC, Ordering, MemSize);
770 }
771 case TargetOpcode::G_ATOMICRMW_XOR: {
772 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
773 return getOutlineAtomicHelper(LC, Ordering, MemSize);
774 }
775 default:
776 return RTLIB::UNKNOWN_LIBCALL;
777 }
778#undef LCALLS
779#undef LCALL5
780}
781
784 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
785
786 Type *RetTy;
787 SmallVector<Register> RetRegs;
789 unsigned Opc = MI.getOpcode();
790 switch (Opc) {
791 case TargetOpcode::G_ATOMIC_CMPXCHG:
792 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
794 LLT SuccessLLT;
795 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
796 MI.getFirst4RegLLTs();
797 RetRegs.push_back(Ret);
798 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
799 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
800 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
801 NewLLT) = MI.getFirst5RegLLTs();
802 RetRegs.push_back(Success);
804 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
805 }
806 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
807 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
808 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
809 break;
810 }
811 case TargetOpcode::G_ATOMICRMW_XCHG:
812 case TargetOpcode::G_ATOMICRMW_ADD:
813 case TargetOpcode::G_ATOMICRMW_SUB:
814 case TargetOpcode::G_ATOMICRMW_AND:
815 case TargetOpcode::G_ATOMICRMW_OR:
816 case TargetOpcode::G_ATOMICRMW_XOR: {
817 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
818 RetRegs.push_back(Ret);
819 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
820 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
821 Val =
822 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
823 .getReg(0);
824 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
825 Val =
826 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
827 .getReg(0);
828 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
829 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
830 break;
831 }
832 default:
833 llvm_unreachable("unsupported opcode");
834 }
835
836 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
837 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
839 const char *Name = TLI.getLibcallName(RTLibcall);
840
841 // Unsupported libcall on the target.
842 if (!Name) {
843 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
844 << MIRBuilder.getTII().getName(Opc) << "\n");
846 }
847
849 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
851 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
852
853 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
854 if (!CLI.lowerCall(MIRBuilder, Info))
856
858}
859
860static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
861 Type *FromType) {
862 auto ToMVT = MVT::getVT(ToType);
863 auto FromMVT = MVT::getVT(FromType);
864
865 switch (Opcode) {
866 case TargetOpcode::G_FPEXT:
867 return RTLIB::getFPEXT(FromMVT, ToMVT);
868 case TargetOpcode::G_FPTRUNC:
869 return RTLIB::getFPROUND(FromMVT, ToMVT);
870 case TargetOpcode::G_FPTOSI:
871 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
872 case TargetOpcode::G_FPTOUI:
873 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
874 case TargetOpcode::G_SITOFP:
875 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
876 case TargetOpcode::G_UITOFP:
877 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
878 }
879 llvm_unreachable("Unsupported libcall function");
880}
881
884 Type *FromType, LostDebugLocObserver &LocObserver,
885 const TargetLowering &TLI, bool IsSigned = false) {
886 CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
887 if (FromType->isIntegerTy()) {
888 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
889 Arg.Flags[0].setSExt();
890 else
891 Arg.Flags[0].setZExt();
892 }
893
894 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
895 return createLibcall(MIRBuilder, Libcall,
896 {MI.getOperand(0).getReg(), ToType, 0}, Arg, LocObserver,
897 &MI);
898}
899
900static RTLIB::Libcall
902 RTLIB::Libcall RTLibcall;
903 switch (MI.getOpcode()) {
904 case TargetOpcode::G_GET_FPENV:
905 RTLibcall = RTLIB::FEGETENV;
906 break;
907 case TargetOpcode::G_SET_FPENV:
908 case TargetOpcode::G_RESET_FPENV:
909 RTLibcall = RTLIB::FESETENV;
910 break;
911 case TargetOpcode::G_GET_FPMODE:
912 RTLibcall = RTLIB::FEGETMODE;
913 break;
914 case TargetOpcode::G_SET_FPMODE:
915 case TargetOpcode::G_RESET_FPMODE:
916 RTLibcall = RTLIB::FESETMODE;
917 break;
918 default:
919 llvm_unreachable("Unexpected opcode");
920 }
921 return RTLibcall;
922}
923
924// Some library functions that read FP state (fegetmode, fegetenv) write the
925// state into a region in memory. IR intrinsics that do the same operations
926// (get_fpmode, get_fpenv) return the state as an integer value. To implement
927// these intrinsics via the library functions, we need to use a temporary
928// variable, for example:
929//
930// %0:_(s32) = G_GET_FPMODE
931//
932// is transformed to:
933//
934// %1:_(p0) = G_FRAME_INDEX %stack.0
935// BL &fegetmode
936// %0:_(s32) = G_LOAD % 1
937//
939LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
941 LostDebugLocObserver &LocObserver) {
943 auto &MF = MIRBuilder.getMF();
944 auto &MRI = *MIRBuilder.getMRI();
945 auto &Ctx = MF.getFunction().getContext();
946
947 // Create a temporary where the library function will put the read state.
948 Register Dst = MI.getOperand(0).getReg();
949 LLT StateTy = MRI.getType(Dst);
950 TypeSize StateSize = StateTy.getSizeInBytes();
952 MachinePointerInfo TempPtrInfo;
953 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
954
955 // Create a call to library function, with the temporary as an argument.
956 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
957 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
959 auto Res =
960 createLibcall(MIRBuilder, RTLibcall,
962 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
963 LocObserver, nullptr);
965 return Res;
966
967 // Create a load from the temporary.
968 MachineMemOperand *MMO = MF.getMachineMemOperand(
969 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
970 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
971
973}
974
975// Similar to `createGetStateLibcall`, this function calls a library function
976// using transient space on the stack. In this case the library function reads
977// the content of the memory region.
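//
// Illustrative example (not part of the original comment):
//
//   G_SET_FPMODE %0:_(s32)
//
// is transformed to:
//
//   %1:_(p0) = G_FRAME_INDEX %stack.0
//   G_STORE %0:_(s32), %1:_(p0)
//   BL &fesetmode
//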
979LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
981 LostDebugLocObserver &LocObserver) {
983 auto &MF = MIRBuilder.getMF();
984 auto &MRI = *MIRBuilder.getMRI();
985 auto &Ctx = MF.getFunction().getContext();
986
987 // Create a temporary where the library function will get the new state.
988 Register Src = MI.getOperand(0).getReg();
989 LLT StateTy = MRI.getType(Src);
990 TypeSize StateSize = StateTy.getSizeInBytes();
992 MachinePointerInfo TempPtrInfo;
993 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
994
995 // Put the new state into the temporary.
996 MachineMemOperand *MMO = MF.getMachineMemOperand(
997 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
998 MIRBuilder.buildStore(Src, Temp, *MMO);
999
1000 // Create a call to library function, with the temporary as an argument.
1001 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1002 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1004 return createLibcall(MIRBuilder, RTLibcall,
1006 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1007 LocObserver, nullptr);
1008}
1009
1010/// Returns the corresponding libcall for the given Pred and
1011/// the ICMP predicate that should be generated to compare with #0
1012/// after the libcall.
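///
/// Illustrative example (not part of the original comment): for FCMP_OEQ on
/// 32-bit operands the pair is {RTLIB::OEQ_F32, CmpInst::ICMP_EQ}, i.e. call
/// the __eqsf2 soft-float routine and then compare its i32 result for
/// equality with 0.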
1013static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1015#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1016 do { \
1017 switch (Size) { \
1018 case 32: \
1019 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1020 case 64: \
1021 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1022 case 128: \
1023 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1024 default: \
1025 llvm_unreachable("unexpected size"); \
1026 } \
1027 } while (0)
1028
1029 switch (Pred) {
1030 case CmpInst::FCMP_OEQ:
1032 case CmpInst::FCMP_UNE:
1034 case CmpInst::FCMP_OGE:
1036 case CmpInst::FCMP_OLT:
1038 case CmpInst::FCMP_OLE:
1040 case CmpInst::FCMP_OGT:
1042 case CmpInst::FCMP_UNO:
1044 default:
1045 return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
1046 }
1047}
1048
1050LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
1052 LostDebugLocObserver &LocObserver) {
1053 auto &MF = MIRBuilder.getMF();
1054 auto &Ctx = MF.getFunction().getContext();
1055 const GFCmp *Cmp = cast<GFCmp>(&MI);
1056
1057 LLT OpLLT = MRI.getType(Cmp->getLHSReg());
1058 unsigned Size = OpLLT.getSizeInBits();
1059 if ((Size != 32 && Size != 64 && Size != 128) ||
1060 OpLLT != MRI.getType(Cmp->getRHSReg()))
1061 return UnableToLegalize;
1062
1063 Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
1064
1065 // DstReg type is s32
1066 const Register DstReg = Cmp->getReg(0);
1067 LLT DstTy = MRI.getType(DstReg);
1068 const auto Cond = Cmp->getCond();
1069
1070 // Reference:
1071 // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
1072 // Generates a libcall followed by ICMP.
1073 const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
1074 const CmpInst::Predicate ICmpPred,
1075 const DstOp &Res) -> Register {
1076 // FCMP libcall always returns an i32, and needs an ICMP with #0.
1077 constexpr LLT TempLLT = LLT::scalar(32);
1078 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1079 // Generate libcall, holding result in Temp
1080 const auto Status = createLibcall(
1081 MIRBuilder, Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
1082 {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
1083 LocObserver, &MI);
1084 if (!Status)
1085 return {};
1086
1087 // Compare temp with #0 to get the final result.
1088 return MIRBuilder
1089 .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
1090 .getReg(0);
1091 };
1092
1093 // Simple case if we have a direct mapping from predicate to libcall
1094 if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
1095 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1096 ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
1097 if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
1098 return Legalized;
1099 }
1100 return UnableToLegalize;
1101 }
1102
1103 // No direct mapping found; should be generated as a combination of libcalls.
1104
1105 switch (Cond) {
1106 case CmpInst::FCMP_UEQ: {
1107 // FCMP_UEQ: unordered or equal
1108 // Convert into (FCMP_OEQ || FCMP_UNO).
1109
1110 const auto [OeqLibcall, OeqPred] =
1112 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1113
1114 const auto [UnoLibcall, UnoPred] =
1116 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1117 if (Oeq && Uno)
1118 MIRBuilder.buildOr(DstReg, Oeq, Uno);
1119 else
1120 return UnableToLegalize;
1121
1122 break;
1123 }
1124 case CmpInst::FCMP_ONE: {
1125 // FCMP_ONE: ordered and operands are unequal
1126 // Convert into (!FCMP_OEQ && !FCMP_UNO).
1127
1128 // We invert the predicate instead of generating a NOT
1129 // to save one instruction.
1130 // On AArch64, isel can even select the two cmps into a single ccmp.
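    // Illustrative example (not part of the original comment), f32 case:
    // FCMP_ONE %a, %b becomes roughly
    //   (__eqsf2(a, b) != 0) && (__unordsf2(a, b) == 0)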
1131 const auto [OeqLibcall, OeqPred] =
1133 const auto NotOeq =
1134 BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
1135
1136 const auto [UnoLibcall, UnoPred] =
1138 const auto NotUno =
1139 BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
1140
1141 if (NotOeq && NotUno)
1142 MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
1143 else
1144 return UnableToLegalize;
1145
1146 break;
1147 }
1148 case CmpInst::FCMP_ULT:
1149 case CmpInst::FCMP_UGE:
1150 case CmpInst::FCMP_UGT:
1151 case CmpInst::FCMP_ULE:
1152 case CmpInst::FCMP_ORD: {
1153 // Convert into: !(inverse(Pred))
1154 // E.g. FCMP_ULT becomes !FCMP_OGE
1155 // This is equivalent to the following, but saves some instructions.
1156 // MIRBuilder.buildNot(
1157 // PredTy,
1158 // MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
1159 // Op1, Op2));
1160 const auto [InversedLibcall, InversedPred] =
1162 if (!BuildLibcall(InversedLibcall,
1163 CmpInst::getInversePredicate(InversedPred), DstReg))
1164 return UnableToLegalize;
1165 break;
1166 }
1167 default:
1168 return UnableToLegalize;
1169 }
1170
1171 return Legalized;
1172}
1173
1174// The function is used to legalize operations that set the default environment
1175// state. In the C library a call like `fesetmode(FE_DFL_MODE)` is used for that.
1176// On most targets supported by glibc, FE_DFL_MODE is defined as
1177// `((const femode_t *) -1)`, and that assumption is used here. If it does not
1178// hold for some target, the target must provide custom lowering.
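//
// Illustrative example (not part of the original comment): G_RESET_FPMODE is
// lowered to roughly
//
//   %0:_(s64) = G_CONSTANT i64 -1
//   %1:_(p0) = G_INTTOPTR %0
//   BL &fesetmode
//
// with %1 passed as the argument (the pointer width here is illustrative).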
1180LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
1182 LostDebugLocObserver &LocObserver) {
1184 auto &MF = MIRBuilder.getMF();
1185 auto &Ctx = MF.getFunction().getContext();
1186
1187 // Create an argument for the library function.
1188 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1189 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1190 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1191 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1192 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1193 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1194 MIRBuilder.buildIntToPtr(Dest, DefValue);
1195
1197 return createLibcall(MIRBuilder, RTLibcall,
1199 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1200 LocObserver, &MI);
1201}
1202
1205 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1206
1207 switch (MI.getOpcode()) {
1208 default:
1209 return UnableToLegalize;
1210 case TargetOpcode::G_MUL:
1211 case TargetOpcode::G_SDIV:
1212 case TargetOpcode::G_UDIV:
1213 case TargetOpcode::G_SREM:
1214 case TargetOpcode::G_UREM:
1215 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1216 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1217 unsigned Size = LLTy.getSizeInBits();
1218 Type *HLTy = IntegerType::get(Ctx, Size);
1219 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1220 if (Status != Legalized)
1221 return Status;
1222 break;
1223 }
1224 case TargetOpcode::G_FADD:
1225 case TargetOpcode::G_FSUB:
1226 case TargetOpcode::G_FMUL:
1227 case TargetOpcode::G_FDIV:
1228 case TargetOpcode::G_FMA:
1229 case TargetOpcode::G_FPOW:
1230 case TargetOpcode::G_FREM:
1231 case TargetOpcode::G_FCOS:
1232 case TargetOpcode::G_FSIN:
1233 case TargetOpcode::G_FTAN:
1234 case TargetOpcode::G_FACOS:
1235 case TargetOpcode::G_FASIN:
1236 case TargetOpcode::G_FATAN:
1237 case TargetOpcode::G_FATAN2:
1238 case TargetOpcode::G_FCOSH:
1239 case TargetOpcode::G_FSINH:
1240 case TargetOpcode::G_FTANH:
1241 case TargetOpcode::G_FLOG10:
1242 case TargetOpcode::G_FLOG:
1243 case TargetOpcode::G_FLOG2:
1244 case TargetOpcode::G_FEXP:
1245 case TargetOpcode::G_FEXP2:
1246 case TargetOpcode::G_FEXP10:
1247 case TargetOpcode::G_FCEIL:
1248 case TargetOpcode::G_FFLOOR:
1249 case TargetOpcode::G_FMINNUM:
1250 case TargetOpcode::G_FMAXNUM:
1251 case TargetOpcode::G_FSQRT:
1252 case TargetOpcode::G_FRINT:
1253 case TargetOpcode::G_FNEARBYINT:
1254 case TargetOpcode::G_INTRINSIC_TRUNC:
1255 case TargetOpcode::G_INTRINSIC_ROUND:
1256 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1257 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1258 unsigned Size = LLTy.getSizeInBits();
1259 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1260 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1261 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1262 return UnableToLegalize;
1263 }
1264 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1265 if (Status != Legalized)
1266 return Status;
1267 break;
1268 }
1269 case TargetOpcode::G_INTRINSIC_LRINT:
1270 case TargetOpcode::G_INTRINSIC_LLRINT: {
1271 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1272 unsigned Size = LLTy.getSizeInBits();
1273 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1274 Type *ITy = IntegerType::get(
1275 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1276 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1277 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1278 return UnableToLegalize;
1279 }
1280 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1282 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1283 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1284 if (Status != Legalized)
1285 return Status;
1286 MI.eraseFromParent();
1287 return Legalized;
1288 }
1289 case TargetOpcode::G_FPOWI:
1290 case TargetOpcode::G_FLDEXP: {
1291 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1292 unsigned Size = LLTy.getSizeInBits();
1293 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1294 Type *ITy = IntegerType::get(
1295 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1296 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1297 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1298 return UnableToLegalize;
1299 }
1300 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1302 {MI.getOperand(1).getReg(), HLTy, 0},
1303 {MI.getOperand(2).getReg(), ITy, 1}};
1304 Args[1].Flags[0].setSExt();
1306 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1307 Args, LocObserver, &MI);
1308 if (Status != Legalized)
1309 return Status;
1310 break;
1311 }
1312 case TargetOpcode::G_FPEXT:
1313 case TargetOpcode::G_FPTRUNC: {
1314 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1315 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1316 if (!FromTy || !ToTy)
1317 return UnableToLegalize;
1319 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver, TLI);
1320 if (Status != Legalized)
1321 return Status;
1322 break;
1323 }
1324 case TargetOpcode::G_FCMP: {
1325 LegalizeResult Status = createFCMPLibcall(MIRBuilder, MI, LocObserver);
1326 if (Status != Legalized)
1327 return Status;
1328 MI.eraseFromParent();
1329 return Status;
1330 }
1331 case TargetOpcode::G_FPTOSI:
1332 case TargetOpcode::G_FPTOUI: {
1333 // FIXME: Support other types
1334 Type *FromTy =
1335 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1336 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1337 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1338 return UnableToLegalize;
1340 MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver, TLI);
1341 if (Status != Legalized)
1342 return Status;
1343 break;
1344 }
1345 case TargetOpcode::G_SITOFP:
1346 case TargetOpcode::G_UITOFP: {
1347 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1348 Type *ToTy =
1349 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1350 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1351 return UnableToLegalize;
1352 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
1354 conversionLibcall(MI, MIRBuilder, ToTy, Type::getIntNTy(Ctx, FromSize),
1355 LocObserver, TLI, IsSigned);
1356 if (Status != Legalized)
1357 return Status;
1358 break;
1359 }
1360 case TargetOpcode::G_ATOMICRMW_XCHG:
1361 case TargetOpcode::G_ATOMICRMW_ADD:
1362 case TargetOpcode::G_ATOMICRMW_SUB:
1363 case TargetOpcode::G_ATOMICRMW_AND:
1364 case TargetOpcode::G_ATOMICRMW_OR:
1365 case TargetOpcode::G_ATOMICRMW_XOR:
1366 case TargetOpcode::G_ATOMIC_CMPXCHG:
1367 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1369 if (Status != Legalized)
1370 return Status;
1371 break;
1372 }
1373 case TargetOpcode::G_BZERO:
1374 case TargetOpcode::G_MEMCPY:
1375 case TargetOpcode::G_MEMMOVE:
1376 case TargetOpcode::G_MEMSET: {
1377 LegalizeResult Result =
1378 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1379 if (Result != Legalized)
1380 return Result;
1381 MI.eraseFromParent();
1382 return Result;
1383 }
1384 case TargetOpcode::G_GET_FPENV:
1385 case TargetOpcode::G_GET_FPMODE: {
1386 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1387 if (Result != Legalized)
1388 return Result;
1389 break;
1390 }
1391 case TargetOpcode::G_SET_FPENV:
1392 case TargetOpcode::G_SET_FPMODE: {
1393 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1394 if (Result != Legalized)
1395 return Result;
1396 break;
1397 }
1398 case TargetOpcode::G_RESET_FPENV:
1399 case TargetOpcode::G_RESET_FPMODE: {
1400 LegalizeResult Result =
1401 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1402 if (Result != Legalized)
1403 return Result;
1404 break;
1405 }
1406 }
1407
1408 MI.eraseFromParent();
1409 return Legalized;
1410}
1411
1413 unsigned TypeIdx,
1414 LLT NarrowTy) {
1415 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1416 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1417
1418 switch (MI.getOpcode()) {
1419 default:
1420 return UnableToLegalize;
1421 case TargetOpcode::G_IMPLICIT_DEF: {
1422 Register DstReg = MI.getOperand(0).getReg();
1423 LLT DstTy = MRI.getType(DstReg);
1424
1425 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1426 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1427 // FIXME: Although this would also be legal for the general case, it causes
1428 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1429 // combines not being hit). This seems to be a problem related to the
1430 // artifact combiner.
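    // Illustrative example (not part of the original comment): narrowing
    //   %d:_(s96) = G_IMPLICIT_DEF
    // with NarrowTy = s64 takes this path (96 % 64 != 0) and produces
    //   %u:_(s64) = G_IMPLICIT_DEF
    //   %d:_(s96) = G_ANYEXT %u
    // while NarrowTy = s32 falls through to a merge of three s32 undefs.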
1431 if (SizeOp0 % NarrowSize != 0) {
1432 LLT ImplicitTy = NarrowTy;
1433 if (DstTy.isVector())
1434 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
1435
1436 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1437 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1438
1439 MI.eraseFromParent();
1440 return Legalized;
1441 }
1442
1443 int NumParts = SizeOp0 / NarrowSize;
1444
1446 for (int i = 0; i < NumParts; ++i)
1447 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1448
1449 if (DstTy.isVector())
1450 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1451 else
1452 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1453 MI.eraseFromParent();
1454 return Legalized;
1455 }
1456 case TargetOpcode::G_CONSTANT: {
1457 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1458 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1459 unsigned TotalSize = Ty.getSizeInBits();
1460 unsigned NarrowSize = NarrowTy.getSizeInBits();
1461 int NumParts = TotalSize / NarrowSize;
1462
1463 SmallVector<Register, 4> PartRegs;
1464 for (int I = 0; I != NumParts; ++I) {
1465 unsigned Offset = I * NarrowSize;
1466 auto K = MIRBuilder.buildConstant(NarrowTy,
1467 Val.lshr(Offset).trunc(NarrowSize));
1468 PartRegs.push_back(K.getReg(0));
1469 }
1470
1471 LLT LeftoverTy;
1472 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1473 SmallVector<Register, 1> LeftoverRegs;
1474 if (LeftoverBits != 0) {
1475 LeftoverTy = LLT::scalar(LeftoverBits);
1476 auto K = MIRBuilder.buildConstant(
1477 LeftoverTy,
1478 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1479 LeftoverRegs.push_back(K.getReg(0));
1480 }
1481
1482 insertParts(MI.getOperand(0).getReg(),
1483 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1484
1485 MI.eraseFromParent();
1486 return Legalized;
1487 }
1488 case TargetOpcode::G_SEXT:
1489 case TargetOpcode::G_ZEXT:
1490 case TargetOpcode::G_ANYEXT:
1491 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1492 case TargetOpcode::G_TRUNC: {
1493 if (TypeIdx != 1)
1494 return UnableToLegalize;
1495
1496 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1497 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1498 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1499 return UnableToLegalize;
1500 }
1501
1502 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1503 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1504 MI.eraseFromParent();
1505 return Legalized;
1506 }
1507 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1508 case TargetOpcode::G_FREEZE: {
1509 if (TypeIdx != 0)
1510 return UnableToLegalize;
1511
1512 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1513 // Should widen scalar first
1514 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1515 return UnableToLegalize;
1516
1517 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1519 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1520 Parts.push_back(
1521 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1522 .getReg(0));
1523 }
1524
1525 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1526 MI.eraseFromParent();
1527 return Legalized;
1528 }
1529 case TargetOpcode::G_ADD:
1530 case TargetOpcode::G_SUB:
1531 case TargetOpcode::G_SADDO:
1532 case TargetOpcode::G_SSUBO:
1533 case TargetOpcode::G_SADDE:
1534 case TargetOpcode::G_SSUBE:
1535 case TargetOpcode::G_UADDO:
1536 case TargetOpcode::G_USUBO:
1537 case TargetOpcode::G_UADDE:
1538 case TargetOpcode::G_USUBE:
1539 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1540 case TargetOpcode::G_MUL:
1541 case TargetOpcode::G_UMULH:
1542 return narrowScalarMul(MI, NarrowTy);
1543 case TargetOpcode::G_EXTRACT:
1544 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1545 case TargetOpcode::G_INSERT:
1546 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1547 case TargetOpcode::G_LOAD: {
1548 auto &LoadMI = cast<GLoad>(MI);
1549 Register DstReg = LoadMI.getDstReg();
1550 LLT DstTy = MRI.getType(DstReg);
1551 if (DstTy.isVector())
1552 return UnableToLegalize;
1553
1554 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1555 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1556 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1557 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1558 LoadMI.eraseFromParent();
1559 return Legalized;
1560 }
1561
1562 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1563 }
1564 case TargetOpcode::G_ZEXTLOAD:
1565 case TargetOpcode::G_SEXTLOAD: {
1566 auto &LoadMI = cast<GExtLoad>(MI);
1567 Register DstReg = LoadMI.getDstReg();
1568 Register PtrReg = LoadMI.getPointerReg();
1569
1570 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1571 auto &MMO = LoadMI.getMMO();
1572 unsigned MemSize = MMO.getSizeInBits().getValue();
1573
1574 if (MemSize == NarrowSize) {
1575 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1576 } else if (MemSize < NarrowSize) {
1577 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1578 } else if (MemSize > NarrowSize) {
1579 // FIXME: Need to split the load.
1580 return UnableToLegalize;
1581 }
1582
1583 if (isa<GZExtLoad>(LoadMI))
1584 MIRBuilder.buildZExt(DstReg, TmpReg);
1585 else
1586 MIRBuilder.buildSExt(DstReg, TmpReg);
1587
1588 LoadMI.eraseFromParent();
1589 return Legalized;
1590 }
1591 case TargetOpcode::G_STORE: {
1592 auto &StoreMI = cast<GStore>(MI);
1593
1594 Register SrcReg = StoreMI.getValueReg();
1595 LLT SrcTy = MRI.getType(SrcReg);
1596 if (SrcTy.isVector())
1597 return UnableToLegalize;
1598
1599 int NumParts = SizeOp0 / NarrowSize;
1600 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1601 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1602 if (SrcTy.isVector() && LeftoverBits != 0)
1603 return UnableToLegalize;
1604
1605 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1606 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1607 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1608 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1609 StoreMI.eraseFromParent();
1610 return Legalized;
1611 }
1612
1613 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1614 }
1615 case TargetOpcode::G_SELECT:
1616 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1617 case TargetOpcode::G_AND:
1618 case TargetOpcode::G_OR:
1619 case TargetOpcode::G_XOR: {
1620 // Legalize bitwise operation:
1621 // A = BinOp<Ty> B, C
1622 // into:
1623 // B1, ..., BN = G_UNMERGE_VALUES B
1624 // C1, ..., CN = G_UNMERGE_VALUES C
1625 // A1 = BinOp<Ty/N> B1, C1
1626 // ...
1627 // AN = BinOp<Ty/N> BN, CN
1628 // A = G_MERGE_VALUES A1, ..., AN
1629 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1630 }
1631 case TargetOpcode::G_SHL:
1632 case TargetOpcode::G_LSHR:
1633 case TargetOpcode::G_ASHR:
1634 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1635 case TargetOpcode::G_CTLZ:
1636 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1637 case TargetOpcode::G_CTTZ:
1638 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1639 case TargetOpcode::G_CTPOP:
1640 if (TypeIdx == 1)
1641 switch (MI.getOpcode()) {
1642 case TargetOpcode::G_CTLZ:
1643 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1644 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1645 case TargetOpcode::G_CTTZ:
1646 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1647 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1648 case TargetOpcode::G_CTPOP:
1649 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1650 default:
1651 return UnableToLegalize;
1652 }
1653
1655 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1657 return Legalized;
1658 case TargetOpcode::G_INTTOPTR:
1659 if (TypeIdx != 1)
1660 return UnableToLegalize;
1661
1663 narrowScalarSrc(MI, NarrowTy, 1);
1665 return Legalized;
1666 case TargetOpcode::G_PTRTOINT:
1667 if (TypeIdx != 0)
1668 return UnableToLegalize;
1669
1671 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1673 return Legalized;
1674 case TargetOpcode::G_PHI: {
1675 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1676 // NarrowSize.
1677 if (SizeOp0 % NarrowSize != 0)
1678 return UnableToLegalize;
1679
1680 unsigned NumParts = SizeOp0 / NarrowSize;
1681 SmallVector<Register, 2> DstRegs(NumParts);
1682 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1684 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1685 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1687 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1688 SrcRegs[i / 2], MIRBuilder, MRI);
1689 }
1690 MachineBasicBlock &MBB = *MI.getParent();
1692 for (unsigned i = 0; i < NumParts; ++i) {
1693 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1695 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1696 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1697 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1698 }
1700 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1702 MI.eraseFromParent();
1703 return Legalized;
1704 }
1705 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1706 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1707 if (TypeIdx != 2)
1708 return UnableToLegalize;
1709
1710 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1712 narrowScalarSrc(MI, NarrowTy, OpIdx);
1714 return Legalized;
1715 }
1716 case TargetOpcode::G_ICMP: {
1717 Register LHS = MI.getOperand(2).getReg();
1718 LLT SrcTy = MRI.getType(LHS);
1719 CmpInst::Predicate Pred =
1720 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1721
1722 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1723 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1724 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1725 LHSLeftoverRegs, MIRBuilder, MRI))
1726 return UnableToLegalize;
1727
1728 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1729 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1730 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1731 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1732 return UnableToLegalize;
1733
1734 // We now have the LHS and RHS of the compare split into narrow-type
1735 // registers, plus potentially some leftover type.
1736 Register Dst = MI.getOperand(0).getReg();
1737 LLT ResTy = MRI.getType(Dst);
1738 if (ICmpInst::isEquality(Pred)) {
1739 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1740 // them together. For each equal part, the result should be all 0s. For
1741 // each non-equal part, we'll get at least one 1.
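      // Illustrative example (not part of the original comment): an s128
      // compare split into two s64 parts becomes
      //   %x0:_(s64) = G_XOR %lhs0, %rhs0
      //   %x1:_(s64) = G_XOR %lhs1, %rhs1
      //   %or:_(s64) = G_OR %x0, %x1
      //   %dst = G_ICMP intpred(eq/ne), %or, 0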
1742 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1744 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1745 auto LHS = std::get<0>(LHSAndRHS);
1746 auto RHS = std::get<1>(LHSAndRHS);
1747 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1748 Xors.push_back(Xor);
1749 }
1750
1751 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1752 // to the desired narrow type so that we can OR them together later.
1753 SmallVector<Register, 4> WidenedXors;
1754 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1755 auto LHS = std::get<0>(LHSAndRHS);
1756 auto RHS = std::get<1>(LHSAndRHS);
1757 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1758 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1759 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1760 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1761 Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
1762 }
1763
1764 // Now, for each part we broke up, we know if they are equal/not equal
1765 // based off the G_XOR. We can OR these all together and compare against
1766 // 0 to get the result.
1767 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1768 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1769 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1770 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1771 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1772 } else {
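// Rough sketch of the chained lowering below: parts are visited from least
// to most significant, and each step keeps the previous (lower) result only
// when the current parts are equal. For a two-part signed less-than:
//   res = (lhs.hi == rhs.hi) ? (lhs.lo u< rhs.lo) : (lhs.hi s< rhs.hi)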
1773 Register CmpIn;
1774 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1775 Register CmpOut;
1776 CmpInst::Predicate PartPred;
1777
1778 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1779 PartPred = Pred;
1780 CmpOut = Dst;
1781 } else {
1782 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1783 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1784 }
1785
1786 if (!CmpIn) {
1787 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
1788 RHSPartRegs[I]);
1789 } else {
1790 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
1791 RHSPartRegs[I]);
1792 auto CmpEq = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, ResTy,
1793 LHSPartRegs[I], RHSPartRegs[I]);
1794 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1795 }
1796
1797 CmpIn = CmpOut;
1798 }
1799
1800 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1801 Register CmpOut;
1802 CmpInst::Predicate PartPred;
1803
1804 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1805 PartPred = Pred;
1806 CmpOut = Dst;
1807 } else {
1808 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1809 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1810 }
1811
1812 if (!CmpIn) {
1813 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
1814 RHSLeftoverRegs[I]);
1815 } else {
1816 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
1817 RHSLeftoverRegs[I]);
1818 auto CmpEq =
1819 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, ResTy,
1820 LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
1821 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1822 }
1823
1824 CmpIn = CmpOut;
1825 }
1826 }
1827 MI.eraseFromParent();
1828 return Legalized;
1829 }
1830 case TargetOpcode::G_FCMP:
1831 if (TypeIdx != 0)
1832 return UnableToLegalize;
1833
1835 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1837 return Legalized;
1838
1839 case TargetOpcode::G_SEXT_INREG: {
1840 if (TypeIdx != 0)
1841 return UnableToLegalize;
1842
1843 int64_t SizeInBits = MI.getOperand(2).getImm();
1844
1845 // So long as the new type has more bits than the bits we're extending, we
1846 // don't need to break it apart.
1847 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1849 // We don't lose any non-extension bits by truncating the src and
1850 // sign-extending the dst.
1851 MachineOperand &MO1 = MI.getOperand(1);
1852 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1853 MO1.setReg(TruncMIB.getReg(0));
1854
1855 MachineOperand &MO2 = MI.getOperand(0);
1856 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1858 MIRBuilder.buildSExt(MO2, DstExt);
1859 MO2.setReg(DstExt);
1861 return Legalized;
1862 }
1863
1864 // Break it apart. Components below the extension point are unmodified. The
1865 // component containing the extension point becomes a narrower SEXT_INREG.
1866 // Components above it are ashr'd from the component containing the
1867 // extension point.
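// For instance, narrowing s64 = G_SEXT_INREG %x, 8 with NarrowTy s32 would,
// roughly, keep part 0 as G_SEXT_INREG(%x0, 8) and compute part 1 as a
// G_ASHR of part 0 by 31 (a broadcast of the sign bit).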
1868 if (SizeOp0 % NarrowSize != 0)
1869 return UnableToLegalize;
1870 int NumParts = SizeOp0 / NarrowSize;
1871
1872 // List the registers where the destination will be scattered.
1873 SmallVector<Register, 2> DstRegs;
1874 // List the registers where the source will be split.
1875 SmallVector<Register, 2> SrcRegs;
1876
1877 // Create all the temporary registers.
1878 for (int i = 0; i < NumParts; ++i) {
1879 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1880
1881 SrcRegs.push_back(SrcReg);
1882 }
1883
1884 // Explode the big arguments into smaller chunks.
1885 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1886
1887 Register AshrCstReg =
1888 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1889 .getReg(0);
1890 Register FullExtensionReg;
1891 Register PartialExtensionReg;
1892
1893 // Do the operation on each small part.
1894 for (int i = 0; i < NumParts; ++i) {
1895 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
1896 DstRegs.push_back(SrcRegs[i]);
1897 PartialExtensionReg = DstRegs.back();
1898 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1899 assert(PartialExtensionReg &&
1900 "Expected to visit partial extension before full");
1901 if (FullExtensionReg) {
1902 DstRegs.push_back(FullExtensionReg);
1903 continue;
1904 }
1905 DstRegs.push_back(
1906 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1907 .getReg(0));
1908 FullExtensionReg = DstRegs.back();
1909 } else {
1910 DstRegs.push_back(
1911 MIRBuilder
1912 .buildInstr(
1913 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1914 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1915 .getReg(0));
1916 PartialExtensionReg = DstRegs.back();
1917 }
1918 }
1919
1920 // Gather the destination registers into the final destination.
1921 Register DstReg = MI.getOperand(0).getReg();
1922 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1923 MI.eraseFromParent();
1924 return Legalized;
1925 }
1926 case TargetOpcode::G_BSWAP:
1927 case TargetOpcode::G_BITREVERSE: {
1928 if (SizeOp0 % NarrowSize != 0)
1929 return UnableToLegalize;
1930
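// Illustrative sketch: an s64 G_BSWAP narrowed to s32 parts byte-swaps each
// half and re-merges the halves in reversed order, roughly
//   %res:_(s64) = G_MERGE_VALUES bswap(%hi:s32), bswap(%lo:s32)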
1932 SmallVector<Register, 2> SrcRegs, DstRegs;
1933 unsigned NumParts = SizeOp0 / NarrowSize;
1934 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
1935 MIRBuilder, MRI);
1936
1937 for (unsigned i = 0; i < NumParts; ++i) {
1938 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1939 {SrcRegs[NumParts - 1 - i]});
1940 DstRegs.push_back(DstPart.getReg(0));
1941 }
1942
1943 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1944
1946 MI.eraseFromParent();
1947 return Legalized;
1948 }
1949 case TargetOpcode::G_PTR_ADD:
1950 case TargetOpcode::G_PTRMASK: {
1951 if (TypeIdx != 1)
1952 return UnableToLegalize;
1954 narrowScalarSrc(MI, NarrowTy, 2);
1956 return Legalized;
1957 }
1958 case TargetOpcode::G_FPTOUI:
1959 case TargetOpcode::G_FPTOSI:
1960 case TargetOpcode::G_FPTOUI_SAT:
1961 case TargetOpcode::G_FPTOSI_SAT:
1962 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1963 case TargetOpcode::G_FPEXT:
1964 if (TypeIdx != 0)
1965 return UnableToLegalize;
1967 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1969 return Legalized;
1970 case TargetOpcode::G_FLDEXP:
1971 case TargetOpcode::G_STRICT_FLDEXP:
1972 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
1973 case TargetOpcode::G_VSCALE: {
1974 Register Dst = MI.getOperand(0).getReg();
1975 LLT Ty = MRI.getType(Dst);
1976
1977 // Assume VSCALE(1) fits into a legal integer
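// Rough sketch of the expansion below for a wide (e.g. s64) G_VSCALE:
//   %vs:_(s32) = G_VSCALE 1
//   %ext:_(s64) = G_ZEXT %vs(s32)
//   %dst:_(s64) = G_MUL %ext, <original constant>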
1978 const APInt One(NarrowTy.getSizeInBits(), 1);
1979 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
1980 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
1981 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
1982 MIRBuilder.buildMul(Dst, ZExt, C);
1983
1984 MI.eraseFromParent();
1985 return Legalized;
1986 }
1987 }
1988}
1989
1990Register LegalizerHelper::coerceToScalar(Register Val) {
1991 LLT Ty = MRI.getType(Val);
1992 if (Ty.isScalar())
1993 return Val;
1994
1995 const DataLayout &DL = MIRBuilder.getDataLayout();
1996 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1997 if (Ty.isPointer()) {
1998 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1999 return Register();
2000 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2001 }
2002
2003 Register NewVal = Val;
2004
2005 assert(Ty.isVector());
2006 if (Ty.isPointerVector())
2007 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2008 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2009}
2010
2011void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
2012 unsigned OpIdx, unsigned ExtOpcode) {
2013 MachineOperand &MO = MI.getOperand(OpIdx);
2014 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2015 MO.setReg(ExtB.getReg(0));
2016}
2017
2018void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
2019 unsigned OpIdx) {
2020 MachineOperand &MO = MI.getOperand(OpIdx);
2021 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
2022 MO.setReg(ExtB.getReg(0));
2023}
2024
2025void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
2026 unsigned OpIdx, unsigned TruncOpcode) {
2027 MachineOperand &MO = MI.getOperand(OpIdx);
2028 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2029 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2030 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2031 MO.setReg(DstExt);
2032}
2033
2034void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
2035 unsigned OpIdx, unsigned ExtOpcode) {
2036 MachineOperand &MO = MI.getOperand(OpIdx);
2037 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2038 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2039 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2040 MO.setReg(DstTrunc);
2041}
2042
2043void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
2044 unsigned OpIdx) {
2045 MachineOperand &MO = MI.getOperand(OpIdx);
2047 Register Dst = MO.getReg();
2048 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2049 MO.setReg(DstExt);
2050 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2051}
2052
2053void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
2054 unsigned OpIdx) {
2055 MachineOperand &MO = MI.getOperand(OpIdx);
2056 SmallVector<Register, 8> Regs;
2057 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2058}
2059
2060void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2061 MachineOperand &Op = MI.getOperand(OpIdx);
2062 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
2063}
2064
2065void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2066 MachineOperand &MO = MI.getOperand(OpIdx);
2067 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2068 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2069 MIRBuilder.buildBitcast(MO, CastDst);
2070 MO.setReg(CastDst);
2071}
2072
2073LegalizerHelper::LegalizeResult
2074LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
2075 LLT WideTy) {
2076 if (TypeIdx != 1)
2077 return UnableToLegalize;
2078
2079 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
2080 if (DstTy.isVector())
2081 return UnableToLegalize;
2082
2083 LLT SrcTy = MRI.getType(Src1Reg);
2084 const int DstSize = DstTy.getSizeInBits();
2085 const int SrcSize = SrcTy.getSizeInBits();
2086 const int WideSize = WideTy.getSizeInBits();
2087 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2088
2089 unsigned NumOps = MI.getNumOperands();
2090 unsigned NumSrc = MI.getNumOperands() - 1;
2091 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2092
2093 if (WideSize >= DstSize) {
2094 // Directly pack the bits in the target type.
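// For instance, merging three s8 sources into s24 with WideTy s32 would,
// roughly, zero-extend each part, OR it in at bit offsets 0, 8 and 16, and
// finally truncate the s32 result back to s24.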
2095 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
2096
2097 for (unsigned I = 2; I != NumOps; ++I) {
2098 const unsigned Offset = (I - 1) * PartSize;
2099
2100 Register SrcReg = MI.getOperand(I).getReg();
2101 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
2102
2103 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
2104
2105 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
2106 MRI.createGenericVirtualRegister(WideTy);
2107
2108 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
2109 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
2110 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
2111 ResultReg = NextResult;
2112 }
2113
2114 if (WideSize > DstSize)
2115 MIRBuilder.buildTrunc(DstReg, ResultReg);
2116 else if (DstTy.isPointer())
2117 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
2118
2119 MI.eraseFromParent();
2120 return Legalized;
2121 }
2122
2123 // Unmerge the original values to the GCD type, and recombine to the next
2124 // multiple greater than the original type.
2125 //
2126 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2127 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2128 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2129 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2130 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2131 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2132 // %12:_(s12) = G_MERGE_VALUES %10, %11
2133 //
2134 // Padding with undef if necessary:
2135 //
2136 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2137 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2138 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2139 // %7:_(s2) = G_IMPLICIT_DEF
2140 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2141 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2142 // %10:_(s12) = G_MERGE_VALUES %8, %9
2143
2144 const int GCD = std::gcd(SrcSize, WideSize);
2145 LLT GCDTy = LLT::scalar(GCD);
2146
2148 SmallVector<Register, 8> NewMergeRegs;
2149 SmallVector<Register, 8> Unmerges;
2150 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
2151
2152 // Decompose the original operands if they don't evenly divide.
2153 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
2154 Register SrcReg = MO.getReg();
2155 if (GCD == SrcSize) {
2156 Unmerges.push_back(SrcReg);
2157 } else {
2158 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2159 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2160 Unmerges.push_back(Unmerge.getReg(J));
2161 }
2162 }
2163
2164 // Pad with undef to the next size that is a multiple of the requested size.
2165 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
2166 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
2167 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
2168 Unmerges.push_back(UndefReg);
2169 }
2170
2171 const int PartsPerGCD = WideSize / GCD;
2172
2173 // Build merges of each piece.
2174 ArrayRef<Register> Slicer(Unmerges);
2175 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2176 auto Merge =
2177 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2178 NewMergeRegs.push_back(Merge.getReg(0));
2179 }
2180
2181 // A truncate may be necessary if the requested type doesn't evenly divide the
2182 // original result type.
2183 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
2184 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2185 } else {
2186 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2187 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2188 }
2189
2190 MI.eraseFromParent();
2191 return Legalized;
2192}
2193
2194LegalizerHelper::LegalizeResult
2195LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
2196 LLT WideTy) {
2197 if (TypeIdx != 0)
2198 return UnableToLegalize;
2199
2200 int NumDst = MI.getNumOperands() - 1;
2201 Register SrcReg = MI.getOperand(NumDst).getReg();
2202 LLT SrcTy = MRI.getType(SrcReg);
2203 if (SrcTy.isVector())
2204 return UnableToLegalize;
2205
2206 Register Dst0Reg = MI.getOperand(0).getReg();
2207 LLT DstTy = MRI.getType(Dst0Reg);
2208 if (!DstTy.isScalar())
2209 return UnableToLegalize;
2210
2211 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
2212 if (SrcTy.isPointer()) {
2213 const DataLayout &DL = MIRBuilder.getDataLayout();
2214 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
2215 LLVM_DEBUG(
2216 dbgs() << "Not casting non-integral address space integer\n");
2217 return UnableToLegalize;
2218 }
2219
2220 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
2221 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2222 }
2223
2224 // Widen SrcTy to WideTy. This does not affect the result, but since the
2225 // user requested this size, it is probably better handled than SrcTy and
2226 // should reduce the total number of legalization artifacts.
2227 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2228 SrcTy = WideTy;
2229 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2230 }
2231
2232 // There's no unmerge type to target. Directly extract the bits from the
2233 // source type.
2234 unsigned DstSize = DstTy.getSizeInBits();
2235
2236 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2237 for (int I = 1; I != NumDst; ++I) {
2238 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2239 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2240 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2241 }
2242
2243 MI.eraseFromParent();
2244 return Legalized;
2245 }
2246
2247 // Extend the source to a wider type.
2248 LLT LCMTy = getLCMType(SrcTy, WideTy);
2249
2250 Register WideSrc = SrcReg;
2251 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2252 // TODO: If this is an integral address space, cast to integer and anyext.
2253 if (SrcTy.isPointer()) {
2254 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2255 return UnableToLegalize;
2256 }
2257
2258 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2259 }
2260
2261 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2262
2263 // Create a sequence of unmerges and merges to the original results. Since we
2264 // may have widened the source, we will need to pad the results with dead defs
2265 // to cover the source register.
2266 // e.g. widen s48 to s64:
2267 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2268 //
2269 // =>
2270 // %4:_(s192) = G_ANYEXT %0:_(s96)
2271 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2272 // ; unpack to GCD type, with extra dead defs
2273 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2274 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2275 // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
2276 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2277 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2278 const LLT GCDTy = getGCDType(WideTy, DstTy);
2279 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2280 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2281
2282 // Directly unmerge to the destination without going through a GCD type
2283 // if possible
2284 if (PartsPerRemerge == 1) {
2285 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2286
2287 for (int I = 0; I != NumUnmerge; ++I) {
2288 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2289
2290 for (int J = 0; J != PartsPerUnmerge; ++J) {
2291 int Idx = I * PartsPerUnmerge + J;
2292 if (Idx < NumDst)
2293 MIB.addDef(MI.getOperand(Idx).getReg());
2294 else {
2295 // Create dead def for excess components.
2296 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2297 }
2298 }
2299
2300 MIB.addUse(Unmerge.getReg(I));
2301 }
2302 } else {
2303 SmallVector<Register, 16> Parts;
2304 for (int J = 0; J != NumUnmerge; ++J)
2305 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2306
2307 SmallVector<Register, 8> RemergeParts;
2308 for (int I = 0; I != NumDst; ++I) {
2309 for (int J = 0; J < PartsPerRemerge; ++J) {
2310 const int Idx = I * PartsPerRemerge + J;
2311 RemergeParts.emplace_back(Parts[Idx]);
2312 }
2313
2314 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2315 RemergeParts.clear();
2316 }
2317 }
2318
2319 MI.eraseFromParent();
2320 return Legalized;
2321}
2322
2323LegalizerHelper::LegalizeResult
2324LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2325 LLT WideTy) {
2326 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2327 unsigned Offset = MI.getOperand(2).getImm();
2328
2329 if (TypeIdx == 0) {
2330 if (SrcTy.isVector() || DstTy.isVector())
2331 return UnableToLegalize;
2332
2333 SrcOp Src(SrcReg);
2334 if (SrcTy.isPointer()) {
2335 // Extracts from pointers can be handled only if they are really just
2336 // simple integers.
2337 const DataLayout &DL = MIRBuilder.getDataLayout();
2338 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2339 return UnableToLegalize;
2340
2341 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2342 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2343 SrcTy = SrcAsIntTy;
2344 }
2345
2346 if (DstTy.isPointer())
2347 return UnableToLegalize;
2348
2349 if (Offset == 0) {
2350 // Avoid a shift in the degenerate case.
2351 MIRBuilder.buildTrunc(DstReg,
2352 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2353 MI.eraseFromParent();
2354 return Legalized;
2355 }
2356
2357 // Do a shift in the source type.
2358 LLT ShiftTy = SrcTy;
2359 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2360 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2361 ShiftTy = WideTy;
2362 }
2363
2364 auto LShr = MIRBuilder.buildLShr(
2365 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2366 MIRBuilder.buildTrunc(DstReg, LShr);
2367 MI.eraseFromParent();
2368 return Legalized;
2369 }
2370
2371 if (SrcTy.isScalar()) {
2373 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2375 return Legalized;
2376 }
2377
2378 if (!SrcTy.isVector())
2379 return UnableToLegalize;
2380
2381 if (DstTy != SrcTy.getElementType())
2382 return UnableToLegalize;
2383
2384 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2385 return UnableToLegalize;
2386
2388 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2389
2390 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2391 Offset);
2392 widenScalarDst(MI, WideTy.getScalarType(), 0);
2394 return Legalized;
2395}
2396
2397LegalizerHelper::LegalizeResult
2398LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2399 LLT WideTy) {
2400 if (TypeIdx != 0 || WideTy.isVector())
2401 return UnableToLegalize;
2403 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2404 widenScalarDst(MI, WideTy);
2406 return Legalized;
2407}
2408
2409LegalizerHelper::LegalizeResult
2410LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2411 LLT WideTy) {
2412 unsigned Opcode;
2413 unsigned ExtOpcode;
2414 std::optional<Register> CarryIn;
2415 switch (MI.getOpcode()) {
2416 default:
2417 llvm_unreachable("Unexpected opcode!");
2418 case TargetOpcode::G_SADDO:
2419 Opcode = TargetOpcode::G_ADD;
2420 ExtOpcode = TargetOpcode::G_SEXT;
2421 break;
2422 case TargetOpcode::G_SSUBO:
2423 Opcode = TargetOpcode::G_SUB;
2424 ExtOpcode = TargetOpcode::G_SEXT;
2425 break;
2426 case TargetOpcode::G_UADDO:
2427 Opcode = TargetOpcode::G_ADD;
2428 ExtOpcode = TargetOpcode::G_ZEXT;
2429 break;
2430 case TargetOpcode::G_USUBO:
2431 Opcode = TargetOpcode::G_SUB;
2432 ExtOpcode = TargetOpcode::G_ZEXT;
2433 break;
2434 case TargetOpcode::G_SADDE:
2435 Opcode = TargetOpcode::G_UADDE;
2436 ExtOpcode = TargetOpcode::G_SEXT;
2437 CarryIn = MI.getOperand(4).getReg();
2438 break;
2439 case TargetOpcode::G_SSUBE:
2440 Opcode = TargetOpcode::G_USUBE;
2441 ExtOpcode = TargetOpcode::G_SEXT;
2442 CarryIn = MI.getOperand(4).getReg();
2443 break;
2444 case TargetOpcode::G_UADDE:
2445 Opcode = TargetOpcode::G_UADDE;
2446 ExtOpcode = TargetOpcode::G_ZEXT;
2447 CarryIn = MI.getOperand(4).getReg();
2448 break;
2449 case TargetOpcode::G_USUBE:
2450 Opcode = TargetOpcode::G_USUBE;
2451 ExtOpcode = TargetOpcode::G_ZEXT;
2452 CarryIn = MI.getOperand(4).getReg();
2453 break;
2454 }
2455
2456 if (TypeIdx == 1) {
2457 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2458
2460 if (CarryIn)
2461 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2462 widenScalarDst(MI, WideTy, 1);
2463
2465 return Legalized;
2466 }
2467
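// Rough sketch for an s8 G_SADDO widened to s32: sign-extend both inputs,
// G_ADD in s32, and report overflow when sext(trunc(sum, s8)) differs from
// the s32 sum.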
2468 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2469 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2470 // Do the arithmetic in the larger type.
2471 Register NewOp;
2472 if (CarryIn) {
2473 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2474 NewOp = MIRBuilder
2475 .buildInstr(Opcode, {WideTy, CarryOutTy},
2476 {LHSExt, RHSExt, *CarryIn})
2477 .getReg(0);
2478 } else {
2479 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2480 }
2481 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2482 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2483 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2484 // There is no overflow if the ExtOp is the same as NewOp.
2485 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2486 // Now trunc the NewOp to the original result.
2487 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2488 MI.eraseFromParent();
2489 return Legalized;
2490}
2491
2492LegalizerHelper::LegalizeResult
2493LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2494 LLT WideTy) {
2495 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2496 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2497 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2498 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2499 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2500 // We can convert this to:
2501 // 1. Any extend iN to iM
2502 // 2. SHL by M-N
2503 // 3. [US][ADD|SUB|SHL]SAT
2504 // 4. L/ASHR by M-N
2505 //
2506 // It may be more efficient to lower this to a min and a max operation in
2507 // the higher precision arithmetic if the promoted operation isn't legal,
2508 // but this decision is up to the target's lowering request.
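// Rough sketch for an s8 G_SADDSAT widened to s32: any-extend the inputs,
// shift both left by 24, perform an s32 G_SADDSAT, then arithmetic-shift the
// result right by 24 and truncate back to s8.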
2509 Register DstReg = MI.getOperand(0).getReg();
2510
2511 unsigned NewBits = WideTy.getScalarSizeInBits();
2512 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2513
2514 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2515 // must not left shift the RHS to preserve the shift amount.
2516 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2517 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2518 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2519 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2520 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2521 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2522
2523 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2524 {ShiftL, ShiftR}, MI.getFlags());
2525
2526 // Use a shift that will preserve the number of sign bits when the trunc is
2527 // folded away.
2528 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2529 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2530
2531 MIRBuilder.buildTrunc(DstReg, Result);
2532 MI.eraseFromParent();
2533 return Legalized;
2534}
2535
2536LegalizerHelper::LegalizeResult
2537LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2538 LLT WideTy) {
2539 if (TypeIdx == 1) {
2541 widenScalarDst(MI, WideTy, 1);
2543 return Legalized;
2544 }
2545
2546 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2547 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2548 LLT SrcTy = MRI.getType(LHS);
2549 LLT OverflowTy = MRI.getType(OriginalOverflow);
2550 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2551
2552 // To determine if the result overflowed in the larger type, we extend the
2553 // input to the larger type, do the multiply (checking if it overflows),
2554 // then also check the high bits of the result to see if overflow happened
2555 // there.
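// For instance, an s8 G_UMULO widened to s16: 200 * 2 computes 400 in s16;
// the low byte (144) does not zero-extend back to 400, so overflow is
// reported.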
2556 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2557 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2558 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2559
2560 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2561 // so we don't need to check the overflow result of larger type Mulo.
2562 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2563
2564 unsigned MulOpc =
2565 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2566
2567 MachineInstrBuilder Mulo;
2568 if (WideMulCanOverflow)
2569 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2570 {LeftOperand, RightOperand});
2571 else
2572 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2573
2574 auto Mul = Mulo->getOperand(0);
2575 MIRBuilder.buildTrunc(Result, Mul);
2576
2577 MachineInstrBuilder ExtResult;
2578 // Overflow occurred if it occurred in the larger type, or if the high part
2579 // of the result does not zero/sign-extend the low part. Check this second
2580 // possibility first.
2581 if (IsSigned) {
2582 // For signed, overflow occurred when the high part does not sign-extend
2583 // the low part.
2584 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2585 } else {
2586 // Unsigned overflow occurred when the high part does not zero-extend the
2587 // low part.
2588 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2589 }
2590
2591 if (WideMulCanOverflow) {
2592 auto Overflow =
2593 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2594 // Finally check if the multiplication in the larger type itself overflowed.
2595 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2596 } else {
2597 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2598 }
2599 MI.eraseFromParent();
2600 return Legalized;
2601}
2602
2603LegalizerHelper::LegalizeResult
2604LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2605 unsigned Opcode = MI.getOpcode();
2606 switch (Opcode) {
2607 default:
2608 return UnableToLegalize;
2609 case TargetOpcode::G_ATOMICRMW_XCHG:
2610 case TargetOpcode::G_ATOMICRMW_ADD:
2611 case TargetOpcode::G_ATOMICRMW_SUB:
2612 case TargetOpcode::G_ATOMICRMW_AND:
2613 case TargetOpcode::G_ATOMICRMW_OR:
2614 case TargetOpcode::G_ATOMICRMW_XOR:
2615 case TargetOpcode::G_ATOMICRMW_MIN:
2616 case TargetOpcode::G_ATOMICRMW_MAX:
2617 case TargetOpcode::G_ATOMICRMW_UMIN:
2618 case TargetOpcode::G_ATOMICRMW_UMAX:
2619 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2621 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2622 widenScalarDst(MI, WideTy, 0);
2624 return Legalized;
2625 case TargetOpcode::G_ATOMIC_CMPXCHG:
2626 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2628 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2629 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2630 widenScalarDst(MI, WideTy, 0);
2632 return Legalized;
2633 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2634 if (TypeIdx == 0) {
2636 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2637 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2638 widenScalarDst(MI, WideTy, 0);
2640 return Legalized;
2641 }
2642 assert(TypeIdx == 1 &&
2643 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2645 widenScalarDst(MI, WideTy, 1);
2647 return Legalized;
2648 case TargetOpcode::G_EXTRACT:
2649 return widenScalarExtract(MI, TypeIdx, WideTy);
2650 case TargetOpcode::G_INSERT:
2651 return widenScalarInsert(MI, TypeIdx, WideTy);
2652 case TargetOpcode::G_MERGE_VALUES:
2653 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2654 case TargetOpcode::G_UNMERGE_VALUES:
2655 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2656 case TargetOpcode::G_SADDO:
2657 case TargetOpcode::G_SSUBO:
2658 case TargetOpcode::G_UADDO:
2659 case TargetOpcode::G_USUBO:
2660 case TargetOpcode::G_SADDE:
2661 case TargetOpcode::G_SSUBE:
2662 case TargetOpcode::G_UADDE:
2663 case TargetOpcode::G_USUBE:
2664 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2665 case TargetOpcode::G_UMULO:
2666 case TargetOpcode::G_SMULO:
2667 return widenScalarMulo(MI, TypeIdx, WideTy);
2668 case TargetOpcode::G_SADDSAT:
2669 case TargetOpcode::G_SSUBSAT:
2670 case TargetOpcode::G_SSHLSAT:
2671 case TargetOpcode::G_UADDSAT:
2672 case TargetOpcode::G_USUBSAT:
2673 case TargetOpcode::G_USHLSAT:
2674 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2675 case TargetOpcode::G_CTTZ:
2676 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2677 case TargetOpcode::G_CTLZ:
2678 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2679 case TargetOpcode::G_CTPOP: {
2680 if (TypeIdx == 0) {
2682 widenScalarDst(MI, WideTy, 0);
2684 return Legalized;
2685 }
2686
2687 Register SrcReg = MI.getOperand(1).getReg();
2688
2689 // First extend the input.
2690 unsigned ExtOpc = Opcode == TargetOpcode::G_CTTZ ||
2691 Opcode == TargetOpcode::G_CTTZ_ZERO_UNDEF
2692 ? TargetOpcode::G_ANYEXT
2693 : TargetOpcode::G_ZEXT;
2694 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2695 LLT CurTy = MRI.getType(SrcReg);
2696 unsigned NewOpc = Opcode;
2697 if (NewOpc == TargetOpcode::G_CTTZ) {
2698 // The count is the same in the larger type except if the original
2699 // value was zero. This can be handled by setting the bit just off
2700 // the top of the original type.
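// e.g. for an s8 G_CTTZ widened to s32, OR-ing in bit 8 makes a zero input
// yield a count of 8, matching the narrow type's semantics.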
2701 auto TopBit =
2702 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2703 MIBSrc = MIRBuilder.buildOr(
2704 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2705 // Now we know the operand is non-zero, use the more relaxed opcode.
2706 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2707 }
2708
2709 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2710
2711 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2712 // An optimization where the result is the CTLZ after the left shift by
2713 // (the difference in size between WideTy and CurTy), that is,
2714 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2715 // Result = ctlz MIBSrc
2716 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2717 MIRBuilder.buildConstant(WideTy, SizeDiff));
2718 }
2719
2720 // Perform the operation at the larger size.
2721 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2722 // This is already the correct result for CTPOP and CTTZs
2723 if (Opcode == TargetOpcode::G_CTLZ) {
2724 // The correct result is NewOp - (the difference in size between WideTy and CurTy).
2725 MIBNewOp = MIRBuilder.buildSub(
2726 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2727 }
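// e.g. for an s8 input widened to s32, G_CTLZ over-counts by 24 leading
// zeros, so SizeDiff (24) is subtracted back out here.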
2728
2729 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2730 MI.eraseFromParent();
2731 return Legalized;
2732 }
2733 case TargetOpcode::G_BSWAP: {
2735 Register DstReg = MI.getOperand(0).getReg();
2736
2737 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2738 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2739 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2740 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2741
2742 MI.getOperand(0).setReg(DstExt);
2743
2745
2746 LLT Ty = MRI.getType(DstReg);
2747 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2748 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2749 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2750
2751 MIRBuilder.buildTrunc(DstReg, ShrReg);
2753 return Legalized;
2754 }
2755 case TargetOpcode::G_BITREVERSE: {
2757
2758 Register DstReg = MI.getOperand(0).getReg();
2759 LLT Ty = MRI.getType(DstReg);
2760 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2761
2762 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2763 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2764 MI.getOperand(0).setReg(DstExt);
2766
2767 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2768 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2769 MIRBuilder.buildTrunc(DstReg, Shift);
2771 return Legalized;
2772 }
2773 case TargetOpcode::G_FREEZE:
2774 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2776 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2777 widenScalarDst(MI, WideTy);
2779 return Legalized;
2780
2781 case TargetOpcode::G_ABS:
2783 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2784 widenScalarDst(MI, WideTy);
2786 return Legalized;
2787
2788 case TargetOpcode::G_ADD:
2789 case TargetOpcode::G_AND:
2790 case TargetOpcode::G_MUL:
2791 case TargetOpcode::G_OR:
2792 case TargetOpcode::G_XOR:
2793 case TargetOpcode::G_SUB:
2794 case TargetOpcode::G_SHUFFLE_VECTOR:
2795 // Perform operation at larger width (any extension is fine here, high bits
2796 // don't affect the result) and then truncate the result back to the
2797 // original type.
2799 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2800 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2801 widenScalarDst(MI, WideTy);
2803 return Legalized;
2804
2805 case TargetOpcode::G_SBFX:
2806 case TargetOpcode::G_UBFX:
2808
2809 if (TypeIdx == 0) {
2810 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2811 widenScalarDst(MI, WideTy);
2812 } else {
2813 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2814 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2815 }
2816
2818 return Legalized;
2819
2820 case TargetOpcode::G_SHL:
2822
2823 if (TypeIdx == 0) {
2824 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2825 widenScalarDst(MI, WideTy);
2826 } else {
2827 assert(TypeIdx == 1);
2828 // The "number of bits to shift" operand must preserve its value as an
2829 // unsigned integer:
2830 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2831 }
2832
2834 return Legalized;
2835
2836 case TargetOpcode::G_ROTR:
2837 case TargetOpcode::G_ROTL:
2838 if (TypeIdx != 1)
2839 return UnableToLegalize;
2840
2842 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2844 return Legalized;
2845
2846 case TargetOpcode::G_SDIV:
2847 case TargetOpcode::G_SREM:
2848 case TargetOpcode::G_SMIN:
2849 case TargetOpcode::G_SMAX:
2851 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2852 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2853 widenScalarDst(MI, WideTy);
2855 return Legalized;
2856
2857 case TargetOpcode::G_SDIVREM:
2859 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2860 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2861 widenScalarDst(MI, WideTy);
2862 widenScalarDst(MI, WideTy, 1);
2864 return Legalized;
2865
2866 case TargetOpcode::G_ASHR:
2867 case TargetOpcode::G_LSHR:
2869
2870 if (TypeIdx == 0) {
2871 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
2872 : TargetOpcode::G_ZEXT;
2873
2874 widenScalarSrc(MI, WideTy, 1, CvtOp);
2875 widenScalarDst(MI, WideTy);
2876 } else {
2877 assert(TypeIdx == 1);
2878 // The "number of bits to shift" operand must preserve its value as an
2879 // unsigned integer:
2880 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2881 }
2882
2884 return Legalized;
2885 case TargetOpcode::G_UDIV:
2886 case TargetOpcode::G_UREM:
2888 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2889 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2890 widenScalarDst(MI, WideTy);
2892 return Legalized;
2893 case TargetOpcode::G_UDIVREM:
2895 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2896 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2897 widenScalarDst(MI, WideTy);
2898 widenScalarDst(MI, WideTy, 1);
2900 return Legalized;
2901 case TargetOpcode::G_UMIN:
2902 case TargetOpcode::G_UMAX: {
2903 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2904
2905 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
2906 unsigned ExtOpc =
2907 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty, Ctx),
2908 getApproximateEVTForLLT(WideTy, Ctx))
2909 ? TargetOpcode::G_SEXT
2910 : TargetOpcode::G_ZEXT;
2911
2913 widenScalarSrc(MI, WideTy, 1, ExtOpc);
2914 widenScalarSrc(MI, WideTy, 2, ExtOpc);
2915 widenScalarDst(MI, WideTy);
2917 return Legalized;
2918 }
2919
2920 case TargetOpcode::G_SELECT:
2922 if (TypeIdx == 0) {
2923 // Perform operation at larger width (any extension is fine here, high
2924 // bits don't affect the result) and then truncate the result back to the
2925 // original type.
2926 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2927 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2928 widenScalarDst(MI, WideTy);
2929 } else {
2930 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
2931 // Explicit extension is required here since high bits affect the result.
2932 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
2933 }
2935 return Legalized;
2936
2937 case TargetOpcode::G_FPTOSI:
2938 case TargetOpcode::G_FPTOUI:
2939 case TargetOpcode::G_INTRINSIC_LRINT:
2940 case TargetOpcode::G_INTRINSIC_LLRINT:
2941 case TargetOpcode::G_IS_FPCLASS:
2943
2944 if (TypeIdx == 0)
2945 widenScalarDst(MI, WideTy);
2946 else
2947 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2948
2950 return Legalized;
2951 case TargetOpcode::G_SITOFP:
2953
2954 if (TypeIdx == 0)
2955 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2956 else
2957 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2958
2960 return Legalized;
2961 case TargetOpcode::G_UITOFP:
2963
2964 if (TypeIdx == 0)
2965 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2966 else
2967 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2968
2970 return Legalized;
2971 case TargetOpcode::G_FPTOSI_SAT:
2972 case TargetOpcode::G_FPTOUI_SAT:
2974
2975 if (TypeIdx == 0) {
2976 Register OldDst = MI.getOperand(0).getReg();
2977 LLT Ty = MRI.getType(OldDst);
2978 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
2979 Register NewDst;
2980 MI.getOperand(0).setReg(ExtReg);
2981 uint64_t ShortBits = Ty.getScalarSizeInBits();
2982 uint64_t WideBits = WideTy.getScalarSizeInBits();
2984 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
2985 // z = i16 fptosi_sat(a)
2986 // ->
2987 // x = i32 fptosi_sat(a)
2988 // y = smin(x, 32767)
2989 // z = smax(y, -32768)
2990 auto MaxVal = MIRBuilder.buildConstant(
2991 WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
2992 auto MinVal = MIRBuilder.buildConstant(
2993 WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
2994 Register MidReg =
2995 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
2996 NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
2997 } else {
2998 // z = i16 fptoui_sat(a)
2999 // ->
3000 // x = i32 fptoui_sat(a)
3001 // y = smin(x, 65535)
3002 auto MaxVal = MIRBuilder.buildConstant(
3003 WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
3004 NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3005 }
3006 MIRBuilder.buildTrunc(OldDst, NewDst);
3007 } else
3008 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3009
3011 return Legalized;
3012 case TargetOpcode::G_LOAD:
3013 case TargetOpcode::G_SEXTLOAD:
3014 case TargetOpcode::G_ZEXTLOAD:
3016 widenScalarDst(MI, WideTy);
3018 return Legalized;
3019
3020 case TargetOpcode::G_STORE: {
3021 if (TypeIdx != 0)
3022 return UnableToLegalize;
3023
3024 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3025 if (!Ty.isScalar())
3026 return UnableToLegalize;
3027
3029
3030 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3031 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3032 widenScalarSrc(MI, WideTy, 0, ExtType);
3033
3035 return Legalized;
3036 }
3037 case TargetOpcode::G_CONSTANT: {
3038 MachineOperand &SrcMO = MI.getOperand(1);
3039 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3040 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3041 MRI.getType(MI.getOperand(0).getReg()));
3042 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3043 ExtOpc == TargetOpcode::G_ANYEXT) &&
3044 "Illegal Extend");
3045 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3046 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3047 ? SrcVal.sext(WideTy.getSizeInBits())
3048 : SrcVal.zext(WideTy.getSizeInBits());
3050 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3051
3052 widenScalarDst(MI, WideTy);
3054 return Legalized;
3055 }
3056 case TargetOpcode::G_FCONSTANT: {
3057 // To avoid changing the bits of the constant due to extension to a larger
3058 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
3059 MachineOperand &SrcMO = MI.getOperand(1);
3060 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3062 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
3063 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
3064 MI.eraseFromParent();
3065 return Legalized;
3066 }
3067 case TargetOpcode::G_IMPLICIT_DEF: {
3069 widenScalarDst(MI, WideTy);
3071 return Legalized;
3072 }
3073 case TargetOpcode::G_BRCOND:
3075 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
3077 return Legalized;
3078
3079 case TargetOpcode::G_FCMP:
3081 if (TypeIdx == 0)
3082 widenScalarDst(MI, WideTy);
3083 else {
3084 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3085 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
3086 }
3088 return Legalized;
3089
3090 case TargetOpcode::G_ICMP:
3092 if (TypeIdx == 0)
3093 widenScalarDst(MI, WideTy);
3094 else {
3095 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
3096 CmpInst::Predicate Pred =
3097 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3098
3099 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3100 unsigned ExtOpcode =
3101 (CmpInst::isSigned(Pred) ||
3102 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx),
3103 getApproximateEVTForLLT(WideTy, Ctx)))
3104 ? TargetOpcode::G_SEXT
3105 : TargetOpcode::G_ZEXT;
3106 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
3107 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
3108 }
3110 return Legalized;
3111
3112 case TargetOpcode::G_PTR_ADD:
3113 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
3115 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3117 return Legalized;
3118
3119 case TargetOpcode::G_PHI: {
3120 assert(TypeIdx == 0 && "Expecting only Idx 0");
3121
3123 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3124 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3126 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
3127 }
3128
3129 MachineBasicBlock &MBB = *MI.getParent();
3131 widenScalarDst(MI, WideTy);
3133 return Legalized;
3134 }
3135 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3136 if (TypeIdx == 0) {
3137 Register VecReg = MI.getOperand(1).getReg();
3138 LLT VecTy = MRI.getType(VecReg);
3140
3141 widenScalarSrc(
3142 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
3143 TargetOpcode::G_ANYEXT);
3144
3145 widenScalarDst(MI, WideTy, 0);
3147 return Legalized;
3148 }
3149
3150 if (TypeIdx != 2)
3151 return UnableToLegalize;
3153 // TODO: Probably should be zext
3154 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3156 return Legalized;
3157 }
3158 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3159 if (TypeIdx == 0) {
3161 const LLT WideEltTy = WideTy.getElementType();
3162
3163 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3164 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3165 widenScalarDst(MI, WideTy, 0);
3167 return Legalized;
3168 }
3169
3170 if (TypeIdx == 1) {
3172
3173 Register VecReg = MI.getOperand(1).getReg();
3174 LLT VecTy = MRI.getType(VecReg);
3175 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
3176
3177 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3178 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3179 widenScalarDst(MI, WideVecTy, 0);
3181 return Legalized;
3182 }
3183
3184 if (TypeIdx == 2) {
3186 // TODO: Probably should be zext
3187 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
3189 return Legalized;
3190 }
3191
3192 return UnableToLegalize;
3193 }
3194 case TargetOpcode::G_FADD:
3195 case TargetOpcode::G_FMUL:
3196 case TargetOpcode::G_FSUB:
3197 case TargetOpcode::G_FMA:
3198 case TargetOpcode::G_FMAD:
3199 case TargetOpcode::G_FNEG:
3200 case TargetOpcode::G_FABS:
3201 case TargetOpcode::G_FCANONICALIZE:
3202 case TargetOpcode::G_FMINNUM:
3203 case TargetOpcode::G_FMAXNUM:
3204 case TargetOpcode::G_FMINNUM_IEEE:
3205 case TargetOpcode::G_FMAXNUM_IEEE:
3206 case TargetOpcode::G_FMINIMUM:
3207 case TargetOpcode::G_FMAXIMUM:
3208 case TargetOpcode::G_FDIV:
3209 case TargetOpcode::G_FREM:
3210 case TargetOpcode::G_FCEIL:
3211 case TargetOpcode::G_FFLOOR:
3212 case TargetOpcode::G_FCOS:
3213 case TargetOpcode::G_FSIN:
3214 case TargetOpcode::G_FTAN:
3215 case TargetOpcode::G_FACOS:
3216 case TargetOpcode::G_FASIN:
3217 case TargetOpcode::G_FATAN:
3218 case TargetOpcode::G_FATAN2:
3219 case TargetOpcode::G_FCOSH:
3220 case TargetOpcode::G_FSINH:
3221 case TargetOpcode::G_FTANH:
3222 case TargetOpcode::G_FLOG10:
3223 case TargetOpcode::G_FLOG:
3224 case TargetOpcode::G_FLOG2:
3225 case TargetOpcode::G_FRINT:
3226 case TargetOpcode::G_FNEARBYINT:
3227 case TargetOpcode::G_FSQRT:
3228 case TargetOpcode::G_FEXP:
3229 case TargetOpcode::G_FEXP2:
3230 case TargetOpcode::G_FEXP10:
3231 case TargetOpcode::G_FPOW:
3232 case TargetOpcode::G_INTRINSIC_TRUNC:
3233 case TargetOpcode::G_INTRINSIC_ROUND:
3234 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3235 assert(TypeIdx == 0);
3237
3238 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3239 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3240
3241 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3243 return Legalized;
3244 case TargetOpcode::G_FPOWI:
3245 case TargetOpcode::G_FLDEXP:
3246 case TargetOpcode::G_STRICT_FLDEXP: {
3247 if (TypeIdx == 0) {
3248 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3249 return UnableToLegalize;
3250
3252 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3253 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3255 return Legalized;
3256 }
3257
3258 if (TypeIdx == 1) {
3259 // For some reason SelectionDAG tries to promote to a libcall without
3260 // actually changing the integer type for promotion.
3262 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3264 return Legalized;
3265 }
3266
3267 return UnableToLegalize;
3268 }
3269 case TargetOpcode::G_FFREXP: {
3271
3272 if (TypeIdx == 0) {
3273 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3274 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3275 } else {
3276 widenScalarDst(MI, WideTy, 1);
3277 }
3278
3280 return Legalized;
3281 }
3282 case TargetOpcode::G_INTTOPTR:
3283 if (TypeIdx != 1)
3284 return UnableToLegalize;
3285
3287 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3289 return Legalized;
3290 case TargetOpcode::G_PTRTOINT:
3291 if (TypeIdx != 0)
3292 return UnableToLegalize;
3293
3295 widenScalarDst(MI, WideTy, 0);
3297 return Legalized;
3298 case TargetOpcode::G_BUILD_VECTOR: {
3300
3301 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3302 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3303 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3304
3305 // Avoid changing the result vector type if the source element type was
3306 // requested.
3307 if (TypeIdx == 1) {
3308 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3309 } else {
3310 widenScalarDst(MI, WideTy, 0);
3311 }
3312
3314 return Legalized;
3315 }
3316 case TargetOpcode::G_SEXT_INREG:
3317 if (TypeIdx != 0)
3318 return UnableToLegalize;
3319
3321 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3322 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3324 return Legalized;
3325 case TargetOpcode::G_PTRMASK: {
3326 if (TypeIdx != 1)
3327 return UnableToLegalize;
3329 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3331 return Legalized;
3332 }
3333 case TargetOpcode::G_VECREDUCE_FADD:
3334 case TargetOpcode::G_VECREDUCE_FMUL:
3335 case TargetOpcode::G_VECREDUCE_FMIN:
3336 case TargetOpcode::G_VECREDUCE_FMAX:
3337 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3338 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3339 if (TypeIdx != 0)
3340 return UnableToLegalize;
3342 Register VecReg = MI.getOperand(1).getReg();
3343 LLT VecTy = MRI.getType(VecReg);
3344 LLT WideVecTy = VecTy.isVector()
3345 ? LLT::vector(VecTy.getElementCount(), WideTy)
3346 : WideTy;
3347 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3348 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3350 return Legalized;
3351 }
3352 case TargetOpcode::G_VSCALE: {
3353 MachineOperand &SrcMO = MI.getOperand(1);
3354 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3355 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3356 // The CImm is always a signed value
3357 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3359 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3360 widenScalarDst(MI, WideTy);
3362 return Legalized;
3363 }
3364 case TargetOpcode::G_SPLAT_VECTOR: {
3365 if (TypeIdx != 1)
3366 return UnableToLegalize;
3367
3369 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3371 return Legalized;
3372 }
3373 case TargetOpcode::G_INSERT_SUBVECTOR: {
3374 if (TypeIdx != 0)
3375 return UnableToLegalize;
3376
3377 GInsertSubvector &IS = cast<GInsertSubvector>(MI);
3378 Register BigVec = IS.getBigVec();
3379 Register SubVec = IS.getSubVec();
3380
3381 LLT SubVecTy = MRI.getType(SubVec);
3382 LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
3383
3384 // Widen the G_INSERT_SUBVECTOR
3385 auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
3386 auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3387 auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3388 IS.getIndexImm());
3389
3390 // Truncate back down
3391 auto SplatZero = MIRBuilder.buildSplatVector(
3392 WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
3393 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Dst, WideInsert,
3394 SplatZero);
3395
3396 MI.eraseFromParent();
3397
3398 return Legalized;
3399 }
3400 }
3401}
3402
3403static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3404 MachineIRBuilder &B, Register Src, LLT Ty) {
3405 auto Unmerge = B.buildUnmerge(Ty, Src);
3406 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3407 Pieces.push_back(Unmerge.getReg(I));
3408}
3409
3410static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3411 MachineIRBuilder &MIRBuilder) {
3412 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3413 MachineFunction &MF = MIRBuilder.getMF();
3414 const DataLayout &DL = MIRBuilder.getDataLayout();
3415 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3416 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3417 LLT DstLLT = MRI.getType(DstReg);
3418
3419 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3420
3421 auto Addr = MIRBuilder.buildConstantPool(
3422 AddrPtrTy,
3423 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3424
3425 MachineMemOperand *MMO =
3426 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3427 MachineMemOperand::MOLoad, DstLLT, Alignment);
3428
3429 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3430}
3431
3432LegalizerHelper::LegalizeResult
3433LegalizerHelper::lowerConstant(MachineInstr &MI) {
3434 const MachineOperand &ConstOperand = MI.getOperand(1);
3435 const Constant *ConstantVal = ConstOperand.getCImm();
3436
3437 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3438 MI.eraseFromParent();
3439
3440 return Legalized;
3441}
3442
3443LegalizerHelper::LegalizeResult
3444LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3445 const MachineOperand &ConstOperand = MI.getOperand(1);
3446 const Constant *ConstantVal = ConstOperand.getFPImm();
3447
3448 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3449 MI.eraseFromParent();
3450
3451 return Legalized;
3452}
3453
3454LegalizerHelper::LegalizeResult
3455LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3456 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3457 if (SrcTy.isVector()) {
3458 LLT SrcEltTy = SrcTy.getElementType();
3459 SmallVector<Register, 8> SrcRegs;
3460
3461 if (DstTy.isVector()) {
3462 int NumDstElt = DstTy.getNumElements();
3463 int NumSrcElt = SrcTy.getNumElements();
3464
3465 LLT DstEltTy = DstTy.getElementType();
3466 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3467 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3468
3469 // If there's an element size mismatch, insert intermediate casts to match
3470 // the result element type.
3471 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3472 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3473 //
3474 // =>
3475 //
3476 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3477 // %4:_(<2 x s8>) = G_BITCAST %2
3478 // %5:_(<2 x s8>) = G_BITCAST %3
3479 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3480 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
3481 SrcPartTy = SrcEltTy;
3482 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3483 //
3484 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3485 //
3486 // =>
3487 //
3488 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3489 // %4:_(s16) = G_BITCAST %2
3490 // %5:_(s16) = G_BITCAST %3
3491 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3492 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
3493 DstCastTy = DstEltTy;
3494 }
3495
3496 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3497 for (Register &SrcReg : SrcRegs)
3498 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3499 } else
3500 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3501
3502 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3503 MI.eraseFromParent();
3504 return Legalized;
3505 }
3506
3507 if (DstTy.isVector()) {
3508 SmallVector<Register, 8> SrcRegs;
3509 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3510 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3511 MI.eraseFromParent();
3512 return Legalized;
3513 }
3514
3515 return UnableToLegalize;
3516}
3517
3518/// Figure out the bit offset into a register when coercing a vector index for
3519/// the wide element type. This is only for the case when promoting vector to
3520/// one with larger elements.
3521//
3522///
3523/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3524/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
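///
/// For instance, with s8 elements viewed as s32 (ratio 4), %idx = 6 gives
/// %offset_idx = 6 & 3 = 2 and %offset_bits = 2 << 3 = 16, i.e. the third
/// byte within the containing 32-bit element.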
3525static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3526 Register Idx,
3527 unsigned NewEltSize,
3528 unsigned OldEltSize) {
3529 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3530 LLT IdxTy = B.getMRI()->getType(Idx);
3531
3532 // Now figure out the amount we need to shift to get the target bits.
3533 auto OffsetMask = B.buildConstant(
3534 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3535 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3536 return B.buildShl(IdxTy, OffsetIdx,
3537 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3538}
3539
3540/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3541/// is casting to a vector with a smaller element size, perform multiple element
3542/// extracts and merge the results. If this is coercing to a vector with larger
3543/// elements, index the bitcasted vector and extract the target element with bit
3544/// operations. This is intended to force the indexing in the native register
3545/// size for architectures that can dynamically index the register file.
3546LegalizerHelper::LegalizeResult LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3547 LLT CastTy) {
3549 if (TypeIdx != 1)
3550 return UnableToLegalize;
3551
3552 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3553
3554 LLT SrcEltTy = SrcVecTy.getElementType();
3555 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3556 unsigned OldNumElts = SrcVecTy.getNumElements();
3557
3558 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3559 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3560
3561 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3562 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3563 if (NewNumElts > OldNumElts) {
3564 // Decreasing the vector element size
3565 //
3566 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3567 // =>
3568 // v4i32:castx = bitcast x:v2i64
3569 //
3570 // i64 = bitcast
3571 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3572 // (i32 (extract_vector_elt castx, (2 * y + 1))))
3573 //
3574 if (NewNumElts % OldNumElts != 0)
3575 return UnableToLegalize;
3576
3577 // Type of the intermediate result vector.
3578 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3579 LLT MidTy =
3580 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3581
3582 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3583
3584 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3585 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3586
3587 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3588 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3589 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3590 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3591 NewOps[I] = Elt.getReg(0);
3592 }
3593
3594 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3595 MIRBuilder.buildBitcast(Dst, NewVec);
3596 MI.eraseFromParent();
3597 return Legalized;
3598 }
3599
3600 if (NewNumElts < OldNumElts) {
3601 if (NewEltSize % OldEltSize != 0)
3602 return UnableToLegalize;
3603
3604 // This only depends on powers of 2 because we use bit tricks to figure out
3605 // the bit offset we need to shift to get the target element. A general
3606 // expansion could emit division/multiply.
3607 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3608 return UnableToLegalize;
3609
3610 // Increasing the vector element size.
3611 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3612 //
3613 // =>
3614 //
3615 // %cast = G_BITCAST %vec
3616 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3617 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3618 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3619 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3620 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3621 // %elt = G_TRUNC %elt_bits
3622
3623 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3624 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3625
3626 // Divide to get the index in the wider element type.
3627 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3628
3629 Register WideElt = CastVec;
3630 if (CastTy.isVector()) {
3631 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3632 ScaledIdx).getReg(0);
3633 }
3634
3635 // Compute the bit offset into the register of the target element.
3636 Register OffsetBits = getBitcastWiderVectorElementOffset(
3637 MIRBuilder, Idx, NewEltSize, OldEltSize);
3638
3639 // Shift the wide element to get the target element.
3640 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3641 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3642 MI.eraseFromParent();
3643 return Legalized;
3644 }
3645
3646 return UnableToLegalize;
3647}
3648
3649/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits while
3650/// preserving the other bits in \p TargetReg.
3651///
3652/// (ZExt(InsertReg) << OffsetBits) | (TargetReg & ~(LowBitsMask(InsertReg.size()) << OffsetBits))
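///
/// For example, inserting an s8 value into an s32 target at OffsetBits = 16:
/// the low-bits mask is 0xff, shifting it into position gives 0x00ff0000,
/// the target is ANDed with ~0x00ff0000 to clear that byte, and the
/// zero-extended value shifted left by 16 is ORed in.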
3653static Register buildBitFieldInsert(MachineIRBuilder &B,
3654 Register TargetReg, Register InsertReg,
3655 Register OffsetBits) {
3656 LLT TargetTy = B.getMRI()->getType(TargetReg);
3657 LLT InsertTy = B.getMRI()->getType(InsertReg);
3658 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3659 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3660
3661 // Produce a bitmask of the value to insert
3662 auto EltMask = B.buildConstant(
3663 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3664 InsertTy.getSizeInBits()));
3665 // Shift it into position
3666 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3667 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3668
3669 // Clear out the bits in the wide element
3670 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3671
3672 // The value to insert was zero-extended above, so its high bits are already
3673 // clear; OR it into the masked wide element.
3674 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3675}
3676
3677/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3678/// is increasing the element size, perform the indexing in the target element
3679/// type, and use bit operations to insert at the element position. This is
3680/// intended for architectures that can dynamically index the register file and
3681/// want to force indexing in the native register size.
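///
/// A rough sketch of the expansion for inserting an s8 element through an
/// s32-element cast (register names and types are illustrative only):
///
///   %cast:_(<2 x s32>) = G_BITCAST %vec:_(<8 x s8>)
///   %scaled_idx = G_LSHR %idx, 2
///   %wide_elt:_(s32) = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
///   ... bit-field insert of %val into %wide_elt at the computed bit offset ...
///   %new_vec:_(<2 x s32>) = G_INSERT_VECTOR_ELT %cast, %new_elt, %scaled_idx
///   %dst:_(<8 x s8>) = G_BITCAST %new_vec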
3682LegalizerHelper::LegalizeResult
3683LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3684 LLT CastTy) {
3685 if (TypeIdx != 0)
3686 return UnableToLegalize;
3687
3688 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3689 MI.getFirst4RegLLTs();
3690 LLT VecTy = DstTy;
3691
3692 LLT VecEltTy = VecTy.getElementType();
3693 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3694 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3695 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3696
3697 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3698 unsigned OldNumElts = VecTy.getNumElements();
3699
3700 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3701 if (NewNumElts < OldNumElts) {
3702 if (NewEltSize % OldEltSize != 0)
3703 return UnableToLegalize;
3704
3705 // This only depends on powers of 2 because we use bit tricks to figure out
3706 // the bit offset we need to shift to get the target element. A general
3707 // expansion could emit division/multiply.
3708 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3709 return UnableToLegalize;
3710
3711 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3712 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3713
3714 // Divide to get the index in the wider element type.
3715 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3716
3717 Register ExtractedElt = CastVec;
3718 if (CastTy.isVector()) {
3719 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3720 ScaledIdx).getReg(0);
3721 }
3722
3723 // Compute the bit offset into the register of the target element.
3724 Register OffsetBits = getBitcastWiderVectorElementOffset(
3725 MIRBuilder, Idx, NewEltSize, OldEltSize);
3726
3727 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3728 Val, OffsetBits);
3729 if (CastTy.isVector()) {
3730 InsertedElt = MIRBuilder.buildInsertVectorElement(
3731 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3732 }
3733
3734 MIRBuilder.buildBitcast(Dst, InsertedElt);
3735 MI.eraseFromParent();
3736 return Legalized;
3737 }
3738
3739 return UnableToLegalize;
3740}
3741
3742// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3743// those whose operands are smaller than the legal vector size.
3744//
3745// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3746//
3747// ===>
3748//
3749// s32 = G_BITCAST <4 x s8>
3750// s32 = G_BITCAST <4 x s8>
3751// s32 = G_BITCAST <4 x s8>
3752// s32 = G_BITCAST <4 x s8>
3753// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3754// <16 x s8> = G_BITCAST <4 x s32>
3755LegalizerHelper::LegalizeResult
3756LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
3757 LLT CastTy) {
3758 // Check that this is a G_CONCAT_VECTORS instruction.
3759 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3760 if (!ConcatMI) {
3761 return UnableToLegalize;
3762 }
3763
3764 // Check if bitcast is Legal
3765 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3766 LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
3767
3768 // Check if the build vector is Legal
3769 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3770 return UnableToLegalize;
3771 }
3772
3773 // Bitcast the sources
3774 SmallVector<Register> BitcastRegs;
3775 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3776 BitcastRegs.push_back(
3777 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3778 .getReg(0));
3779 }
3780
3781 // Build the scalar values into a vector
3782 Register BuildReg =
3783 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
3784 MIRBuilder.buildBitcast(DstReg, BuildReg);
3785
3786 MI.eraseFromParent();
3787 return Legalized;
3788}
3789
3790// This bitcasts a shuffle vector to a different type with (currently) the
3791// same element size. Mostly used to legalize ptr vectors, where
3792// ptrtoint/inttoptr will be used instead.
3793//
3794// <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
3795// ===>
3796// <4 x s64> = G_PTRTOINT <4 x p0>
3797// <4 x s64> = G_PTRTOINT <4 x p0>
3798// <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
3799// <16 x p0> = G_INTTOPTR <16 x s64>
3800LegalizerHelper::LegalizeResult
3801LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
3802 LLT CastTy) {
3803 auto ShuffleMI = cast<GShuffleVector>(&MI);
3804 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
3805 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
3806
3807 // We currently only handle vectors of the same size.
3808 if (TypeIdx != 0 ||
3809 CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
3810 CastTy.getElementCount() != DstTy.getElementCount())
3811 return UnableToLegalize;
3812
3813 LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
3814
3815 auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
3816 auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
3817 auto Shuf =
3818 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
3819 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
3820
3821 MI.eraseFromParent();
3822 return Legalized;
3823}
3824
3825/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
3826///
3827/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
3828///
3829/// ===>
3830///
3831/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
3832/// <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i8>, N / 8
3833/// <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
3834LegalizerHelper::LegalizeResult
3835LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
3836 LLT CastTy) {
3837 auto ES = cast<GExtractSubvector>(&MI);
3838
3839 if (!CastTy.isVector())
3840 return UnableToLegalize;
3841
3842 if (TypeIdx != 0)
3843 return UnableToLegalize;
3844
3845 Register Dst = ES->getReg(0);
3846 Register Src = ES->getSrcVec();
3847 uint64_t Idx = ES->getIndexImm();
3848
3850
3851 LLT DstTy = MRI.getType(Dst);
3852 LLT SrcTy = MRI.getType(Src);
3853 ElementCount DstTyEC = DstTy.getElementCount();
3854 ElementCount SrcTyEC = SrcTy.getElementCount();
3855 auto DstTyMinElts = DstTyEC.getKnownMinValue();
3856 auto SrcTyMinElts = SrcTyEC.getKnownMinValue();
3857
3858 if (DstTy == CastTy)
3859 return Legalized;
3860
3861 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
3862 return UnableToLegalize;
3863
3864 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
3865 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
3866 if (CastEltSize < DstEltSize)
3867 return UnableToLegalize;
3868
3869 auto AdjustAmt = CastEltSize / DstEltSize;
3870 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
3871 SrcTyMinElts % AdjustAmt != 0)
3872 return UnableToLegalize;
3873
3874 Idx /= AdjustAmt;
3875 SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
3876 auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
3877 auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
3878 MIRBuilder.buildBitcast(Dst, PromotedES);
3879
3880 ES->eraseFromParent();
3881 return Legalized;
3882}
3883
3884/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
3885///
3886/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
3887/// <vscale x 8 x i1>,
3888/// N
3889///
3890/// ===>
3891///
3892/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
3893/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
3894/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
3895/// <vscale x 1 x i8>, N / 8
3896/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
3897LegalizerHelper::LegalizeResult
3898LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
3899 LLT CastTy) {
3900 auto ES = cast<GInsertSubvector>(&MI);
3901
3902 if (!CastTy.isVector())
3903 return UnableToLegalize;
3904
3905 if (TypeIdx != 0)
3906 return UnableToLegalize;
3907
3908 Register Dst = ES->getReg(0);
3909 Register BigVec = ES->getBigVec();
3910 Register SubVec = ES->getSubVec();
3911 uint64_t Idx = ES->getIndexImm();
3912
3914
3915 LLT DstTy = MRI.getType(Dst);
3916 LLT BigVecTy = MRI.getType(BigVec);
3917 LLT SubVecTy = MRI.getType(SubVec);
3918
3919 if (DstTy == CastTy)
3920 return Legalized;
3921
3922 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
3923 return UnableToLegalize;
3924
3925 ElementCount DstTyEC = DstTy.getElementCount();
3926 ElementCount BigVecTyEC = BigVecTy.getElementCount();
3927 ElementCount SubVecTyEC = SubVecTy.getElementCount();
3928 auto DstTyMinElts = DstTyEC.getKnownMinValue();
3929 auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
3930 auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();
3931
3932 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
3933 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
3934 if (CastEltSize < DstEltSize)
3935 return UnableToLegalize;
3936
3937 auto AdjustAmt = CastEltSize / DstEltSize;
3938 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
3939 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
3940 return UnableToLegalize;
3941
3942 Idx /= AdjustAmt;
3943 BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
3944 SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
3945 auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
3946 auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
3947 auto PromotedIS =
3948 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
3949 MIRBuilder.buildBitcast(Dst, PromotedIS);
3950
3951 ES->eraseFromParent();
3952 return Legalized;
3953}
3954
3955LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
3956 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
3957 Register DstReg = LoadMI.getDstReg();
3958 Register PtrReg = LoadMI.getPointerReg();
3959 LLT DstTy = MRI.getType(DstReg);
3960 MachineMemOperand &MMO = LoadMI.getMMO();
3961 LLT MemTy = MMO.getMemoryType();
3962 MachineFunction &MF = MIRBuilder.getMF();
3963
3964 unsigned MemSizeInBits = MemTy.getSizeInBits();
3965 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
3966
3967 if (MemSizeInBits != MemStoreSizeInBits) {
3968 if (MemTy.isVector())
3969 return UnableToLegalize;
3970
3971 // Promote to a byte-sized load if not loading an integral number of
3972 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
3973 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
3974 MachineMemOperand *NewMMO =
3975 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
3976
3977 Register LoadReg = DstReg;
3978 LLT LoadTy = DstTy;
3979
3980 // If this wasn't already an extending load, we need to widen the result
3981 // register to avoid creating a load with a narrower result than the source.
3982 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
3983 LoadTy = WideMemTy;
3984 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
3985 }
3986
3987 if (isa<GSExtLoad>(LoadMI)) {
3988 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3989 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
3990 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
3991 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3992 // The extra bits are guaranteed to be zero, since we stored them that
3993 // way. A zext load from Wide thus automatically gives zext from MemVT.
3994 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
3995 } else {
3996 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
3997 }
3998
3999 if (DstTy != LoadTy)
4000 MIRBuilder.buildTrunc(DstReg, LoadReg);
4001
4002 LoadMI.eraseFromParent();
4003 return Legalized;
4004 }
4005
4006 // Big endian lowering not implemented.
4007 if (MIRBuilder.getDataLayout().isBigEndian())
4008 return UnableToLegalize;
4009
4010 // This load needs splitting into power of 2 sized loads.
4011 //
4012 // Our strategy here is to generate anyextending loads for the smaller
4013 // types up to next power-2 result type, and then combine the two larger
4014 // result values together, before truncating back down to the non-pow-2
4015 // type.
4016 // E.g. v1 = i24 load =>
4017 // v2 = i32 zextload (2 byte)
4018 // v3 = i32 load (1 byte)
4019 // v4 = i32 shl v3, 16
4020 // v5 = i32 or v4, v2
4021 // v1 = i24 trunc v5
4022 // By doing this we generate the correct truncate which should get
4023 // combined away as an artifact with a matching extend.
4024
4025 uint64_t LargeSplitSize, SmallSplitSize;
4026
4027 if (!isPowerOf2_32(MemSizeInBits)) {
4028 // This load needs splitting into power of 2 sized loads.
4029 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
4030 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4031 } else {
4032 // This is already a power of 2, but we still need to split this in half.
4033 //
4034 // Assume we're being asked to decompose an unaligned load.
4035 // TODO: If this requires multiple splits, handle them all at once.
4036 auto &Ctx = MF.getFunction().getContext();
4037 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4038 return UnableToLegalize;
4039
4040 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4041 }
4042
4043 if (MemTy.isVector()) {
4044 // TODO: Handle vector extloads
4045 if (MemTy != DstTy)
4046 return UnableToLegalize;
4047
4048 // TODO: We can do better than scalarizing the vector and at least split it
4049 // in half.
4050 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
4051 }
4052
4053 MachineMemOperand *LargeMMO =
4054 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4055 MachineMemOperand *SmallMMO =
4056 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4057
4058 LLT PtrTy = MRI.getType(PtrReg);
4059 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
4060 LLT AnyExtTy = LLT::scalar(AnyExtSize);
4061 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4062 PtrReg, *LargeMMO);
4063
4064 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
4065 LargeSplitSize / 8);
4066 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4067 auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
4068 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
4069 SmallPtr, *SmallMMO);
4070
4071 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4072 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4073
4074 if (AnyExtTy == DstTy)
4075 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4076 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
4077 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4078 MIRBuilder.buildTrunc(DstReg, {Or});
4079 } else {
4080 assert(DstTy.isPointer() && "expected pointer");
4081 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4082
4083 // FIXME: We currently consider this to be illegal for non-integral address
4084 // spaces, but we still need a way to reinterpret the bits.
4085 MIRBuilder.buildIntToPtr(DstReg, Or);
4086 }
4087
4088 LoadMI.eraseFromParent();
4089 return Legalized;
4090}
4091
4092LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
4093 // Lower a non-power of 2 store into multiple pow-2 stores.
4094 // E.g. split an i24 store into an i16 store + i8 store.
4095 // We do this by first extending the stored value to the next largest power
4096 // of 2 type, and then using truncating stores to store the components.
4097 // As with G_LOAD, this generates an extend that can be artifact-combined
4098 // away instead of leaving behind extracts.
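  // For instance (illustrative only), an s24 store roughly becomes:
  //   s32 %ext = anyext %val
  //   store %ext          (2 byte truncating store)
  //   s32 %hi = lshr %ext, 16
  //   store %hi at ptr + 2 (1 byte truncating store)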
4099 Register SrcReg = StoreMI.getValueReg();
4100 Register PtrReg = StoreMI.getPointerReg();
4101 LLT SrcTy = MRI.getType(SrcReg);
4102 MachineFunction &MF = MIRBuilder.getMF();
4103 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4104 LLT MemTy = MMO.getMemoryType();
4105
4106 unsigned StoreWidth = MemTy.getSizeInBits();
4107 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
4108
4109 if (StoreWidth != StoreSizeInBits) {
4110 if (SrcTy.isVector())
4111 return UnableToLegalize;
4112
4113 // Promote to a byte-sized store with upper bits zero if not
4114 // storing an integral number of bytes. For example, promote
4115 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
4116 LLT WideTy = LLT::scalar(StoreSizeInBits);
4117
4118 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4119 // Avoid creating a store with a narrower source than result.
4120 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4121 SrcTy = WideTy;
4122 }
4123
4124 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4125
4126 MachineMemOperand *NewMMO =
4127 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
4128 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4129 StoreMI.eraseFromParent();
4130 return Legalized;
4131 }
4132
4133 if (MemTy.isVector()) {
4134 // TODO: Handle vector trunc stores
4135 if (MemTy != SrcTy)
4136 return UnableToLegalize;
4137
4138 // TODO: We can do better than scalarizing the vector and at least split it
4139 // in half.
4140 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
4141 }
4142
4143 unsigned MemSizeInBits = MemTy.getSizeInBits();
4144 uint64_t LargeSplitSize, SmallSplitSize;
4145
4146 if (!isPowerOf2_32(MemSizeInBits)) {
4147 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
4148 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
4149 } else {
4150 auto &Ctx = MF.getFunction().getContext();
4151 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4152 return UnableToLegalize; // Don't know what we're being asked to do.
4153
4154 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4155 }
4156
4157 // Extend to the next pow-2. If this store was itself the result of lowering,
4158 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
4159 // that's wider than the stored size.
4160 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
4161 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
4162
4163 if (SrcTy.isPointer()) {
4164 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
4165 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4166 }
4167
4168 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4169
4170 // Obtain the smaller value by shifting away the larger value.
4171 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4172 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4173
4174 // Generate the PtrAdd and truncating stores.
4175 LLT PtrTy = MRI.getType(PtrReg);
4176 auto OffsetCst = MIRBuilder.buildConstant(
4177 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
4178 auto SmallPtr =
4179 MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
4180
4181 MachineMemOperand *LargeMMO =
4182 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4183 MachineMemOperand *SmallMMO =
4184 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4185 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4186 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4187 StoreMI.eraseFromParent();
4188 return Legalized;
4189}
4190
4191LegalizerHelper::LegalizeResult
4192LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
4193 switch (MI.getOpcode()) {
4194 case TargetOpcode::G_LOAD: {
4195 if (TypeIdx != 0)
4196 return UnableToLegalize;
4197 MachineMemOperand &MMO = **MI.memoperands_begin();
4198
4199 // Not sure how to interpret a bitcast of an extending load.
4200 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4201 return UnableToLegalize;
4202
4203 Observer.changingInstr(MI);
4204 bitcastDst(MI, CastTy, 0);
4205 MMO.setType(CastTy);
4206 // The range metadata is no longer valid when reinterpreted as a different
4207 // type.
4208 MMO.clearRanges();
4209 Observer.changedInstr(MI);
4210 return Legalized;
4211 }
4212 case TargetOpcode::G_STORE: {
4213 if (TypeIdx != 0)
4214 return UnableToLegalize;
4215
4216 MachineMemOperand &MMO = **MI.memoperands_begin();
4217
4218 // Not sure how to interpret a bitcast of a truncating store.
4219 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4220 return UnableToLegalize;
4221
4222 Observer.changingInstr(MI);
4223 bitcastSrc(MI, CastTy, 0);
4224 MMO.setType(CastTy);
4225 Observer.changedInstr(MI);
4226 return Legalized;
4227 }
4228 case TargetOpcode::G_SELECT: {
4229 if (TypeIdx != 0)
4230 return UnableToLegalize;
4231
4232 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
4233 LLVM_DEBUG(
4234 dbgs() << "bitcast action not implemented for vector select\n");
4235 return UnableToLegalize;
4236 }
4237
4238 Observer.changingInstr(MI);
4239 bitcastSrc(MI, CastTy, 2);
4240 bitcastSrc(MI, CastTy, 3);
4241 bitcastDst(MI, CastTy, 0);
4242 Observer.changedInstr(MI);
4243 return Legalized;
4244 }
4245 case TargetOpcode::G_AND:
4246 case TargetOpcode::G_OR:
4247 case TargetOpcode::G_XOR: {
4248 Observer.changingInstr(MI);
4249 bitcastSrc(MI, CastTy, 1);
4250 bitcastSrc(MI, CastTy, 2);
4251 bitcastDst(MI, CastTy, 0);
4252 Observer.changedInstr(MI);
4253 return Legalized;
4254 }
4255 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4256 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
4257 case TargetOpcode::G_INSERT_VECTOR_ELT:
4258 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
4259 case TargetOpcode::G_CONCAT_VECTORS:
4260 return bitcastConcatVector(MI, TypeIdx, CastTy);
4261 case TargetOpcode::G_SHUFFLE_VECTOR:
4262 return bitcastShuffleVector(MI, TypeIdx, CastTy);
4263 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4264 return bitcastExtractSubvector(MI, TypeIdx, CastTy);
4265 case TargetOpcode::G_INSERT_SUBVECTOR:
4266 return bitcastInsertSubvector(MI, TypeIdx, CastTy);
4267 default:
4268 return UnableToLegalize;
4269 }
4270}
4271
4272// Legalize an instruction by changing the opcode in place.
4273void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
4274 Observer.changingInstr(MI);
4275 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
4276 Observer.changedInstr(MI);
4277}
4278
4279LegalizerHelper::LegalizeResult
4280LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4281 using namespace TargetOpcode;
4282
4283 switch(MI.getOpcode()) {
4284 default:
4285 return UnableToLegalize;
4286 case TargetOpcode::G_FCONSTANT:
4287 return lowerFConstant(MI);
4288 case TargetOpcode::G_BITCAST:
4289 return lowerBitcast(MI);
4290 case TargetOpcode::G_SREM:
4291 case TargetOpcode::G_UREM: {
4292 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4293 auto Quot =
4294 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4295 {MI.getOperand(1), MI.getOperand(2)});
4296
4297 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
4298 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
4299 MI.eraseFromParent();
4300 return Legalized;
4301 }
4302 case TargetOpcode::G_SADDO:
4303 case TargetOpcode::G_SSUBO:
4304 return lowerSADDO_SSUBO(MI);
4305 case TargetOpcode::G_UMULH:
4306 case TargetOpcode::G_SMULH:
4307 return lowerSMULH_UMULH(MI);
4308 case TargetOpcode::G_SMULO:
4309 case TargetOpcode::G_UMULO: {
4310 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4311 // result.
4312 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
4313 LLT Ty = MRI.getType(Res);
4314
4315 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4316 ? TargetOpcode::G_SMULH
4317 : TargetOpcode::G_UMULH;
4318
4319 Observer.changingInstr(MI);
4320 const auto &TII = MIRBuilder.getTII();
4321 MI.setDesc(TII.get(TargetOpcode::G_MUL));
4322 MI.removeOperand(1);
4323 Observer.changedInstr(MI);
4324
4325 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4326 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4327
4328 // Move insert point forward so we can use the Res register if needed.
4329 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
4330
4331 // For *signed* multiply, overflow is detected by checking:
4332 // (hi != (lo >> bitwidth-1))
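    // E.g. for s32, 0x7fffffff * 2 has lo = 0xfffffffe and hi = 0; the
    // arithmetic shift of lo gives 0xffffffff != hi, so overflow is reported,
    // matching the fact that the signed product does not fit in 32 bits.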
4333 if (Opcode == TargetOpcode::G_SMULH) {
4334 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4335 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4336 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
4337 } else {
4338 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
4339 }
4340 return Legalized;
4341 }
4342 case TargetOpcode::G_FNEG: {
4343 auto [Res, SubByReg] = MI.getFirst2Regs();
4344 LLT Ty = MRI.getType(Res);
4345
4346 auto SignMask = MIRBuilder.buildConstant(
4347 Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
4348 MIRBuilder.buildXor(Res, SubByReg, SignMask);
4349 MI.eraseFromParent();
4350 return Legalized;
4351 }
4352 case TargetOpcode::G_FSUB:
4353 case TargetOpcode::G_STRICT_FSUB: {
4354 auto [Res, LHS, RHS] = MI.getFirst3Regs();
4355 LLT Ty = MRI.getType(Res);
4356
4357 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4358 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4359
4360 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4361 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4362 else
4363 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4364
4365 MI.eraseFromParent();
4366 return Legalized;
4367 }
4368 case TargetOpcode::G_FMAD:
4369 return lowerFMad(MI);
4370 case TargetOpcode::G_FFLOOR:
4371 return lowerFFloor(MI);
4372 case TargetOpcode::G_LROUND:
4373 case TargetOpcode::G_LLROUND: {
4374 Register DstReg = MI.getOperand(0).getReg();
4375 Register SrcReg = MI.getOperand(1).getReg();
4376 LLT SrcTy = MRI.getType(SrcReg);
4377 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4378 {SrcReg});
4379 MIRBuilder.buildFPTOSI(DstReg, Round);
4380 MI.eraseFromParent();
4381 return Legalized;
4382 }
4383 case TargetOpcode::G_INTRINSIC_ROUND:
4384 return lowerIntrinsicRound(MI);
4385 case TargetOpcode::G_FRINT: {
4386 // Since round even is the assumed rounding mode for unconstrained FP
4387 // operations, rint and roundeven are the same operation.
4388 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4389 return Legalized;
4390 }
4391 case TargetOpcode::G_INTRINSIC_LRINT:
4392 case TargetOpcode::G_INTRINSIC_LLRINT: {
4393 Register DstReg = MI.getOperand(0).getReg();
4394 Register SrcReg = MI.getOperand(1).getReg();
4395 LLT SrcTy = MRI.getType(SrcReg);
4396 auto Round =
4397 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4398 MIRBuilder.buildFPTOSI(DstReg, Round);
4399 MI.eraseFromParent();
4400 return Legalized;
4401 }
4402 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4403 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
4404 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4405 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4406 **MI.memoperands_begin());
4407 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4408 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4409 MI.eraseFromParent();
4410 return Legalized;
4411 }
4412 case TargetOpcode::G_LOAD:
4413 case TargetOpcode::G_SEXTLOAD:
4414 case TargetOpcode::G_ZEXTLOAD:
4415 return lowerLoad(cast<GAnyLoad>(MI));
4416 case TargetOpcode::G_STORE:
4417 return lowerStore(cast<GStore>(MI));
4418 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4419 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4420 case TargetOpcode::G_CTLZ:
4421 case TargetOpcode::G_CTTZ:
4422 case TargetOpcode::G_CTPOP:
4423 return lowerBitCount(MI);
4424 case G_UADDO: {
4425 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
4426
4427 Register NewRes = MRI.cloneVirtualRegister(Res);
4428
4429 MIRBuilder.buildAdd(NewRes, LHS, RHS);
4430 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4431
4432 MIRBuilder.buildCopy(Res, NewRes);
4433
4434 MI.eraseFromParent();
4435 return Legalized;
4436 }
4437 case G_UADDE: {
4438 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
4439 const LLT CondTy = MRI.getType(CarryOut);
4440 const LLT Ty = MRI.getType(Res);
4441
4442 Register NewRes = MRI.cloneVirtualRegister(Res);
4443
4444 // Initial add of the two operands.
4445 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
4446
4447 // Initial check for carry.
4448 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4449
4450 // Add the sum and the carry.
4451 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
4452 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4453
4454 // Second check for carry. We can only carry if the initial sum is all 1s
4455 // and the carry is set, resulting in a new sum of 0.
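    // E.g. with s8 operands, LHS = 0xff, RHS = 0x00, CarryIn = 1: TmpRes is
    // 0xff with no initial carry, NewRes wraps to 0x00, and the second check
    // (NewRes == 0 && CarryIn) sets CarryOut as expected.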
4456 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4457 auto ResEqZero =
4458 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
4459 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4460 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
4461
4462 MIRBuilder.buildCopy(Res, NewRes);
4463
4464 MI.eraseFromParent();
4465 return Legalized;
4466 }
4467 case G_USUBO: {
4468 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
4469
4470 MIRBuilder.buildSub(Res, LHS, RHS);
4471 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
4472
4473 MI.eraseFromParent();
4474 return Legalized;
4475 }
4476 case G_USUBE: {
4477 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
4478 const LLT CondTy = MRI.getType(BorrowOut);
4479 const LLT Ty = MRI.getType(Res);
4480
4481 // Initial subtract of the two operands.
4482 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
4483
4484 // Initial check for borrow.
4485 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4486
4487 // Subtract the borrow from the first subtract.
4488 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
4489 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4490
4491 // Second check for borrow. We can only borrow if the initial difference is
4492 // 0 and the borrow is set, resulting in a new difference of all 1s.
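    // E.g. with s8 operands, LHS = RHS = 0x00, BorrowIn = 1: TmpRes is 0 with
    // no initial borrow, Res wraps to 0xff, and the second check
    // (TmpRes == 0 && BorrowIn) sets BorrowOut as expected.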
4493 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4494 auto TmpResEqZero =
4495 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4496 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4497 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4498
4499 MI.eraseFromParent();
4500 return Legalized;
4501 }
4502 case G_UITOFP:
4503 return lowerUITOFP(MI);
4504 case G_SITOFP:
4505 return lowerSITOFP(MI);
4506 case G_FPTOUI:
4507 return lowerFPTOUI(MI);
4508 case G_FPTOSI:
4509 return lowerFPTOSI(MI);
4510 case G_FPTOUI_SAT:
4511 case G_FPTOSI_SAT:
4512 return lowerFPTOINT_SAT(MI);
4513 case G_FPTRUNC:
4514 return lowerFPTRUNC(MI);
4515 case G_FPOWI:
4516 return lowerFPOWI(MI);
4517 case G_SMIN:
4518 case G_SMAX:
4519 case G_UMIN:
4520 case G_UMAX:
4521 return lowerMinMax(MI);
4522 case G_SCMP:
4523 case G_UCMP:
4524 return lowerThreewayCompare(MI);
4525 case G_FCOPYSIGN:
4526 return lowerFCopySign(MI);
4527 case G_FMINNUM:
4528 case G_FMAXNUM:
4529 return lowerFMinNumMaxNum(MI);
4530 case G_MERGE_VALUES:
4531 return lowerMergeValues(MI);
4532 case G_UNMERGE_VALUES:
4533 return lowerUnmergeValues(MI);
4534 case TargetOpcode::G_SEXT_INREG: {
4535 assert(MI.getOperand(2).isImm() && "Expected immediate");
4536 int64_t SizeInBits = MI.getOperand(2).getImm();
4537
4538 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4539 LLT DstTy = MRI.getType(DstReg);
4540 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4541
4542 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4543 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4544 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4545 MI.eraseFromParent();
4546 return Legalized;
4547 }
4548 case G_EXTRACT_VECTOR_ELT:
4549 case G_INSERT_VECTOR_ELT:
4550 return lowerExtractInsertVectorElt(MI);
4551 case G_SHUFFLE_VECTOR:
4552 return lowerShuffleVector(MI);
4553 case G_VECTOR_COMPRESS:
4554 return lowerVECTOR_COMPRESS(MI);
4555 case G_DYN_STACKALLOC:
4556 return lowerDynStackAlloc(MI);
4557 case G_STACKSAVE:
4558 return lowerStackSave(MI);
4559 case G_STACKRESTORE:
4560 return lowerStackRestore(MI);
4561 case G_EXTRACT:
4562 return lowerExtract(MI);
4563 case G_INSERT:
4564 return lowerInsert(MI);
4565 case G_BSWAP:
4566 return lowerBswap(MI);
4567 case G_BITREVERSE:
4568 return lowerBitreverse(MI);
4569 case G_READ_REGISTER:
4570 case G_WRITE_REGISTER:
4571 return lowerReadWriteRegister(MI);
4572 case G_UADDSAT:
4573 case G_USUBSAT: {
4574 // Try to make a reasonable guess about which lowering strategy to use. The
4575 // target can override this with custom lowering and by calling the
4576 // implementation functions directly.
4577 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4578 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4579 return lowerAddSubSatToMinMax(MI);
4580 return lowerAddSubSatToAddoSubo(MI);
4581 }
4582 case G_SADDSAT:
4583 case G_SSUBSAT: {
4584 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4585
4586 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4587 // since it's a shorter expansion. However, we would need to figure out the
4588 // preferred boolean type for the carry out for the query.
4589 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4590 return lowerAddSubSatToMinMax(MI);
4591 return lowerAddSubSatToAddoSubo(MI);
4592 }
4593 case G_SSHLSAT:
4594 case G_USHLSAT:
4595 return lowerShlSat(MI);
4596 case G_ABS:
4597 return lowerAbsToAddXor(MI);
4598 case G_FABS:
4599 return lowerFAbs(MI);
4600 case G_SELECT:
4601 return lowerSelect(MI);
4602 case G_IS_FPCLASS:
4603 return lowerISFPCLASS(MI);
4604 case G_SDIVREM:
4605 case G_UDIVREM:
4606 return lowerDIVREM(MI);
4607 case G_FSHL:
4608 case G_FSHR:
4609 return lowerFunnelShift(MI);
4610 case G_ROTL:
4611 case G_ROTR:
4612 return lowerRotate(MI);
4613 case G_MEMSET:
4614 case G_MEMCPY:
4615 case G_MEMMOVE:
4616 return lowerMemCpyFamily(MI);
4617 case G_MEMCPY_INLINE:
4618 return lowerMemcpyInline(MI);
4619 case G_ZEXT:
4620 case G_SEXT:
4621 case G_ANYEXT:
4622 return lowerEXT(MI);
4623 case G_TRUNC:
4624 return lowerTRUNC(MI);
4625 GISEL_VECREDUCE_CASES_NONSEQ
4626 return lowerVectorReduction(MI);
4627 case G_VAARG:
4628 return lowerVAArg(MI);
4629 }
4630}
4631
4632Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4633 Align MinAlign) const {
4634 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4635 // datalayout for the preferred alignment. Also there should be a target hook
4636 // for this to allow targets to reduce the alignment and ignore the
4637 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4638 // the type.
4639 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4640}
4641
4642MachineInstrBuilder
4643LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4644 MachinePointerInfo &PtrInfo) {
4645 MachineFunction &MF = MIRBuilder.getMF();
4646 const DataLayout &DL = MIRBuilder.getDataLayout();
4647 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4648
4649 unsigned AddrSpace = DL.getAllocaAddrSpace();
4650 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4651
4652 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4653 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4654}
4655
4656static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
4657 LLT VecTy) {
4658 LLT IdxTy = B.getMRI()->getType(IdxReg);
4659 unsigned NElts = VecTy.getNumElements();
4660
4661 int64_t IdxVal;
4662 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4663 if (IdxVal < VecTy.getNumElements())
4664 return IdxReg;
4665 // If a constant index would be out of bounds, clamp it as well.
4666 }
4667
4668 if (isPowerOf2_32(NElts)) {
4669 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4670 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4671 }
4672
4673 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4674 .getReg(0);
4675}
4676
4677Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4678 Register Index) {
4679 LLT EltTy = VecTy.getElementType();
4680
4681 // Calculate the element offset and add it to the pointer.
4682 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4683 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4684 "Converting bits to bytes lost precision");
4685
4686 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4687
4688 // Convert index to the correct size for the address space.
4689 const DataLayout &DL = MIRBuilder.getDataLayout();
4690 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4691 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4692 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4693 if (IdxTy != MRI.getType(Index))
4694 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4695
4696 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4697 MIRBuilder.buildConstant(IdxTy, EltSize));
4698
4699 LLT PtrTy = MRI.getType(VecPtr);
4700 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4701}
4702
4703#ifndef NDEBUG
4704/// Check that all vector operands have the same number of elements. Other
4705/// operands should be listed in \p NonVecOpIndices.
4706static bool hasSameNumEltsOnAllVectorOperands(
4707 GenericMachineInstr &MI, MachineRegisterInfo &MRI,
4708 std::initializer_list<unsigned> NonVecOpIndices) {
4709 if (MI.getNumMemOperands() != 0)
4710 return false;
4711
4712 LLT VecTy = MRI.getType(MI.getReg(0));
4713 if (!VecTy.isVector())
4714 return false;
4715 unsigned NumElts = VecTy.getNumElements();
4716
4717 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4718 MachineOperand &Op = MI.getOperand(OpIdx);
4719 if (!Op.isReg()) {
4720 if (!is_contained(NonVecOpIndices, OpIdx))
4721 return false;
4722 continue;
4723 }
4724
4725 LLT Ty = MRI.getType(Op.getReg());
4726 if (!Ty.isVector()) {
4727 if (!is_contained(NonVecOpIndices, OpIdx))
4728 return false;
4729 continue;
4730 }
4731
4732 if (Ty.getNumElements() != NumElts)
4733 return false;
4734 }
4735
4736 return true;
4737}
4738#endif
4739
4740/// Fill \p DstOps with DstOps that, combined, have the same number of elements
4741/// as \p Ty. These DstOps are either scalars when \p NumElts = 1 or vectors
4742/// with \p NumElts elements. When Ty.getNumElements() is not a multiple of
4743/// \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
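/// For example, Ty = <7 x s16> with \p NumElts = 2 yields three <2 x s16>
/// DstOps plus one s16 leftover DstOp.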
4744static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
4745 unsigned NumElts) {
4746 LLT LeftoverTy;
4747 assert(Ty.isVector() && "Expected vector type");
4748 LLT EltTy = Ty.getElementType();
4749 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
4750 int NumParts, NumLeftover;
4751 std::tie(NumParts, NumLeftover) =
4752 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
4753
4754 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
4755 for (int i = 0; i < NumParts; ++i) {
4756 DstOps.push_back(NarrowTy);
4757 }
4758
4759 if (LeftoverTy.isValid()) {
4760 assert(NumLeftover == 1 && "expected exactly one leftover");
4761 DstOps.push_back(LeftoverTy);
4762 }
4763}
4764
4765/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
4766/// made from \p Op depending on operand type.
4767static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
4768 MachineOperand &Op) {
4769 for (unsigned i = 0; i < N; ++i) {
4770 if (Op.isReg())
4771 Ops.push_back(Op.getReg());
4772 else if (Op.isImm())
4773 Ops.push_back(Op.getImm());
4774 else if (Op.isPredicate())
4775 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
4776 else
4777 llvm_unreachable("Unsupported type");
4778 }
4779}
4780
4781// Handle splitting vector operations which need to have the same number of
4782// elements in each type index, but each type index may have a different element
4783// type.
4784//
4785// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
4786// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4787// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4788//
4789// Also handles some irregular breakdown cases, e.g.
4790// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
4791// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4792// s64 = G_SHL s64, s32
4793LegalizerHelper::LegalizeResult
4794LegalizerHelper::fewerElementsVectorMultiEltType(
4795 GenericMachineInstr &MI, unsigned NumElts,
4796 std::initializer_list<unsigned> NonVecOpIndices) {
4797 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
4798 "Non-compatible opcode or not specified non-vector operands");
4799 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4800
4801 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4802 unsigned NumDefs = MI.getNumDefs();
4803
4804 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
4805 // Build instructions with DstOps so the instruction found by CSE is used
4806 // directly; CSE copies it into the given vreg when building with a vreg dest.
4807 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
4808 // Output registers will be taken from created instructions.
4809 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
4810 for (unsigned i = 0; i < NumDefs; ++i) {
4811 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
4812 }
4813
4814 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
4815 // Operands listed in NonVecOpIndices will be used as is without splitting;
4816 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
4817 // scalar condition (op 1), immediate in sext_inreg (op 2).
4818 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
4819 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4820 ++UseIdx, ++UseNo) {
4821 if (is_contained(NonVecOpIndices, UseIdx)) {
4822 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
4823 MI.getOperand(UseIdx));
4824 } else {
4825 SmallVector<Register, 8> SplitPieces;
4826 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
4827 MRI);
4828 for (auto Reg : SplitPieces)
4829 InputOpsPieces[UseNo].push_back(Reg);
4830 }
4831 }
4832
4833 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4834
4835 // Take i-th piece of each input operand split and build sub-vector/scalar
4836 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
4837 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4838 SmallVector<DstOp, 8> Defs;
4839 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4840 Defs.push_back(OutputOpsPieces[DstNo][i]);
4841
4842 SmallVector<SrcOp, 8> Uses;
4843 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
4844 Uses.push_back(InputOpsPieces[InputNo][i]);
4845
4846 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
4847 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4848 OutputRegs[DstNo].push_back(I.getReg(DstNo));
4849 }
4850
4851 // Merge small outputs into MI's output for each def operand.
4852 if (NumLeftovers) {
4853 for (unsigned i = 0; i < NumDefs; ++i)
4854 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
4855 } else {
4856 for (unsigned i = 0; i < NumDefs; ++i)
4857 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
4858 }
4859
4860 MI.eraseFromParent();
4861 return Legalized;
4862}
4863
4864LegalizerHelper::LegalizeResult
4865LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
4866 unsigned NumElts) {
4867 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4868
4869 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4870 unsigned NumDefs = MI.getNumDefs();
4871
4872 SmallVector<DstOp, 8> OutputOpsPieces;
4873 SmallVector<Register, 8> OutputRegs;
4874 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
4875
4876 // Instructions that perform the register split will be inserted in the basic
4877 // block where the register is defined (that block is the next operand).
4878 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
4879 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4880 UseIdx += 2, ++UseNo) {
4881 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
4882 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
4883 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
4884 MIRBuilder, MRI);
4885 }
4886
4887 // Build PHIs with fewer elements.
4888 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4889 MIRBuilder.setInsertPt(*MI.getParent(), MI);
4890 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4891 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
4892 Phi.addDef(
4893 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
4894 OutputRegs.push_back(Phi.getReg(0));
4895
4896 for (unsigned j = 0; j < NumInputs / 2; ++j) {
4897 Phi.addUse(InputOpsPieces[j][i]);
4898 Phi.add(MI.getOperand(1 + j * 2 + 1));
4899 }
4900 }
4901
4902 // Set the insert point after the existing PHIs
4903 MachineBasicBlock &MBB = *MI.getParent();
4904 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
4905
4906 // Merge small outputs into MI's def.
4907 if (NumLeftovers) {
4908 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
4909 } else {
4910 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
4911 }
4912
4913 MI.eraseFromParent();
4914 return Legalized;
4915}
4916
4917LegalizerHelper::LegalizeResult
4918LegalizerHelper::fewerElementsVectorUnmergeValues(GenericMachineInstr &MI,
4919 unsigned TypeIdx,
4920 LLT NarrowTy) {
4921 const int NumDst = MI.getNumOperands() - 1;
4922 const Register SrcReg = MI.getOperand(NumDst).getReg();
4923 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4924 LLT SrcTy = MRI.getType(SrcReg);
4925
4926 if (TypeIdx != 1 || NarrowTy == DstTy)
4927 return UnableToLegalize;
4928
4929 // Requires compatible types. Otherwise SrcReg should have been defined by a
4930 // merge-like instruction that would get artifact-combined. Most likely the
4931 // instruction that defines SrcReg has to perform more/fewer-elements
4932 // legalization compatible with NarrowTy.
4933 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
4934 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4935
4936 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
4937 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
4938 return UnableToLegalize;
4939
4940 // This is most likely DstTy (smaller than register size) packed in SrcTy
4941 // (larger than register size) and since unmerge was not combined it will be
4942 // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
4943 // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.
4944
4945 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
4946 //
4947 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
4948 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
4949 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
4950 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
4951 const int NumUnmerge = Unmerge->getNumOperands() - 1;
4952 const int PartsPerUnmerge = NumDst / NumUnmerge;
4953
4954 for (int I = 0; I != NumUnmerge; ++I) {
4955 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
4956
4957 for (int J = 0; J != PartsPerUnmerge; ++J)
4958 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
4959 MIB.addUse(Unmerge.getReg(I));
4960 }
4961
4962 MI.eraseFromParent();
4963 return Legalized;
4964}
4965
4966LegalizerHelper::LegalizeResult
4967LegalizerHelper::fewerElementsVectorMerge(GenericMachineInstr &MI, unsigned TypeIdx,
4968 LLT NarrowTy) {
4969 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
4970 // Requires compatible types. Otherwise the user of DstReg did not perform an
4971 // unmerge that should have been artifact-combined. Most likely the instruction
4972 // using DstReg has to do more/fewer-elements legalization compatible with NarrowTy.
4973 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
4974 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4975 if (NarrowTy == SrcTy)
4976 return UnableToLegalize;
4977
4978 // This attempts to lower part of an LCMTy merge/unmerge sequence. It is
4979 // intended for old MIR tests; since the switch to more/fewer-elements
4980 // legalization it should no longer be possible to generate such MIR from
4981 // llvm-ir, as the LCMTy approach was replaced with merge/unmerge to elements.
4982 if (TypeIdx == 1) {
4983 assert(SrcTy.isVector() && "Expected vector types");
4984 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4985 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
4986 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
4987 return UnableToLegalize;
4988 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
4989 //
4990 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
4991 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
4992 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
4993 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
4994 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
4995 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
4996
4997 SmallVector<Register, 8> Elts;
4998 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
4999 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
5000 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
5001 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5002 Elts.push_back(Unmerge.getReg(j));
5003 }
5004
5005 SmallVector<Register, 8> NarrowTyElts;
5006 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5007 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5008 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
5009 ++i, Offset += NumNarrowTyElts) {
5010 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
5011 NarrowTyElts.push_back(
5012 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5013 }
5014
5015 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5016 MI.eraseFromParent();
5017 return Legalized;
5018 }
5019
5020 assert(TypeIdx == 0 && "Bad type index");
5021 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5022 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5023 return UnableToLegalize;
5024
5025 // This is most likely SrcTy (smaller than register size) packed in DstTy
5026 // (larger than register size) and since merge was not combined it will be
5027 // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy
5028 // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy.
5029
5030 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
5031 //
5032 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5033 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5034 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
5035 SmallVector<Register, 8> NarrowTyElts;
5036 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5037 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5038 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
5039 for (unsigned i = 0; i < NumParts; ++i) {
5040 SmallVector<Register, 8> Sources;
5041 for (unsigned j = 0; j < NumElts; ++j)
5042 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
5043 NarrowTyElts.push_back(
5044 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5045 }
5046
5047 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5048 MI.eraseFromParent();
5049 return Legalized;
5050}
5051
5052LegalizerHelper::LegalizeResult
5053LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
5054 unsigned TypeIdx,
5055 LLT NarrowVecTy) {
5056 auto [DstReg, SrcVec] = MI.getFirst2Regs();
5057 Register InsertVal;
5058 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5059
5060 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
5061 if (IsInsert)
5062 InsertVal = MI.getOperand(2).getReg();
5063
5064 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
5065
5066 // TODO: Handle total scalarization case.
5067 if (!NarrowVecTy.isVector())
5068 return UnableToLegalize;
5069
5070 LLT VecTy = MRI.getType(SrcVec);
5071
5072 // If the index is a constant, we can really break this down as you would
5073 // expect, and index into the target size pieces.
5074 int64_t IdxVal;
5075 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
5076 if (MaybeCst) {
5077 IdxVal = MaybeCst->Value.getSExtValue();
5078 // Avoid out of bounds indexing the pieces.
5079 if (IdxVal >= VecTy.getNumElements()) {
5080 MIRBuilder.buildUndef(DstReg);
5081 MI.eraseFromParent();
5082 return Legalized;
5083 }
5084
5085 SmallVector<Register, 8> VecParts;
5086 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5087
5088 // Build a sequence of NarrowTy pieces in VecParts for this operand.
5089 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5090 TargetOpcode::G_ANYEXT);
5091
5092 unsigned NewNumElts = NarrowVecTy.getNumElements();
5093
5094 LLT IdxTy = MRI.getType(Idx);
5095 int64_t PartIdx = IdxVal / NewNumElts;
5096 auto NewIdx =
5097 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5098
5099 if (IsInsert) {
5100 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5101
5102 // Use the adjusted index to insert into one of the subvectors.
5103 auto InsertPart = MIRBuilder.buildInsertVectorElement(
5104 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5105 VecParts[PartIdx] = InsertPart.getReg(0);
5106
5107 // Recombine the inserted subvector with the others to reform the result
5108 // vector.
5109 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5110 } else {
5111 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5112 }
5113
5114 MI.eraseFromParent();
5115 return Legalized;
5116 }
5117
5118 // With a variable index, we can't perform the operation in a smaller type, so
5119 // we're forced to expand this.
5120 //
5121 // TODO: We could emit a chain of compare/select to figure out which piece to
5122 // index.
5123 return lowerExtractInsertVectorElt(MI);
5124}
5125
5126LegalizerHelper::LegalizeResult
5127LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
5128 LLT NarrowTy) {
5129 // FIXME: Don't know how to handle secondary types yet.
5130 if (TypeIdx != 0)
5131 return UnableToLegalize;
5132
5133 // This implementation doesn't work for atomics. Give up instead of doing
5134 // something invalid.
5135 if (LdStMI.isAtomic())
5136 return UnableToLegalize;
5137
5138 bool IsLoad = isa<GLoad>(LdStMI);
5139 Register ValReg = LdStMI.getReg(0);
5140 Register AddrReg = LdStMI.getPointerReg();
5141 LLT ValTy = MRI.getType(ValReg);
5142
5143 // FIXME: Do we need a distinct NarrowMemory legalize action?
5144 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
5145 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
5146 return UnableToLegalize;
5147 }
5148
5149 int NumParts = -1;
5150 int NumLeftover = -1;
5151 LLT LeftoverTy;
5152 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
5153 if (IsLoad) {
5154 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
5155 } else {
5156 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5157 NarrowLeftoverRegs, MIRBuilder, MRI)) {
5158 NumParts = NarrowRegs.size();
5159 NumLeftover = NarrowLeftoverRegs.size();
5160 }
5161 }
5162
5163 if (NumParts == -1)
5164 return UnableToLegalize;
5165
5166 LLT PtrTy = MRI.getType(AddrReg);
5167 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
5168
5169 unsigned TotalSize = ValTy.getSizeInBits();
5170
5171 // Split the load/store into PartTy sized pieces starting at Offset. If this
5172 // is a load, return the new registers in ValRegs. For a store, each element
5173 // of ValRegs should be PartTy. Returns the next offset that needs to be
5174 // handled.
5175 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
5176 auto MMO = LdStMI.getMMO();
5177 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
5178 unsigned NumParts, unsigned Offset) -> unsigned {
5179 MachineFunction &MF = MIRBuilder.getMF();
5180 unsigned PartSize = PartTy.getSizeInBits();
5181 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
5182 ++Idx) {
5183 unsigned ByteOffset = Offset / 8;
5184 Register NewAddrReg;
5185
5186 MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
5187
5188 MachineMemOperand *NewMMO =
5189 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
5190
5191 if (IsLoad) {
5192 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5193 ValRegs.push_back(Dst);
5194 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5195 } else {
5196 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5197 }
5198 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
5199 }
5200
5201 return Offset;
5202 };
5203
5204 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
5205 unsigned HandledOffset =
5206 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
5207
5208 // Handle the rest of the register if this isn't an even type breakdown.
5209 if (LeftoverTy.isValid())
5210 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5211
5212 if (IsLoad) {
5213 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5214 LeftoverTy, NarrowLeftoverRegs);
5215 }
5216
5217 LdStMI.eraseFromParent();
5218 return Legalized;
5219}
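// Editor's note (illustrative, not from the upstream file): a little-endian
// s96 load narrowed with NarrowTy = s64 is broken down by
// getNarrowTypeBreakDown into one s64 part plus an s32 leftover, so
// splitTypePieces emits roughly
//   %lo:_(s64) = G_LOAD %ptr            ; byte offset 0
//   %p8:_(p0) = G_PTR_ADD %ptr, 8
//   %hi:_(s32) = G_LOAD %p8             ; leftover piece
// and insertParts then reassembles the original s96 value from the two pieces.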
5220
5221LegalizerHelper::LegalizeResult
5222LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
5223 LLT NarrowTy) {
5224 using namespace TargetOpcode;
5225 GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
5226 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5227
5228 switch (MI.getOpcode()) {
5229 case G_IMPLICIT_DEF:
5230 case G_TRUNC:
5231 case G_AND:
5232 case G_OR:
5233 case G_XOR:
5234 case G_ADD:
5235 case G_SUB:
5236 case G_MUL:
5237 case G_PTR_ADD:
5238 case G_SMULH:
5239 case G_UMULH:
5240 case G_FADD:
5241 case G_FMUL:
5242 case G_FSUB:
5243 case G_FNEG:
5244 case G_FABS:
5245 case G_FCANONICALIZE:
5246 case G_FDIV:
5247 case G_FREM:
5248 case G_FMA:
5249 case G_FMAD:
5250 case G_FPOW:
5251 case G_FEXP:
5252 case G_FEXP2:
5253 case G_FEXP10:
5254 case G_FLOG:
5255 case G_FLOG2:
5256 case G_FLOG10:
5257 case G_FLDEXP:
5258 case G_FNEARBYINT:
5259 case G_FCEIL:
5260 case G_FFLOOR:
5261 case G_FRINT:
5262 case G_INTRINSIC_LRINT:
5263 case G_INTRINSIC_LLRINT:
5264 case G_INTRINSIC_ROUND:
5265 case G_INTRINSIC_ROUNDEVEN:
5266 case G_LROUND:
5267 case G_LLROUND:
5268 case G_INTRINSIC_TRUNC:
5269 case G_FCOS:
5270 case G_FSIN:
5271 case G_FTAN:
5272 case G_FACOS:
5273 case G_FASIN:
5274 case G_FATAN:
5275 case G_FATAN2:
5276 case G_FCOSH:
5277 case G_FSINH:
5278 case G_FTANH:
5279 case G_FSQRT:
5280 case G_BSWAP:
5281 case G_BITREVERSE:
5282 case G_SDIV:
5283 case G_UDIV:
5284 case G_SREM:
5285 case G_UREM:
5286 case G_SDIVREM:
5287 case G_UDIVREM:
5288 case G_SMIN:
5289 case G_SMAX:
5290 case G_UMIN:
5291 case G_UMAX:
5292 case G_ABS:
5293 case G_FMINNUM:
5294 case G_FMAXNUM:
5295 case G_FMINNUM_IEEE:
5296 case G_FMAXNUM_IEEE:
5297 case G_FMINIMUM:
5298 case G_FMAXIMUM:
5299 case G_FSHL:
5300 case G_FSHR:
5301 case G_ROTL:
5302 case G_ROTR:
5303 case G_FREEZE:
5304 case G_SADDSAT:
5305 case G_SSUBSAT:
5306 case G_UADDSAT:
5307 case G_USUBSAT:
5308 case G_UMULO:
5309 case G_SMULO:
5310 case G_SHL:
5311 case G_LSHR:
5312 case G_ASHR:
5313 case G_SSHLSAT:
5314 case G_USHLSAT:
5315 case G_CTLZ:
5316 case G_CTLZ_ZERO_UNDEF:
5317 case G_CTTZ:
5318 case G_CTTZ_ZERO_UNDEF:
5319 case G_CTPOP:
5320 case G_FCOPYSIGN:
5321 case G_ZEXT:
5322 case G_SEXT:
5323 case G_ANYEXT:
5324 case G_FPEXT:
5325 case G_FPTRUNC:
5326 case G_SITOFP:
5327 case G_UITOFP:
5328 case G_FPTOSI:
5329 case G_FPTOUI:
5330 case G_FPTOSI_SAT:
5331 case G_FPTOUI_SAT:
5332 case G_INTTOPTR:
5333 case G_PTRTOINT:
5334 case G_ADDRSPACE_CAST:
5335 case G_UADDO:
5336 case G_USUBO:
5337 case G_UADDE:
5338 case G_USUBE:
5339 case G_SADDO:
5340 case G_SSUBO:
5341 case G_SADDE:
5342 case G_SSUBE:
5343 case G_STRICT_FADD:
5344 case G_STRICT_FSUB:
5345 case G_STRICT_FMUL:
5346 case G_STRICT_FMA:
5347 case G_STRICT_FLDEXP:
5348 case G_FFREXP:
5349 return fewerElementsVectorMultiEltType(GMI, NumElts);
5350 case G_ICMP:
5351 case G_FCMP:
5352 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
5353 case G_IS_FPCLASS:
5354 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
5355 case G_SELECT:
5356 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
5357 return fewerElementsVectorMultiEltType(GMI, NumElts);
5358 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
5359 case G_PHI:
5360 return fewerElementsVectorPhi(GMI, NumElts);
5361 case G_UNMERGE_VALUES:
5362 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
5363 case G_BUILD_VECTOR:
5364 assert(TypeIdx == 0 && "not a vector type index");
5365 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5366 case G_CONCAT_VECTORS:
5367 if (TypeIdx != 1) // TODO: This probably does work as expected already.
5368 return UnableToLegalize;
5369 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5370 case G_EXTRACT_VECTOR_ELT:
5371 case G_INSERT_VECTOR_ELT:
5372 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
5373 case G_LOAD:
5374 case G_STORE:
5375 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
5376 case G_SEXT_INREG:
5377 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
5378 GISEL_VECREDUCE_CASES_NONSEQ
5379 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
5380 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5381 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5382 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
5383 case G_SHUFFLE_VECTOR:
5384 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
5385 case G_FPOWI:
5386 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
5387 case G_BITCAST:
5388 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
5389 case G_INTRINSIC_FPTRUNC_ROUND:
5390 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
5391 default:
5392 return UnableToLegalize;
5393 }
5394}
5395
5396LegalizerHelper::LegalizeResult
5397LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned int TypeIdx,
5398 LLT NarrowTy) {
5399 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5400 "Not a bitcast operation");
5401
5402 if (TypeIdx != 0)
5403 return UnableToLegalize;
5404
5405 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5406
5407 unsigned NewElemCount =
5408 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5409 LLT SrcNarrowTy = LLT::fixed_vector(NewElemCount, SrcTy.getElementType());
5410
5411 // Split the Src and Dst Reg into smaller registers
5412 SmallVector<Register> SrcVRegs, BitcastVRegs;
5413 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5414 return UnableToLegalize;
5415
5416 // Build new smaller bitcast instructions
5417 // Leftover types are not supported for now, but they will need to be handled.
5418 for (unsigned i = 0; i < SrcVRegs.size(); i++)
5419 BitcastVRegs.push_back(
5420 MIRBuilder.buildBitcast(NarrowTy, SrcVRegs[i]).getReg(0));
5421
5422 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5423 MI.eraseFromParent();
5424 return Legalized;
5425}
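// Editor's note (illustrative, not from the upstream file): narrowing a
// G_BITCAST keeps each piece the same width on both sides. For a <4 x s32>
// result narrowed to NarrowTy = <2 x s32> from a <8 x s16> source,
// SrcNarrowTy becomes <4 x s16> (64 bits, matching <2 x s32>), giving roughly
//   %s0:_(<4 x s16>), %s1:_(<4 x s16>) = G_UNMERGE_VALUES %src:_(<8 x s16>)
//   %d0:_(<2 x s32>) = G_BITCAST %s0
//   %d1:_(<2 x s32>) = G_BITCAST %s1
//   %dst:_(<4 x s32>) = G_CONCAT_VECTORS %d0, %d1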
5426
5427LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
5428 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5429 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5430 if (TypeIdx != 0)
5431 return UnableToLegalize;
5432
5433 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5434 MI.getFirst3RegLLTs();
5435 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5436 // The shuffle should be canonicalized by now.
5437 if (DstTy != Src1Ty)
5438 return UnableToLegalize;
5439 if (DstTy != Src2Ty)
5440 return UnableToLegalize;
5441
5442 if (!isPowerOf2_32(DstTy.getNumElements()))
5443 return UnableToLegalize;
5444
5445 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5446 // Further legalization attempts will be needed to split it further.
5447 NarrowTy =
5448 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
5449 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5450
5451 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
5452 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
5453 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
5454 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5455 SplitSrc2Regs[1]};
5456
5457 Register Hi, Lo;
5458
5459 // If Lo or Hi uses elements from at most two of the four input vectors, then
5460 // express it as a vector shuffle of those two inputs. Otherwise extract the
5461 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
5462 SmallVector<int, 16> Ops;
5463 for (unsigned High = 0; High < 2; ++High) {
5464 Register &Output = High ? Hi : Lo;
5465
5466 // Build a shuffle mask for the output, discovering on the fly which
5467 // input vectors to use as shuffle operands (recorded in InputUsed).
5468 // If building a suitable shuffle vector proves too hard, then bail
5469 // out with useBuildVector set.
5470 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
5471 unsigned FirstMaskIdx = High * NewElts;
5472 bool UseBuildVector = false;
5473 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5474 // The mask element. This indexes into the input.
5475 int Idx = Mask[FirstMaskIdx + MaskOffset];
5476
5477 // The input vector this mask element indexes into.
5478 unsigned Input = (unsigned)Idx / NewElts;
5479
5480 if (Input >= std::size(Inputs)) {
5481 // The mask element does not index into any input vector.
5482 Ops.push_back(-1);
5483 continue;
5484 }
5485
5486 // Turn the index into an offset from the start of the input vector.
5487 Idx -= Input * NewElts;
5488
5489 // Find or create a shuffle vector operand to hold this input.
5490 unsigned OpNo;
5491 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5492 if (InputUsed[OpNo] == Input) {
5493 // This input vector is already an operand.
5494 break;
5495 } else if (InputUsed[OpNo] == -1U) {
5496 // Create a new operand for this input vector.
5497 InputUsed[OpNo] = Input;
5498 break;
5499 }
5500 }
5501
5502 if (OpNo >= std::size(InputUsed)) {
5503 // More than two input vectors used! Give up on trying to create a
5504 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5505 UseBuildVector = true;
5506 break;
5507 }
5508
5509 // Add the mask index for the new shuffle vector.
5510 Ops.push_back(Idx + OpNo * NewElts);
5511 }
5512
5513 if (UseBuildVector) {
5514 LLT EltTy = NarrowTy.getElementType();
5515 SmallVector<Register, 16> SVOps;
5516
5517 // Extract the input elements by hand.
5518 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5519 // The mask element. This indexes into the input.
5520 int Idx = Mask[FirstMaskIdx + MaskOffset];
5521
5522 // The input vector this mask element indexes into.
5523 unsigned Input = (unsigned)Idx / NewElts;
5524
5525 if (Input >= std::size(Inputs)) {
5526 // The mask element is "undef" or indexes off the end of the input.
5527 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5528 continue;
5529 }
5530
5531 // Turn the index into an offset from the start of the input vector.
5532 Idx -= Input * NewElts;
5533
5534 // Extract the vector element by hand.
5535 SVOps.push_back(MIRBuilder
5536 .buildExtractVectorElement(
5537 EltTy, Inputs[Input],
5538 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5539 .getReg(0));
5540 }
5541
5542 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5543 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5544 } else if (InputUsed[0] == -1U) {
5545 // No input vectors were used! The result is undefined.
5546 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5547 } else {
5548 Register Op0 = Inputs[InputUsed[0]];
5549 // If only one input was used, use an undefined vector for the other.
5550 Register Op1 = InputUsed[1] == -1U
5551 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5552 : Inputs[InputUsed[1]];
5553 // At least one input vector was used. Create a new shuffle vector.
5554 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5555 }
5556
5557 Ops.clear();
5558 }
5559
5560 MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
5561 MI.eraseFromParent();
5562 return Legalized;
5563}
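// Editor's note (illustrative, not from the upstream file): splitting a
// <4 x s32> shuffle with mask [0,4,1,5] gives NewElts = 2 and
// Inputs = {Src1Lo, Src1Hi, Src2Lo, Src2Hi}. The low half of the mask, [0,4],
// only touches Src1Lo and Src2Lo, so Lo becomes
// G_SHUFFLE_VECTOR Src1Lo, Src2Lo with mask [0,2]; the high half [1,5]
// likewise becomes a shuffle of the same two inputs with mask [1,3], and the
// two halves are merged back into the destination.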
5564
5565LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
5566 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5567 auto &RdxMI = cast<GVecReduce>(MI);
5568
5569 if (TypeIdx != 1)
5570 return UnableToLegalize;
5571
5572 // The semantics of the normal non-sequential reductions allow us to freely
5573 // re-associate the operation.
5574 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5575
5576 if (NarrowTy.isVector() &&
5577 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5578 return UnableToLegalize;
5579
5580 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5581 SmallVector<Register> SplitSrcs;
5582 // If NarrowTy is a scalar then we're being asked to scalarize.
5583 const unsigned NumParts =
5584 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5585 : SrcTy.getNumElements();
5586
5587 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5588 if (NarrowTy.isScalar()) {
5589 if (DstTy != NarrowTy)
5590 return UnableToLegalize; // FIXME: handle implicit extensions.
5591
5592 if (isPowerOf2_32(NumParts)) {
5593 // Generate a tree of scalar operations to reduce the critical path.
5594 SmallVector<Register> PartialResults;
5595 unsigned NumPartsLeft = NumParts;
5596 while (NumPartsLeft > 1) {
5597 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5598 PartialResults.emplace_back(
5599 MIRBuilder
5600 .buildInstr(ScalarOpc, {NarrowTy},
5601 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5602 .getReg(0));
5603 }
5604 SplitSrcs = PartialResults;
5605 PartialResults.clear();
5606 NumPartsLeft = SplitSrcs.size();
5607 }
5608 assert(SplitSrcs.size() == 1);
5609 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5610 MI.eraseFromParent();
5611 return Legalized;
5612 }
5613 // If we can't generate a tree, then just do sequential operations.
5614 Register Acc = SplitSrcs[0];
5615 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5616 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5617 .getReg(0);
5618 MIRBuilder.buildCopy(DstReg, Acc);
5619 MI.eraseFromParent();
5620 return Legalized;
5621 }
5622 SmallVector<Register> PartialReductions;
5623 for (unsigned Part = 0; Part < NumParts; ++Part) {
5624 PartialReductions.push_back(
5625 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5626 .getReg(0));
5627 }
5628
5629 // If the types involved are powers of 2, we can generate intermediate vector
5630 // ops, before generating a final reduction operation.
5631 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5632 isPowerOf2_32(NarrowTy.getNumElements())) {
5633 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5634 }
5635
5636 Register Acc = PartialReductions[0];
5637 for (unsigned Part = 1; Part < NumParts; ++Part) {
5638 if (Part == NumParts - 1) {
5639 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5640 {Acc, PartialReductions[Part]});
5641 } else {
5642 Acc = MIRBuilder
5643 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5644 .getReg(0);
5645 }
5646 }
5647 MI.eraseFromParent();
5648 return Legalized;
5649}
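// Editor's note (illustrative, not from the upstream file): scalarizing
// G_VECREDUCE_ADD of a <4 x s32> with NarrowTy = s32 extracts the four lanes
// and, because the part count is a power of two, combines them as a tree,
// (a + b) + (c + d), instead of a serial chain, which halves the depth of the
// dependency chain at each level.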
5650
5651LegalizerHelper::LegalizeResult
5652LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
5653 unsigned int TypeIdx,
5654 LLT NarrowTy) {
5655 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5656 MI.getFirst3RegLLTs();
5657 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5658 DstTy != NarrowTy)
5659 return UnableToLegalize;
5660
5661 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5662 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5663 "Unexpected vecreduce opcode");
5664 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5665 ? TargetOpcode::G_FADD
5666 : TargetOpcode::G_FMUL;
5667
5668 SmallVector<Register> SplitSrcs;
5669 unsigned NumParts = SrcTy.getNumElements();
5670 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5671 Register Acc = ScalarReg;
5672 for (unsigned i = 0; i < NumParts; i++)
5673 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5674 .getReg(0);
5675
5676 MIRBuilder.buildCopy(DstReg, Acc);
5677 MI.eraseFromParent();
5678 return Legalized;
5679}
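// Editor's note (illustrative, not from the upstream file): the sequential
// forms must preserve evaluation order, so no tree is built here; for a
// <4 x s32> G_VECREDUCE_SEQ_FADD the expansion is the strict chain
// (((Scalar + e0) + e1) + e2) + e3 built from repeated G_FADDs.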
5680
5681LegalizerHelper::LegalizeResult
5682LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5683 LLT SrcTy, LLT NarrowTy,
5684 unsigned ScalarOpc) {
5685 SmallVector<Register> SplitSrcs;
5686 // Split the sources into NarrowTy size pieces.
5687 extractParts(SrcReg, NarrowTy,
5688 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5689 MIRBuilder, MRI);
5690 // We're going to do a tree reduction using vector operations until we have
5691 // one NarrowTy size value left.
5692 while (SplitSrcs.size() > 1) {
5693 SmallVector<Register> PartialRdxs;
5694 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5695 Register LHS = SplitSrcs[Idx];
5696 Register RHS = SplitSrcs[Idx + 1];
5697 // Create the intermediate vector op.
5698 Register Res =
5699 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
5700 PartialRdxs.push_back(Res);
5701 }
5702 SplitSrcs = std::move(PartialRdxs);
5703 }
5704 // Finally generate the requested NarrowTy based reduction.
5705 Observer.changingInstr(MI);
5706 MI.getOperand(1).setReg(SplitSrcs[0]);
5707 Observer.changedInstr(MI);
5708 return Legalized;
5709}
5710
5711LegalizerHelper::LegalizeResult
5712LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
5713 const LLT HalfTy, const LLT AmtTy) {
5714
5715 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5716 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5717 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5718
5719 if (Amt.isZero()) {
5720 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
5721 MI.eraseFromParent();
5722 return Legalized;
5723 }
5724
5725 LLT NVT = HalfTy;
5726 unsigned NVTBits = HalfTy.getSizeInBits();
5727 unsigned VTBits = 2 * NVTBits;
5728
5729 SrcOp Lo(Register(0)), Hi(Register(0));
5730 if (MI.getOpcode() == TargetOpcode::G_SHL) {
5731 if (Amt.ugt(VTBits)) {
5732 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5733 } else if (Amt.ugt(NVTBits)) {
5734 Lo = MIRBuilder.buildConstant(NVT, 0);
5735 Hi = MIRBuilder.buildShl(NVT, InL,
5736 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5737 } else if (Amt == NVTBits) {
5738 Lo = MIRBuilder.buildConstant(NVT, 0);
5739 Hi = InL;
5740 } else {
5741 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
5742 auto OrLHS =
5743 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
5744 auto OrRHS = MIRBuilder.buildLShr(
5745 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5746 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5747 }
5748 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5749 if (Amt.ugt(VTBits)) {
5750 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5751 } else if (Amt.ugt(NVTBits)) {
5752 Lo = MIRBuilder.buildLShr(NVT, InH,
5753 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5754 Hi = MIRBuilder.buildConstant(NVT, 0);
5755 } else if (Amt == NVTBits) {
5756 Lo = InH;
5757 Hi = MIRBuilder.buildConstant(NVT, 0);
5758 } else {
5759 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5760
5761 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5762 auto OrRHS = MIRBuilder.buildShl(
5763 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5764
5765 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5766 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
5767 }
5768 } else {
5769 if (Amt.ugt(VTBits)) {
5770 Lo = Hi = MIRBuilder.buildAShr(
5771 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5772 } else if (Amt.ugt(NVTBits)) {
5773 Lo = MIRBuilder.buildAShr(NVT, InH,
5774 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5775 Hi = MIRBuilder.buildAShr(NVT, InH,
5776 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5777 } else if (Amt == NVTBits) {
5778 Lo = InH;
5779 Hi = MIRBuilder.buildAShr(NVT, InH,
5780 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5781 } else {
5782 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5783
5784 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5785 auto OrRHS = MIRBuilder.buildShl(
5786 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5787
5788 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5789 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
5790 }
5791 }
5792
5793 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
5794 MI.eraseFromParent();
5795
5796 return Legalized;
5797}
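// Editor's note (illustrative, not from the upstream file): with a constant
// amount no selects are needed. Shifting an s64 left by 40 using s32 halves
// takes the Amt > NVTBits path above: the low half becomes zero and the high
// half is the old low half shifted left by 40 - 32 = 8.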
5798
5799// TODO: Optimize if constant shift amount.
5800LegalizerHelper::LegalizeResult
5801LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
5802 LLT RequestedTy) {
5803 if (TypeIdx == 1) {
5804 Observer.changingInstr(MI);
5805 narrowScalarSrc(MI, RequestedTy, 2);
5806 Observer.changedInstr(MI);
5807 return Legalized;
5808 }
5809
5810 Register DstReg = MI.getOperand(0).getReg();
5811 LLT DstTy = MRI.getType(DstReg);
5812 if (DstTy.isVector())
5813 return UnableToLegalize;
5814
5815 Register Amt = MI.getOperand(2).getReg();
5816 LLT ShiftAmtTy = MRI.getType(Amt);
5817 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
5818 if (DstEltSize % 2 != 0)
5819 return UnableToLegalize;
5820
5821 // Ignore the input type. We can only go to exactly half the size of the
5822 // input. If that isn't small enough, the resulting pieces will be further
5823 // legalized.
5824 const unsigned NewBitSize = DstEltSize / 2;
5825 const LLT HalfTy = LLT::scalar(NewBitSize);
5826 const LLT CondTy = LLT::scalar(1);
5827
5828 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
5829 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
5830 ShiftAmtTy);
5831 }
5832
5833 // TODO: Expand with known bits.
5834
5835 // Handle the fully general expansion by an unknown amount.
5836 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
5837
5838 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5839 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5840 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5841
5842 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
5843 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
5844
5845 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
5846 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
5847 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
5848
5849 Register ResultRegs[2];
5850 switch (MI.getOpcode()) {
5851 case TargetOpcode::G_SHL: {
5852 // Short: ShAmt < NewBitSize
5853 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
5854
5855 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
5856 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
5857 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
5858
5859 // Long: ShAmt >= NewBitSize
5860 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
5861 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
5862
5863 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
5864 auto Hi = MIRBuilder.buildSelect(
5865 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
5866
5867 ResultRegs[0] = Lo.getReg(0);
5868 ResultRegs[1] = Hi.getReg(0);
5869 break;
5870 }
5871 case TargetOpcode::G_LSHR:
5872 case TargetOpcode::G_ASHR: {
5873 // Short: ShAmt < NewBitSize
5874 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
5875
5876 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
5877 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
5878 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
5879
5880 // Long: ShAmt >= NewBitSize
5881 MachineInstrBuilder HiL;
5882 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5883 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
5884 } else {
5885 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
5886 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
5887 }
5888 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
5889 {InH, AmtExcess}); // Lo from Hi part.
5890
5891 auto Lo = MIRBuilder.buildSelect(
5892 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
5893
5894 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
5895
5896 ResultRegs[0] = Lo.getReg(0);
5897 ResultRegs[1] = Hi.getReg(0);
5898 break;
5899 }
5900 default:
5901 llvm_unreachable("not a shift");
5902 }
5903
5904 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
5905 MI.eraseFromParent();
5906 return Legalized;
5907}
5908
5911 LLT MoreTy) {
5912 assert(TypeIdx == 0 && "Expecting only Idx 0");
5913
5915 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
5916 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
5918 moreElementsVectorSrc(MI, MoreTy, I);
5919 }
5920
5921 MachineBasicBlock &MBB = *MI.getParent();
5923 moreElementsVectorDst(MI, MoreTy, 0);
5925 return Legalized;
5926}
5927
5928MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
5929 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
5930 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
5931
5932 switch (Opcode) {
5933 default:
5935 "getNeutralElementForVecReduce called with invalid opcode!");
5936 case TargetOpcode::G_VECREDUCE_ADD:
5937 case TargetOpcode::G_VECREDUCE_OR:
5938 case TargetOpcode::G_VECREDUCE_XOR:
5939 case TargetOpcode::G_VECREDUCE_UMAX:
5940 return MIRBuilder.buildConstant(Ty, 0);
5941 case TargetOpcode::G_VECREDUCE_MUL:
5942 return MIRBuilder.buildConstant(Ty, 1);
5943 case TargetOpcode::G_VECREDUCE_AND:
5944 case TargetOpcode::G_VECREDUCE_UMIN:
5945 return MIRBuilder.buildConstant(Ty,
5946 APInt::getAllOnes(Ty.getSizeInBits()));
5947 case TargetOpcode::G_VECREDUCE_SMAX:
5948 return MIRBuilder.buildConstant(
5949 Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
5950 case TargetOpcode::G_VECREDUCE_SMIN:
5951 return MIRBuilder.buildConstant(
5952 Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
5953 case TargetOpcode::G_VECREDUCE_FADD:
5954 return MIRBuilder.buildFConstant(Ty, -0.0);
5955 case TargetOpcode::G_VECREDUCE_FMUL:
5956 return MIRBuilder.buildFConstant(Ty, 1.0);
5957 case TargetOpcode::G_VECREDUCE_FMINIMUM:
5958 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
5959 assert(false && "getNeutralElementForVecReduce unimplemented for "
5960 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
5961 }
5962 llvm_unreachable("switch expected to return!");
5963}
5964
5965LegalizerHelper::LegalizeResult
5966LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
5967 LLT MoreTy) {
5968 unsigned Opc = MI.getOpcode();
5969 switch (Opc) {
5970 case TargetOpcode::G_IMPLICIT_DEF:
5971 case TargetOpcode::G_LOAD: {
5972 if (TypeIdx != 0)
5973 return UnableToLegalize;
5975 moreElementsVectorDst(MI, MoreTy, 0);
5977 return Legalized;
5978 }
5979 case TargetOpcode::G_STORE:
5980 if (TypeIdx != 0)
5981 return UnableToLegalize;
5983 moreElementsVectorSrc(MI, MoreTy, 0);
5985 return Legalized;
5986 case TargetOpcode::G_AND:
5987 case TargetOpcode::G_OR:
5988 case TargetOpcode::G_XOR:
5989 case TargetOpcode::G_ADD:
5990 case TargetOpcode::G_SUB:
5991 case TargetOpcode::G_MUL:
5992 case TargetOpcode::G_FADD:
5993 case TargetOpcode::G_FSUB:
5994 case TargetOpcode::G_FMUL:
5995 case TargetOpcode::G_FDIV:
5996 case TargetOpcode::G_FCOPYSIGN:
5997 case TargetOpcode::G_UADDSAT:
5998 case TargetOpcode::G_USUBSAT:
5999 case TargetOpcode::G_SADDSAT:
6000 case TargetOpcode::G_SSUBSAT:
6001 case TargetOpcode::G_SMIN:
6002 case TargetOpcode::G_SMAX:
6003 case TargetOpcode::G_UMIN:
6004 case TargetOpcode::G_UMAX:
6005 case TargetOpcode::G_FMINNUM:
6006 case TargetOpcode::G_FMAXNUM:
6007 case TargetOpcode::G_FMINNUM_IEEE:
6008 case TargetOpcode::G_FMAXNUM_IEEE:
6009 case TargetOpcode::G_FMINIMUM:
6010 case TargetOpcode::G_FMAXIMUM:
6011 case TargetOpcode::G_STRICT_FADD:
6012 case TargetOpcode::G_STRICT_FSUB:
6013 case TargetOpcode::G_STRICT_FMUL:
6014 case TargetOpcode::G_SHL:
6015 case TargetOpcode::G_ASHR:
6016 case TargetOpcode::G_LSHR: {
6018 moreElementsVectorSrc(MI, MoreTy, 1);
6019 moreElementsVectorSrc(MI, MoreTy, 2);
6020 moreElementsVectorDst(MI, MoreTy, 0);
6022 return Legalized;
6023 }
6024 case TargetOpcode::G_FMA:
6025 case TargetOpcode::G_STRICT_FMA:
6026 case TargetOpcode::G_FSHR:
6027 case TargetOpcode::G_FSHL: {
6029 moreElementsVectorSrc(MI, MoreTy, 1);
6030 moreElementsVectorSrc(MI, MoreTy, 2);
6031 moreElementsVectorSrc(MI, MoreTy, 3);
6032 moreElementsVectorDst(MI, MoreTy, 0);
6034 return Legalized;
6035 }
6036 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6037 case TargetOpcode::G_EXTRACT:
6038 if (TypeIdx != 1)
6039 return UnableToLegalize;
6041 moreElementsVectorSrc(MI, MoreTy, 1);
6043 return Legalized;
6044 case TargetOpcode::G_INSERT:
6045 case TargetOpcode::G_INSERT_VECTOR_ELT:
6046 case TargetOpcode::G_FREEZE:
6047 case TargetOpcode::G_FNEG:
6048 case TargetOpcode::G_FABS:
6049 case TargetOpcode::G_FSQRT:
6050 case TargetOpcode::G_FCEIL:
6051 case TargetOpcode::G_FFLOOR:
6052 case TargetOpcode::G_FNEARBYINT:
6053 case TargetOpcode::G_FRINT:
6054 case TargetOpcode::G_INTRINSIC_ROUND:
6055 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6056 case TargetOpcode::G_INTRINSIC_TRUNC:
6057 case TargetOpcode::G_BSWAP:
6058 case TargetOpcode::G_FCANONICALIZE:
6059 case TargetOpcode::G_SEXT_INREG:
6060 case TargetOpcode::G_ABS:
6061 if (TypeIdx != 0)
6062 return UnableToLegalize;
6064 moreElementsVectorSrc(MI, MoreTy, 1);
6065 moreElementsVectorDst(MI, MoreTy, 0);
6067 return Legalized;
6068 case TargetOpcode::G_SELECT: {
6069 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
6070 if (TypeIdx == 1) {
6071 if (!CondTy.isScalar() ||
6072 DstTy.getElementCount() != MoreTy.getElementCount())
6073 return UnableToLegalize;
6074
6075 // This is turning a scalar select of vectors into a vector
6076 // select. Broadcast the select condition.
6077 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6079 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6081 return Legalized;
6082 }
6083
6084 if (CondTy.isVector())
6085 return UnableToLegalize;
6086
6088 moreElementsVectorSrc(MI, MoreTy, 2);
6089 moreElementsVectorSrc(MI, MoreTy, 3);
6090 moreElementsVectorDst(MI, MoreTy, 0);
6092 return Legalized;
6093 }
6094 case TargetOpcode::G_UNMERGE_VALUES:
6095 return UnableToLegalize;
6096 case TargetOpcode::G_PHI:
6097 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
6098 case TargetOpcode::G_SHUFFLE_VECTOR:
6099 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
6100 case TargetOpcode::G_BUILD_VECTOR: {
6101 SmallVector<SrcOp, 8> Elts;
6102 for (auto Op : MI.uses()) {
6103 Elts.push_back(Op.getReg());
6104 }
6105
6106 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
6107 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getElementType()));
6108 }
6109
6110 MIRBuilder.buildDeleteTrailingVectorElements(
6111 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
6112 MI.eraseFromParent();
6113 return Legalized;
6114 }
6115 case TargetOpcode::G_SEXT:
6116 case TargetOpcode::G_ZEXT:
6117 case TargetOpcode::G_ANYEXT:
6118 case TargetOpcode::G_TRUNC:
6119 case TargetOpcode::G_FPTRUNC:
6120 case TargetOpcode::G_FPEXT:
6121 case TargetOpcode::G_FPTOSI:
6122 case TargetOpcode::G_FPTOUI:
6123 case TargetOpcode::G_FPTOSI_SAT:
6124 case TargetOpcode::G_FPTOUI_SAT:
6125 case TargetOpcode::G_SITOFP:
6126 case TargetOpcode::G_UITOFP: {
6128 LLT SrcExtTy;
6129 LLT DstExtTy;
6130 if (TypeIdx == 0) {
6131 DstExtTy = MoreTy;
6132 SrcExtTy = LLT::fixed_vector(
6133 MoreTy.getNumElements(),
6134 MRI.getType(MI.getOperand(1).getReg()).getElementType());
6135 } else {
6136 DstExtTy = LLT::fixed_vector(
6137 MoreTy.getNumElements(),
6138 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6139 SrcExtTy = MoreTy;
6140 }
6141 moreElementsVectorSrc(MI, SrcExtTy, 1);
6142 moreElementsVectorDst(MI, DstExtTy, 0);
6144 return Legalized;
6145 }
6146 case TargetOpcode::G_ICMP:
6147 case TargetOpcode::G_FCMP: {
6148 if (TypeIdx != 1)
6149 return UnableToLegalize;
6150
6152 moreElementsVectorSrc(MI, MoreTy, 2);
6153 moreElementsVectorSrc(MI, MoreTy, 3);
6154 LLT CondTy = LLT::fixed_vector(
6155 MoreTy.getNumElements(),
6156 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6157 moreElementsVectorDst(MI, CondTy, 0);
6159 return Legalized;
6160 }
6161 case TargetOpcode::G_BITCAST: {
6162 if (TypeIdx != 0)
6163 return UnableToLegalize;
6164
6165 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
6166 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6167
6168 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
6169 if (coefficient % DstTy.getNumElements() != 0)
6170 return UnableToLegalize;
6171
6172 coefficient = coefficient / DstTy.getNumElements();
6173
6174 LLT NewTy = SrcTy.changeElementCount(
6175 ElementCount::get(coefficient, MoreTy.isScalable()));
6177 moreElementsVectorSrc(MI, NewTy, 1);
6178 moreElementsVectorDst(MI, MoreTy, 0);
6180 return Legalized;
6181 }
6182 case TargetOpcode::G_VECREDUCE_FADD:
6183 case TargetOpcode::G_VECREDUCE_FMUL:
6184 case TargetOpcode::G_VECREDUCE_ADD:
6185 case TargetOpcode::G_VECREDUCE_MUL:
6186 case TargetOpcode::G_VECREDUCE_AND:
6187 case TargetOpcode::G_VECREDUCE_OR:
6188 case TargetOpcode::G_VECREDUCE_XOR:
6189 case TargetOpcode::G_VECREDUCE_SMAX:
6190 case TargetOpcode::G_VECREDUCE_SMIN:
6191 case TargetOpcode::G_VECREDUCE_UMAX:
6192 case TargetOpcode::G_VECREDUCE_UMIN: {
6193 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
6194 MachineOperand &MO = MI.getOperand(1);
6195 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6196 auto NeutralElement = getNeutralElementForVecReduce(
6197 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
6198
6200 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
6201 i != e; i++) {
6202 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
6203 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6204 NeutralElement, Idx);
6205 }
6206
6208 MO.setReg(NewVec.getReg(0));
6210 return Legalized;
6211 }
6212
6213 default:
6214 return UnableToLegalize;
6215 }
6216}
6217
6218LegalizerHelper::LegalizeResult
6219LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
6220 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6221 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6222 unsigned MaskNumElts = Mask.size();
6223 unsigned SrcNumElts = SrcTy.getNumElements();
6224 LLT DestEltTy = DstTy.getElementType();
6225
6226 if (MaskNumElts == SrcNumElts)
6227 return Legalized;
6228
6229 if (MaskNumElts < SrcNumElts) {
6230 // Extend mask to match new destination vector size with
6231 // undef values.
6232 SmallVector<int, 16> NewMask(SrcNumElts, -1);
6233 llvm::copy(Mask, NewMask.begin());
6234
6235 moreElementsVectorDst(MI, SrcTy, 0);
6237 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6238 MI.getOperand(1).getReg(),
6239 MI.getOperand(2).getReg(), NewMask);
6240 MI.eraseFromParent();
6241
6242 return Legalized;
6243 }
6244
6245 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
6246 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6247 LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
6248
6249 // Create new source vectors by concatenating the initial
6250 // source vectors with undefined vectors of the same size.
6251 auto Undef = MIRBuilder.buildUndef(SrcTy);
6252 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
6253 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
6254 MOps1[0] = MI.getOperand(1).getReg();
6255 MOps2[0] = MI.getOperand(2).getReg();
6256
6257 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
6258 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
6259
6260 // Readjust mask for new input vector length.
6261 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
6262 for (unsigned I = 0; I != MaskNumElts; ++I) {
6263 int Idx = Mask[I];
6264 if (Idx >= static_cast<int>(SrcNumElts))
6265 Idx += PaddedMaskNumElts - SrcNumElts;
6266 MappedOps[I] = Idx;
6267 }
6268
6269 // If we got more elements than required, extract subvector.
6270 if (MaskNumElts != PaddedMaskNumElts) {
6271 auto Shuffle =
6272 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
6273
6274 SmallVector<Register, 16> Elts(MaskNumElts);
6275 for (unsigned I = 0; I < MaskNumElts; ++I) {
6276 Elts[I] =
6277 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
6278 .getReg(0);
6279 }
6280 MIRBuilder.buildBuildVector(DstReg, Elts);
6281 } else {
6282 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
6283 }
6284
6285 MI.eraseFromParent();
6286 return Legalized;
6287}
6288
6289LegalizerHelper::LegalizeResult
6290LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
6291 unsigned int TypeIdx, LLT MoreTy) {
6292 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
6293 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6294 unsigned NumElts = DstTy.getNumElements();
6295 unsigned WidenNumElts = MoreTy.getNumElements();
6296
6297 if (DstTy.isVector() && Src1Ty.isVector() &&
6298 DstTy.getNumElements() != Src1Ty.getNumElements()) {
6299 return equalizeVectorShuffleLengths(MI);
6300 }
6301
6302 if (TypeIdx != 0)
6303 return UnableToLegalize;
6304
6305 // Expect a canonicalized shuffle.
6306 if (DstTy != Src1Ty || DstTy != Src2Ty)
6307 return UnableToLegalize;
6308
6309 moreElementsVectorSrc(MI, MoreTy, 1);
6310 moreElementsVectorSrc(MI, MoreTy, 2);
6311
6312 // Adjust mask based on new input vector length.
6313 SmallVector<int, 16> NewMask(WidenNumElts, -1);
6314 for (unsigned I = 0; I != NumElts; ++I) {
6315 int Idx = Mask[I];
6316 if (Idx < static_cast<int>(NumElts))
6317 NewMask[I] = Idx;
6318 else
6319 NewMask[I] = Idx - NumElts + WidenNumElts;
6320 }
6321 moreElementsVectorDst(MI, MoreTy, 0);
6323 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6324 MI.getOperand(1).getReg(),
6325 MI.getOperand(2).getReg(), NewMask);
6326 MI.eraseFromParent();
6327 return Legalized;
6328}
6329
6330void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
6331 ArrayRef<Register> Src1Regs,
6332 ArrayRef<Register> Src2Regs,
6333 LLT NarrowTy) {
6334 MachineIRBuilder &B = MIRBuilder;
6335 unsigned SrcParts = Src1Regs.size();
6336 unsigned DstParts = DstRegs.size();
6337
6338 unsigned DstIdx = 0; // Low bits of the result.
6339 Register FactorSum =
6340 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
6341 DstRegs[DstIdx] = FactorSum;
6342
6343 unsigned CarrySumPrevDstIdx;
6344 SmallVector<Register, 4> Factors;
6345
6346 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
6347 // Collect low parts of muls for DstIdx.
6348 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
6349 i <= std::min(DstIdx, SrcParts - 1); ++i) {
6350 MachineInstrBuilder Mul =
6351 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
6352 Factors.push_back(Mul.getReg(0));
6353 }
6354 // Collect high parts of muls from previous DstIdx.
6355 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
6356 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
6357 MachineInstrBuilder Umulh =
6358 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
6359 Factors.push_back(Umulh.getReg(0));
6360 }
6361 // Add CarrySum from additions calculated for previous DstIdx.
6362 if (DstIdx != 1) {
6363 Factors.push_back(CarrySumPrevDstIdx);
6364 }
6365
6366 Register CarrySum;
6367 // Add all factors and accumulate all carries into CarrySum.
6368 if (DstIdx != DstParts - 1) {
6369 MachineInstrBuilder Uaddo =
6370 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
6371 FactorSum = Uaddo.getReg(0);
6372 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
6373 for (unsigned i = 2; i < Factors.size(); ++i) {
6374 MachineInstrBuilder Uaddo =
6375 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
6376 FactorSum = Uaddo.getReg(0);
6377 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
6378 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
6379 }
6380 } else {
6381 // Since the value for the next index is not calculated, neither is CarrySum.
6382 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
6383 for (unsigned i = 2; i < Factors.size(); ++i)
6384 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
6385 }
6386
6387 CarrySumPrevDstIdx = CarrySum;
6388 DstRegs[DstIdx] = FactorSum;
6389 Factors.clear();
6390 }
6391}
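// Editor's note (illustrative, not from the upstream file): this is schoolbook
// multiplication on NarrowTy-sized digits. For a two-part multiply (an s64
// product formed from s32 digits) it emits roughly
//   Dst[0] = G_MUL Src1[0], Src2[0]
//   Dst[1] = G_MUL Src1[1], Src2[0] + G_MUL Src1[0], Src2[1]
//            + G_UMULH Src1[0], Src2[0]
// where G_UMULH supplies the carry out of the low digits; for the topmost
// digit the carries themselves no longer need to be accumulated.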
6392
6393LegalizerHelper::LegalizeResult
6394LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
6395 LLT NarrowTy) {
6396 if (TypeIdx != 0)
6397 return UnableToLegalize;
6398
6399 Register DstReg = MI.getOperand(0).getReg();
6400 LLT DstType = MRI.getType(DstReg);
6401 // FIXME: add support for vector types
6402 if (DstType.isVector())
6403 return UnableToLegalize;
6404
6405 unsigned Opcode = MI.getOpcode();
6406 unsigned OpO, OpE, OpF;
6407 switch (Opcode) {
6408 case TargetOpcode::G_SADDO:
6409 case TargetOpcode::G_SADDE:
6410 case TargetOpcode::G_UADDO:
6411 case TargetOpcode::G_UADDE:
6412 case TargetOpcode::G_ADD:
6413 OpO = TargetOpcode::G_UADDO;
6414 OpE = TargetOpcode::G_UADDE;
6415 OpF = TargetOpcode::G_UADDE;
6416 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
6417 OpF = TargetOpcode::G_SADDE;
6418 break;
6419 case TargetOpcode::G_SSUBO:
6420 case TargetOpcode::G_SSUBE:
6421 case TargetOpcode::G_USUBO:
6422 case TargetOpcode::G_USUBE:
6423 case TargetOpcode::G_SUB:
6424 OpO = TargetOpcode::G_USUBO;
6425 OpE = TargetOpcode::G_USUBE;
6426 OpF = TargetOpcode::G_USUBE;
6427 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
6428 OpF = TargetOpcode::G_SSUBE;
6429 break;
6430 default:
6431 llvm_unreachable("Unexpected add/sub opcode!");
6432 }
6433
6434 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
6435 unsigned NumDefs = MI.getNumExplicitDefs();
6436 Register Src1 = MI.getOperand(NumDefs).getReg();
6437 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
6438 Register CarryDst, CarryIn;
6439 if (NumDefs == 2)
6440 CarryDst = MI.getOperand(1).getReg();
6441 if (MI.getNumOperands() == NumDefs + 3)
6442 CarryIn = MI.getOperand(NumDefs + 2).getReg();
6443
6444 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
6445 LLT LeftoverTy, DummyTy;
6446 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
6447 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
6448 MIRBuilder, MRI);
6449 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
6450 MRI);
6451
6452 int NarrowParts = Src1Regs.size();
6453 Src1Regs.append(Src1Left);
6454 Src2Regs.append(Src2Left);
6455 DstRegs.reserve(Src1Regs.size());
6456
6457 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
6458 Register DstReg =
6459 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
6460 Register CarryOut;
6461 // Forward the final carry-out to the destination register
6462 if (i == e - 1 && CarryDst)
6463 CarryOut = CarryDst;
6464 else
6465 CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
6466
6467 if (!CarryIn) {
6468 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
6469 {Src1Regs[i], Src2Regs[i]});
6470 } else if (i == e - 1) {
6471 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
6472 {Src1Regs[i], Src2Regs[i], CarryIn});
6473 } else {
6474 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
6475 {Src1Regs[i], Src2Regs[i], CarryIn});
6476 }
6477
6478 DstRegs.push_back(DstReg);
6479 CarryIn = CarryOut;
6480 }
6481 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
6482 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
6483 ArrayRef(DstRegs).drop_front(NarrowParts));
6484
6485 MI.eraseFromParent();
6486 return Legalized;
6487}
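// Editor's note (illustrative, not from the upstream file): narrowing an s128
// G_ADD to s64 parts produces the classic carry chain
//   %lo:_(s64), %c1:_(s1) = G_UADDO %a_lo, %b_lo
//   %hi:_(s64), %c2:_(s1) = G_UADDE %a_hi, %b_hi, %c1
// with G_SADDE/G_SSUBE used only for the final part of the signed-overflow
// variants so the overflow flag gets the right signedness.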
6488
6489LegalizerHelper::LegalizeResult
6490LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
6491 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
6492
6493 LLT Ty = MRI.getType(DstReg);
6494 if (Ty.isVector())
6495 return UnableToLegalize;
6496
6497 unsigned Size = Ty.getSizeInBits();
6498 unsigned NarrowSize = NarrowTy.getSizeInBits();
6499 if (Size % NarrowSize != 0)
6500 return UnableToLegalize;
6501
6502 unsigned NumParts = Size / NarrowSize;
6503 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
6504 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
6505
6506 SmallVector<Register, 2> Src1Parts, Src2Parts;
6507 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
6508 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
6509 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
6510 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
6511
6512 // Take only high half of registers if this is high mul.
6513 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
6514 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6515 MI.eraseFromParent();
6516 return Legalized;
6517}
6518
6519LegalizerHelper::LegalizeResult
6520LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
6521 LLT NarrowTy) {
6522 if (TypeIdx != 0)
6523 return UnableToLegalize;
6524
6525 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
6526
6527 Register Src = MI.getOperand(1).getReg();
6528 LLT SrcTy = MRI.getType(Src);
6529
6530 // If all finite floats fit into the narrowed integer type, we can just swap
6531 // out the result type. This is practically only useful for conversions from
6532 // half to at least 16-bits, so just handle the one case.
6533 if (SrcTy.getScalarType() != LLT::scalar(16) ||
6534 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
6535 return UnableToLegalize;
6536
6538 narrowScalarDst(MI, NarrowTy, 0,
6539 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
6541 return Legalized;
6542}
6543
6544LegalizerHelper::LegalizeResult
6545LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
6546 LLT NarrowTy) {
6547 if (TypeIdx != 1)
6548 return UnableToLegalize;
6549
6550 uint64_t NarrowSize = NarrowTy.getSizeInBits();
6551
6552 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
6553 // FIXME: add support for when SizeOp1 isn't an exact multiple of
6554 // NarrowSize.
6555 if (SizeOp1 % NarrowSize != 0)
6556 return UnableToLegalize;
6557 int NumParts = SizeOp1 / NarrowSize;
6558
6559 SmallVector<Register, 2> SrcRegs, DstRegs;
6561 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
6562 MIRBuilder, MRI);
6563
6564 Register OpReg = MI.getOperand(0).getReg();
6565 uint64_t OpStart = MI.getOperand(2).getImm();
6566 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
6567 for (int i = 0; i < NumParts; ++i) {
6568 unsigned SrcStart = i * NarrowSize;
6569
6570 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
6571 // No part of the extract uses this subregister, ignore it.
6572 continue;
6573 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6574 // The entire subregister is extracted, forward the value.
6575 DstRegs.push_back(SrcRegs[i]);
6576 continue;
6577 }
6578
6579 // OpSegStart is where this destination segment would start in OpReg if it
6580 // extended infinitely in both directions.
6581 int64_t ExtractOffset;
6582 uint64_t SegSize;
6583 if (OpStart < SrcStart) {
6584 ExtractOffset = 0;
6585 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
6586 } else {
6587 ExtractOffset = OpStart - SrcStart;
6588 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
6589 }
6590
6591 Register SegReg = SrcRegs[i];
6592 if (ExtractOffset != 0 || SegSize != NarrowSize) {
6593 // A genuine extract is needed.
6594 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
6595 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
6596 }
6597
6598 DstRegs.push_back(SegReg);
6599 }
6600
6601 Register DstReg = MI.getOperand(0).getReg();
6602 if (MRI.getType(DstReg).isVector())
6603 MIRBuilder.buildBuildVector(DstReg, DstRegs);
6604 else if (DstRegs.size() > 1)
6605 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6606 else
6607 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
6608 MI.eraseFromParent();
6609 return Legalized;
6610}
6611
6612LegalizerHelper::LegalizeResult
6613LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
6614 LLT NarrowTy) {
6615 // FIXME: Don't know how to handle secondary types yet.
6616 if (TypeIdx != 0)
6617 return UnableToLegalize;
6618
6619 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
6621 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
6622 LLT LeftoverTy;
6623 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
6624 LeftoverRegs, MIRBuilder, MRI);
6625
6626 SrcRegs.append(LeftoverRegs);
6627
6628 uint64_t NarrowSize = NarrowTy.getSizeInBits();
6629 Register OpReg = MI.getOperand(2).getReg();
6630 uint64_t OpStart = MI.getOperand(3).getImm();
6631 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
6632 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
6633 unsigned DstStart = I * NarrowSize;
6634
6635 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6636 // The entire subregister is defined by this insert, forward the new
6637 // value.
6638 DstRegs.push_back(OpReg);
6639 continue;
6640 }
6641
6642 Register SrcReg = SrcRegs[I];
6643 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
6644 // The leftover reg is smaller than NarrowTy, so we need to extend it.
6645 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
6646 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
6647 }
6648
6649 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
6650 // No part of the insert affects this subregister, forward the original.
6651 DstRegs.push_back(SrcReg);
6652 continue;
6653 }
6654
6655 // OpSegStart is where this destination segment would start in OpReg if it
6656 // extended infinitely in both directions.
6657 int64_t ExtractOffset, InsertOffset;
6658 uint64_t SegSize;
6659 if (OpStart < DstStart) {
6660 InsertOffset = 0;
6661 ExtractOffset = DstStart - OpStart;
6662 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
6663 } else {
6664 InsertOffset = OpStart - DstStart;
6665 ExtractOffset = 0;
6666 SegSize =
6667 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
6668 }
6669
6670 Register SegReg = OpReg;
6671 if (ExtractOffset != 0 || SegSize != OpSize) {
6672 // A genuine extract is needed.
6673 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
6674 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
6675 }
6676
6677 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
6678 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
6679 DstRegs.push_back(DstReg);
6680 }
6681
6682 uint64_t WideSize = DstRegs.size() * NarrowSize;
6683 Register DstReg = MI.getOperand(0).getReg();
6684 if (WideSize > RegTy.getSizeInBits()) {
6685 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
6686 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
6687 MIRBuilder.buildTrunc(DstReg, MergeReg);
6688 } else
6689 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6690
6691 MI.eraseFromParent();
6692 return Legalized;
6693}
6694
6695LegalizerHelper::LegalizeResult
6696LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
6697 LLT NarrowTy) {
6698 Register DstReg = MI.getOperand(0).getReg();
6699 LLT DstTy = MRI.getType(DstReg);
6700
6701 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
6702
6703 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6704 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
6705 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6706 LLT LeftoverTy;
6707 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
6708 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
6709 return UnableToLegalize;
6710
6711 LLT Unused;
6712 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
6713 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6714 llvm_unreachable("inconsistent extractParts result");
6715
6716 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6717 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
6718 {Src0Regs[I], Src1Regs[I]});
6719 DstRegs.push_back(Inst.getReg(0));
6720 }
6721
6722 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6723 auto Inst = MIRBuilder.buildInstr(
6724 MI.getOpcode(),
6725 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
6726 DstLeftoverRegs.push_back(Inst.getReg(0));
6727 }
6728
6729 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6730 LeftoverTy, DstLeftoverRegs);
6731
6732 MI.eraseFromParent();
6733 return Legalized;
6734}
6735
6736LegalizerHelper::LegalizeResult
6737LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
6738 LLT NarrowTy) {
6739 if (TypeIdx != 0)
6740 return UnableToLegalize;
6741
6742 auto [DstReg, SrcReg] = MI.getFirst2Regs();
6743
6744 LLT DstTy = MRI.getType(DstReg);
6745 if (DstTy.isVector())
6746 return UnableToLegalize;
6747
6748 SmallVector<Register, 8> Parts;
6749 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
6750 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
6751 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
6752
6753 MI.eraseFromParent();
6754 return Legalized;
6755}
6756
6757LegalizerHelper::LegalizeResult
6758LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
6759 LLT NarrowTy) {
6760 if (TypeIdx != 0)
6761 return UnableToLegalize;
6762
6763 Register CondReg = MI.getOperand(1).getReg();
6764 LLT CondTy = MRI.getType(CondReg);
6765 if (CondTy.isVector()) // TODO: Handle vselect
6766 return UnableToLegalize;
6767
6768 Register DstReg = MI.getOperand(0).getReg();
6769 LLT DstTy = MRI.getType(DstReg);
6770
6771 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6772 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6773 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
6774 LLT LeftoverTy;
6775 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
6776 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6777 return UnableToLegalize;
6778
6779 LLT Unused;
6780 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
6781 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
6782 llvm_unreachable("inconsistent extractParts result");
6783
6784 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6785 auto Select = MIRBuilder.buildSelect(NarrowTy,
6786 CondReg, Src1Regs[I], Src2Regs[I]);
6787 DstRegs.push_back(Select.getReg(0));
6788 }
6789
6790 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6791 auto Select = MIRBuilder.buildSelect(
6792 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
6793 DstLeftoverRegs.push_back(Select.getReg(0));
6794 }
6795
6796 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6797 LeftoverTy, DstLeftoverRegs);
6798
6799 MI.eraseFromParent();
6800 return Legalized;
6801}
6802
6803LegalizerHelper::LegalizeResult
6804LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
6805 LLT NarrowTy) {
6806 if (TypeIdx != 1)
6807 return UnableToLegalize;
6808
6809 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6810 unsigned NarrowSize = NarrowTy.getSizeInBits();
6811
6812 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6813 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
6814
6815 MachineIRBuilder &B = MIRBuilder;
6816 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6817 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
6818 auto C_0 = B.buildConstant(NarrowTy, 0);
6819 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
6820 UnmergeSrc.getReg(1), C_0);
6821 auto LoCTLZ = IsUndef ?
6822 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
6823 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
6824 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
6825 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
6826 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
6827 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
6828
6829 MI.eraseFromParent();
6830 return Legalized;
6831 }
6832
6833 return UnableToLegalize;
6834}
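// Editor's note (illustrative, not from the upstream file): for an s128 source
// narrowed to s64 this computes ctlz(Hi:Lo) as ctlz(Hi) when Hi != 0 and as
// 64 + ctlz(Lo) otherwise; the Hi-side count can safely use
// G_CTLZ_ZERO_UNDEF because the select already excludes the Hi == 0 case.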
6835
6836LegalizerHelper::LegalizeResult
6837LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
6838 LLT NarrowTy) {
6839 if (TypeIdx != 1)
6840 return UnableToLegalize;
6841
6842 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6843 unsigned NarrowSize = NarrowTy.getSizeInBits();
6844
6845 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6846 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
6847
6848 MachineIRBuilder &B = MIRBuilder;
6849 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6850 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
6851 auto C_0 = B.buildConstant(NarrowTy, 0);
6852 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
6853 UnmergeSrc.getReg(0), C_0);
6854 auto HiCTTZ = IsUndef ?
6855 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
6856 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
6857 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
6858 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
6859 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
6860 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
6861
6862 MI.eraseFromParent();
6863 return Legalized;
6864 }
6865
6866 return UnableToLegalize;
6867}
6868
6869LegalizerHelper::LegalizeResult
6870LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
6871 LLT NarrowTy) {
6872 if (TypeIdx != 1)
6873 return UnableToLegalize;
6874
6875 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6876 unsigned NarrowSize = NarrowTy.getSizeInBits();
6877
6878 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6879 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
6880
6881 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
6882 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
6883 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
6884
6885 MI.eraseFromParent();
6886 return Legalized;
6887 }
6888
6889 return UnableToLegalize;
6890}
6891
6892 LegalizerHelper::LegalizeResult
6893 LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
6894 LLT NarrowTy) {
6895 if (TypeIdx != 1)
6896 return UnableToLegalize;
6897
6898 MachineIRBuilder &B = MIRBuilder;
6899 Register ExpReg = MI.getOperand(2).getReg();
6900 LLT ExpTy = MRI.getType(ExpReg);
6901
6902 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
6903
6904 // Clamp the exponent to the range of the target type.
6905 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
6906 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
6907 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
6908 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
6909
6910 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
6911 Observer.changingInstr(MI);
6912 MI.getOperand(2).setReg(Trunc.getReg(0));
6913 Observer.changedInstr(MI);
6914 return Legalized;
6915}
6916
6917 LegalizerHelper::LegalizeResult
6918 LegalizerHelper::lowerBitCount(MachineInstr &MI) {
6919 unsigned Opc = MI.getOpcode();
6920 const auto &TII = MIRBuilder.getTII();
6921 auto isSupported = [this](const LegalityQuery &Q) {
6922 auto QAction = LI.getAction(Q).Action;
6923 return QAction == Legal || QAction == Libcall || QAction == Custom;
6924 };
6925 switch (Opc) {
6926 default:
6927 return UnableToLegalize;
6928 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
6929 // This trivially expands to CTLZ.
6930 Observer.changingInstr(MI);
6931 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
6932 Observer.changedInstr(MI);
6933 return Legalized;
6934 }
6935 case TargetOpcode::G_CTLZ: {
6936 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6937 unsigned Len = SrcTy.getSizeInBits();
6938
6939 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6940 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
6941 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
6942 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
6943 auto ICmp = MIRBuilder.buildICmp(
6944 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
6945 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
6946 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
6947 MI.eraseFromParent();
6948 return Legalized;
6949 }
6950 // for now, we do this:
6951 // NewLen = NextPowerOf2(Len);
6952 // x = x | (x >> 1);
6953 // x = x | (x >> 2);
6954 // ...
6955 // x = x | (x >>16);
6956 // x = x | (x >>32); // for 64-bit input
6957 // Up to NewLen/2
6958 // return Len - popcount(x);
6959 //
6960 // Ref: "Hacker's Delight" by Henry Warren
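// Worked example (illustrative, Len = 32): for x = 0x00001000 the or/shift
// chain smears the leading one downwards, giving x = 0x00001FFF, so
// popcount(x) = 13 and the result is 32 - 13 = 19 = ctlz(0x00001000).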
6961 Register Op = SrcReg;
6962 unsigned NewLen = PowerOf2Ceil(Len);
6963 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
6964 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
6965 auto MIBOp = MIRBuilder.buildOr(
6966 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
6967 Op = MIBOp.getReg(0);
6968 }
6969 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
6970 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
6971 MIBPop);
6972 MI.eraseFromParent();
6973 return Legalized;
6974 }
6975 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
6976 // This trivially expands to CTTZ.
6977 Observer.changingInstr(MI);
6978 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
6979 Observer.changedInstr(MI);
6980 return Legalized;
6981 }
6982 case TargetOpcode::G_CTTZ: {
6983 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6984
6985 unsigned Len = SrcTy.getSizeInBits();
6986 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6987 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
6988 // zero.
6989 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
6990 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
6991 auto ICmp = MIRBuilder.buildICmp(
6992 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
6993 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
6994 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
6995 MI.eraseFromParent();
6996 return Legalized;
6997 }
6998 // for now, we use: { return popcount(~x & (x - 1)); }
6999 // unless the target has ctlz but not ctpop, in which case we use:
7000 // { return 32 - nlz(~x & (x-1)); }
7001 // Ref: "Hacker's Delight" by Henry Warren
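// Worked example (illustrative, 8-bit value): for x = 0b00101000,
// x - 1 = 0b00100111 and ~x = 0b11010111, so ~x & (x - 1) = 0b00000111,
// whose popcount (3) is exactly the number of trailing zeros of x.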
7002 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
7003 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7004 auto MIBTmp = MIRBuilder.buildAnd(
7005 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7006 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7007 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7008 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
7009 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
7010 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
7011 MI.eraseFromParent();
7012 return Legalized;
7013 }
7014 Observer.changingInstr(MI);
7015 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
7016 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7017 Observer.changedInstr(MI);
7018 return Legalized;
7019 }
7020 case TargetOpcode::G_CTPOP: {
7021 Register SrcReg = MI.getOperand(1).getReg();
7022 LLT Ty = MRI.getType(SrcReg);
7023 unsigned Size = Ty.getSizeInBits();
7024 MachineIRBuilder &B = MIRBuilder;
7025
7026 // Count set bits in blocks of 2 bits. Default approach would be
7027 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
7028 // We use following formula instead:
7029 // B2Count = val - { (val >> 1) & 0x55555555 }
7030 // since it gives same result in blocks of 2 with one instruction less.
7031 auto C_1 = B.buildConstant(Ty, 1);
7032 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
7033 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
7034 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
7035 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7036 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
7037
7038 // In order to get count in blocks of 4 add values from adjacent block of 2.
7039 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
7040 auto C_2 = B.buildConstant(Ty, 2);
7041 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
7042 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
7043 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
7044 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7045 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7046 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7047
7048 // For count in blocks of 8 bits we don't have to mask high 4 bits before
7049 // addition since count value sits in range {0,...,8} and 4 bits are enough
7050 // to hold such binary values. After addition high 4 bits still hold count
7051 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
7052 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
7053 auto C_4 = B.buildConstant(Ty, 4);
7054 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
7055 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
7056 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
7057 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
7058 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7059
7060 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
7061 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
7062 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
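// For example (illustrative, Size = 32): each byte of B8Count holds the
// popcount of the corresponding input byte, and multiplying by 0x01010101
// sums all four byte counts into the most significant byte, which the final
// shift by Size - 8 = 24 moves down into the low bits.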
7063 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
7064
7065 // Shift count result from 8 high bits to low bits.
7066 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
7067
7068 auto IsMulSupported = [this](const LLT Ty) {
7069 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7070 return Action == Legal || Action == WidenScalar || Action == Custom;
7071 };
7072 if (IsMulSupported(Ty)) {
7073 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
7074 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7075 } else {
7076 auto ResTmp = B8Count;
7077 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7078 auto ShiftC = B.buildConstant(Ty, Shift);
7079 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
7080 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
7081 }
7082 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7083 }
7084 MI.eraseFromParent();
7085 return Legalized;
7086 }
7087 }
7088}
7089
7090// Check that (every element of) Reg is undef or not an exact multiple of BW.
7091 static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
7092 Register Reg, unsigned BW) {
7093 return matchUnaryPredicate(
7094 MRI, Reg,
7095 [=](const Constant *C) {
7096 // Null constant here means an undef.
7097 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
7098 return !CI || CI->getValue().urem(BW) != 0;
7099 },
7100 /*AllowUndefs*/ true);
7101}
7102
7103 LegalizerHelper::LegalizeResult
7104 LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
7105 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7106 LLT Ty = MRI.getType(Dst);
7107 LLT ShTy = MRI.getType(Z);
7108
7109 unsigned BW = Ty.getScalarSizeInBits();
7110
7111 if (!isPowerOf2_32(BW))
7112 return UnableToLegalize;
7113
7114 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7115 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7116
7117 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7118 // fshl X, Y, Z -> fshr X, Y, -Z
7119 // fshr X, Y, Z -> fshl X, Y, -Z
7120 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
7121 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7122 } else {
7123 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7124 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7125 auto One = MIRBuilder.buildConstant(ShTy, 1);
7126 if (IsFSHL) {
7127 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7128 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
7129 } else {
7130 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7131 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
7132 }
7133
7134 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
7135 }
7136
7137 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
7138 MI.eraseFromParent();
7139 return Legalized;
7140}
7141
7142 LegalizerHelper::LegalizeResult
7143 LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
7144 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7145 LLT Ty = MRI.getType(Dst);
7146 LLT ShTy = MRI.getType(Z);
7147
7148 const unsigned BW = Ty.getScalarSizeInBits();
7149 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7150
7151 Register ShX, ShY;
7152 Register ShAmt, InvShAmt;
7153
7154 // FIXME: Emit optimized urem by constant instead of letting it expand later.
7155 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7156 // fshl: X << C | Y >> (BW - C)
7157 // fshr: X << (BW - C) | Y >> C
7158 // where C = Z % BW is not zero
7159 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7160 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7161 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
7162 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
7163 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
7164 } else {
7165 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7166 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7167 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
7168 if (isPowerOf2_32(BW)) {
7169 // Z % BW -> Z & (BW - 1)
7170 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
7171 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7172 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
7173 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
7174 } else {
7175 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7176 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7177 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
7178 }
7179
7180 auto One = MIRBuilder.buildConstant(ShTy, 1);
7181 if (IsFSHL) {
7182 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
7183 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
7184 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
7185 } else {
7186 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
7187 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
7188 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
7189 }
7190 }
7191
7192 MIRBuilder.buildOr(Dst, ShX, ShY);
7193 MI.eraseFromParent();
7194 return Legalized;
7195}
7196
7197 LegalizerHelper::LegalizeResult
7198 LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
7199 // These operations approximately do the following (while avoiding undefined
7200 // shifts by BW):
7201 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
7202 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
7203 Register Dst = MI.getOperand(0).getReg();
7204 LLT Ty = MRI.getType(Dst);
7205 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
7206
7207 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7208 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7209
7210 // TODO: Use smarter heuristic that accounts for vector legalization.
7211 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
7212 return lowerFunnelShiftAsShifts(MI);
7213
7214 // This only works for powers of 2, fallback to shifts if it fails.
7215 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
7216 if (Result == UnableToLegalize)
7217 return lowerFunnelShiftAsShifts(MI);
7218 return Result;
7219}
7220
7221 LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
7222 auto [Dst, Src] = MI.getFirst2Regs();
7223 LLT DstTy = MRI.getType(Dst);
7224 LLT SrcTy = MRI.getType(Src);
7225
7226 uint32_t DstTySize = DstTy.getSizeInBits();
7227 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
7228 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
7229
7230 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
7231 !isPowerOf2_32(SrcTyScalarSize))
7232 return UnableToLegalize;
7233
7234 // The step between extend is too large, split it by creating an intermediate
7235 // extend instruction
7236 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
7237 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
7238 // If the destination type is illegal, split it into multiple statements
7239 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
7240 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
7241 // Unmerge the vector
7242 LLT EltTy = MidTy.changeElementCount(
7243 MidTy.getElementCount().divideCoefficientBy(2));
7244 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
7245
7246 // ZExt the vectors
7247 LLT ZExtResTy = DstTy.changeElementCount(
7248 DstTy.getElementCount().divideCoefficientBy(2));
7249 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7250 {UnmergeSrc.getReg(0)});
7251 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7252 {UnmergeSrc.getReg(1)});
7253
7254 // Merge the ending vectors
7255 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
7256
7257 MI.eraseFromParent();
7258 return Legalized;
7259 }
7260 return UnableToLegalize;
7261}
7262
7263 LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
7264 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
7266 // Similar to how operand splitting is done in SelectionDAG, we can handle
7267 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
7268 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
7269 // %lo16(<4 x s16>) = G_TRUNC %inlo
7270 // %hi16(<4 x s16>) = G_TRUNC %inhi
7271 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
7272 // %res(<8 x s8>) = G_TRUNC %in16
7273
7274 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
7275
7276 Register DstReg = MI.getOperand(0).getReg();
7277 Register SrcReg = MI.getOperand(1).getReg();
7278 LLT DstTy = MRI.getType(DstReg);
7279 LLT SrcTy = MRI.getType(SrcReg);
7280
7281 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
7282 isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
7283 isPowerOf2_32(SrcTy.getNumElements()) &&
7284 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
7285 // Split input type.
7286 LLT SplitSrcTy = SrcTy.changeElementCount(
7287 SrcTy.getElementCount().divideCoefficientBy(2));
7288
7289 // First, split the source into two smaller vectors.
7290 SmallVector<Register, 2> SplitSrcs;
7291 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
7292
7293 // Truncate the splits into intermediate narrower elements.
7294 LLT InterTy;
7295 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7296 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
7297 else
7298 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
7299 for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
7300 SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
7301 }
7302
7303 // Combine the new truncates into one vector
7304 auto Merge = MIRBuilder.buildMergeLikeInstr(
7305 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
7306
7307 // Truncate the new vector to the final result type
7308 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7309 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
7310 else
7311 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
7312
7313 MI.eraseFromParent();
7314
7315 return Legalized;
7316 }
7317 return UnableToLegalize;
7318}
7319
7320 LegalizerHelper::LegalizeResult
7321 LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
7322 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7323 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
7324 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7325 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7326 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
7327 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
7328 MI.eraseFromParent();
7329 return Legalized;
7330}
7331
7332 LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
7333 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7334
7335 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
7336 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7337
7339
7340 // If a rotate in the other direction is supported, use it.
7341 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7342 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
7343 isPowerOf2_32(EltSizeInBits))
7344 return lowerRotateWithReverseRotate(MI);
7345
7346 // If a funnel shift is supported, use it.
7347 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
7348 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
7349 bool IsFShLegal = false;
7350 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
7351 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
7352 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
7353 Register R3) {
7354 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
7355 MI.eraseFromParent();
7356 return Legalized;
7357 };
7358 // If a funnel shift in the other direction is supported, use it.
7359 if (IsFShLegal) {
7360 return buildFunnelShift(FShOpc, Dst, Src, Amt);
7361 } else if (isPowerOf2_32(EltSizeInBits)) {
7362 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
7363 return buildFunnelShift(RevFsh, Dst, Src, Amt);
7364 }
7365 }
7366
7367 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
7368 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
7369 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
7370 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
7371 Register ShVal;
7372 Register RevShiftVal;
7373 if (isPowerOf2_32(EltSizeInBits)) {
7374 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
7375 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
7376 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
7377 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
7378 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
7379 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
7380 RevShiftVal =
7381 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
7382 } else {
7383 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
7384 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
7385 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
7386 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
7387 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
7388 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
7389 auto One = MIRBuilder.buildConstant(AmtTy, 1);
7390 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
7391 RevShiftVal =
7392 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
7393 }
7394 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
7395 MI.eraseFromParent();
7396 return Legalized;
7397}
7398
7399// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
7400// representation.
7403 auto [Dst, Src] = MI.getFirst2Regs();
7404 const LLT S64 = LLT::scalar(64);
7405 const LLT S32 = LLT::scalar(32);
7406 const LLT S1 = LLT::scalar(1);
7407
7408 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
7409
7410 // unsigned cul2f(ulong u) {
7411 // uint lz = clz(u);
7412 // uint e = (u != 0) ? 127U + 63U - lz : 0;
7413 // u = (u << lz) & 0x7fffffffffffffffUL;
7414 // ulong t = u & 0xffffffffffUL;
7415 // uint v = (e << 23) | (uint)(u >> 40);
7416 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
7417 // return as_float(v + r);
7418 // }
7419
7420 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
7421 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
7422
7423 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
7424
7425 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
7426 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
7427
7428 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
7429 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
7430
7431 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
7432 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
7433
7434 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
7435
7436 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
7437 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
7438
7439 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
7440 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
7441 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
7442
7443 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
7444 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
7445 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
7446 auto One = MIRBuilder.buildConstant(S32, 1);
7447
7448 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
7449 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
7450 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
7451 MIRBuilder.buildAdd(Dst, V, R);
7452
7453 MI.eraseFromParent();
7454 return Legalized;
7455}
7456
7457// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
7458// operations and G_SITOFP
7461 auto [Dst, Src] = MI.getFirst2Regs();
7462 const LLT S64 = LLT::scalar(64);
7463 const LLT S32 = LLT::scalar(32);
7464 const LLT S1 = LLT::scalar(1);
7465
7466 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
7467
7468 // For i64 < INT_MAX we simply reuse SITOFP.
7469 // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
7470 // saved before division, convert to float by SITOFP, multiply the result
7471 // by 2.
7472 auto One = MIRBuilder.buildConstant(S64, 1);
7473 auto Zero = MIRBuilder.buildConstant(S64, 0);
7474 // Result if Src < INT_MAX
7475 auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
7476 // Result if Src >= INT_MAX
7477 auto Halved = MIRBuilder.buildLShr(S64, Src, One);
7478 auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
7479 auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
7480 auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
7481 auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
7482 // Check if the original value is larger than INT_MAX by comparing with
7483 // zero to pick one of the two conversions.
7484 auto IsLarge =
7485 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, Src, Zero);
7486 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
7487
7488 MI.eraseFromParent();
7489 return Legalized;
7490}
7491
7492// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
7493// IEEE double representation.
7496 auto [Dst, Src] = MI.getFirst2Regs();
7497 const LLT S64 = LLT::scalar(64);
7498 const LLT S32 = LLT::scalar(32);
7499
7500 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
7501
7502 // We create double value from 32 bit parts with 32 exponent difference.
7503 // Note that + and - are float operations that adjust the implicit leading
7504 // one, the bases 2^52 and 2^84 are for illustrative purposes.
7505 //
7506 // X = 2^52 * 1.0...LowBits
7507 // Y = 2^84 * 1.0...HighBits
7508 // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
7509 // = - 2^52 * 1.0...HighBits
7510 // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
7511 auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
7512 auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
7513 auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
7514 auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
7515 auto HalfWidth = MIRBuilder.buildConstant(S64, 32);
7516
7517 auto LowBits = MIRBuilder.buildTrunc(S32, Src);
7518 LowBits = MIRBuilder.buildZExt(S64, LowBits);
7519 auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
7520 auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
7521 auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
7522 auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
7523 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
7524
7525 MI.eraseFromParent();
7526 return Legalized;
7527}
7528
7530 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7531
7532 if (SrcTy == LLT::scalar(1)) {
7533 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
7534 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
7535 MIRBuilder.buildSelect(Dst, Src, True, False);
7536 MI.eraseFromParent();
7537 return Legalized;
7538 }
7539
7540 if (SrcTy != LLT::scalar(64))
7541 return UnableToLegalize;
7542
7543 if (DstTy == LLT::scalar(32))
7544 // TODO: SelectionDAG has several alternative expansions to port which may
7545 // be more reasonable depending on the available instructions. We also need
7546 // a more advanced mechanism to choose an optimal version depending on
7547 // target features such as sitofp or CTLZ availability.
7548 return lowerU64ToF32WithSITOFP(MI);
7549
7550 if (DstTy == LLT::scalar(64))
7551 return lowerU64ToF64BitFloatOps(MI);
7552
7553 return UnableToLegalize;
7554}
7555
7557 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7558
7559 const LLT S64 = LLT::scalar(64);
7560 const LLT S32 = LLT::scalar(32);
7561 const LLT S1 = LLT::scalar(1);
7562
7563 if (SrcTy == S1) {
7564 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
7565 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
7566 MIRBuilder.buildSelect(Dst, Src, True, False);
7567 MI.eraseFromParent();
7568 return Legalized;
7569 }
7570
7571 if (SrcTy != S64)
7572 return UnableToLegalize;
7573
7574 if (DstTy == S32) {
7575 // signed cl2f(long l) {
7576 // long s = l >> 63;
7577 // float r = cul2f((l + s) ^ s);
7578 // return s ? -r : r;
7579 // }
7580 Register L = Src;
7581 auto SignBit = MIRBuilder.buildConstant(S64, 63);
7582 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
7583
7584 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
7585 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
7586 auto R = MIRBuilder.buildUITOFP(S32, Xor);
7587
7588 auto RNeg = MIRBuilder.buildFNeg(S32, R);
7589 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
7590 MIRBuilder.buildConstant(S64, 0));
7591 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
7592 MI.eraseFromParent();
7593 return Legalized;
7594 }
7595
7596 return UnableToLegalize;
7597}
7598
7600 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7601 const LLT S64 = LLT::scalar(64);
7602 const LLT S32 = LLT::scalar(32);
7603
7604 if (SrcTy != S64 && SrcTy != S32)
7605 return UnableToLegalize;
7606 if (DstTy != S32 && DstTy != S64)
7607 return UnableToLegalize;
7608
7609 // FPTOSI gives same result as FPTOUI for positive signed integers.
7610 // FPTOUI needs to deal with fp values that convert to unsigned integers
7611 // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
7612
7613 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
7614 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
7615 : APFloat::IEEEdouble(),
7616 APInt::getZero(SrcTy.getSizeInBits()));
7617 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
7618
7619 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
7620
7621 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
7622 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
7623 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
7624 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
7625 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
7626 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
7627 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
7628
7629 const LLT S1 = LLT::scalar(1);
7630
7631 MachineInstrBuilder FCMP =
7632 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
7633 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
7634
7635 MI.eraseFromParent();
7636 return Legalized;
7637}
7638
7640 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7641 const LLT S64 = LLT::scalar(64);
7642 const LLT S32 = LLT::scalar(32);
7643
7644 // FIXME: Only f32 to i64 conversions are supported.
7645 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
7646 return UnableToLegalize;
7647
7648 // Expand f32 -> i64 conversion
7649 // This algorithm comes from compiler-rt's implementation of fixsfdi:
7650 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
7651
7652 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
7653
7654 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
7655 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
7656
7657 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
7658 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
7659
7660 auto SignMask = MIRBuilder.buildConstant(SrcTy,
7661 APInt::getSignMask(SrcEltBits));
7662 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
7663 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
7664 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
7665 Sign = MIRBuilder.buildSExt(DstTy, Sign);
7666
7667 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
7668 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
7669 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
7670
7671 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
7672 R = MIRBuilder.buildZExt(DstTy, R);
7673
7674 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
7675 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
7676 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
7677 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
7678
7679 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
7680 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
7681
7682 const LLT S1 = LLT::scalar(1);
7683 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
7684 S1, Exponent, ExponentLoBit);
7685
7686 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
7687
7688 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
7689 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
7690
7691 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
7692
7693 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
7694 S1, Exponent, ZeroSrcTy);
7695
7696 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
7697 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
7698
7699 MI.eraseFromParent();
7700 return Legalized;
7701}
7702
7705 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7706
7707 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
7708 unsigned SatWidth = DstTy.getScalarSizeInBits();
7709
7710 // Determine minimum and maximum integer values and their corresponding
7711 // floating-point values.
7712 APInt MinInt, MaxInt;
7713 if (IsSigned) {
7714 MinInt = APInt::getSignedMinValue(SatWidth);
7715 MaxInt = APInt::getSignedMaxValue(SatWidth);
7716 } else {
7717 MinInt = APInt::getMinValue(SatWidth);
7718 MaxInt = APInt::getMaxValue(SatWidth);
7719 }
7720
7721 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
7722 APFloat MinFloat(Semantics);
7723 APFloat MaxFloat(Semantics);
7724
7725 APFloat::opStatus MinStatus =
7726 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
7727 APFloat::opStatus MaxStatus =
7728 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
7729 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
7730 !(MaxStatus & APFloat::opStatus::opInexact);
7731
7732 // If the integer bounds are exactly representable as floats, emit a
7733 // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
7734 // and selects.
7735 if (AreExactFloatBounds) {
7736 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
7737 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
7738 auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
7739 SrcTy.changeElementSize(1), Src, MaxC);
7740 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
7741 // Clamp by MaxFloat from above. NaN cannot occur.
7742 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
7743 auto MinP =
7744 MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max,
7745 MinC, MachineInstr::FmNoNans);
7746 auto Min =
7747 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
7748 // Convert clamped value to integer. In the unsigned case we're done,
7749 // because we mapped NaN to MinFloat, which will cast to zero.
7750 if (!IsSigned) {
7751 MIRBuilder.buildFPTOUI(Dst, Min);
7752 MI.eraseFromParent();
7753 return Legalized;
7754 }
7755
7756 // Otherwise, select 0 if Src is NaN.
7757 auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
7758 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
7759 DstTy.changeElementSize(1), Src, Src);
7760 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
7761 FpToInt);
7762 MI.eraseFromParent();
7763 return Legalized;
7764 }
7765
7766 // Result of direct conversion. The assumption here is that the operation is
7767 // non-trapping and it's fine to apply it to an out-of-range value if we
7768 // select it away later.
7769 auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
7770 : MIRBuilder.buildFPTOUI(DstTy, Src);
7771
7772 // If Src ULT MinFloat, select MinInt. In particular, this also selects
7773 // MinInt if Src is NaN.
7774 auto ULT =
7775 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
7776 MIRBuilder.buildFConstant(SrcTy, MinFloat));
7777 auto Max = MIRBuilder.buildSelect(
7778 DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
7779 // If Src OGT MaxFloat, select MaxInt.
7780 auto OGT =
7781 MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
7782 MIRBuilder.buildFConstant(SrcTy, MaxFloat));
7783
7784 // In the unsigned case we are done, because we mapped NaN to MinInt, which
7785 // is already zero.
7786 if (!IsSigned) {
7787 MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
7788 Max);
7789 MI.eraseFromParent();
7790 return Legalized;
7791 }
7792
7793 // Otherwise, select 0 if Src is NaN.
7794 auto Min = MIRBuilder.buildSelect(
7795 DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
7796 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
7797 DstTy.changeElementSize(1), Src, Src);
7798 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
7799 MI.eraseFromParent();
7800 return Legalized;
7801}
7802
7803// f64 -> f16 conversion using round-to-nearest-even rounding mode.
7806 const LLT S1 = LLT::scalar(1);
7807 const LLT S32 = LLT::scalar(32);
7808
7809 auto [Dst, Src] = MI.getFirst2Regs();
7810 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
7811 MRI.getType(Src).getScalarType() == LLT::scalar(64));
7812
7813 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
7814 return UnableToLegalize;
7815
7817 unsigned Flags = MI.getFlags();
7818 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
7819 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
7820 MI.eraseFromParent();
7821 return Legalized;
7822 }
7823
7824 const unsigned ExpMask = 0x7ff;
7825 const unsigned ExpBiasf64 = 1023;
7826 const unsigned ExpBiasf16 = 15;
7827
7828 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
7829 Register U = Unmerge.getReg(0);
7830 Register UH = Unmerge.getReg(1);
7831
7832 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
7833 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
7834
7835 // Subtract the fp64 exponent bias (1023) to get the real exponent and
7836 // add the f16 bias (15) to get the biased exponent for the f16 format.
7837 E = MIRBuilder.buildAdd(
7838 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
7839
7840 auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
7841 M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
7842
7843 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
7844 MIRBuilder.buildConstant(S32, 0x1ff));
7845 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
7846
7847 auto Zero = MIRBuilder.buildConstant(S32, 0);
7848 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
7849 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
7850 M = MIRBuilder.buildOr(S32, M, Lo40Set);
7851
7852 // (M != 0 ? 0x0200 : 0) | 0x7c00;
7853 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
7854 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
7855 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
7856
7857 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
7858 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
7859
7860 // N = M | (E << 12);
7861 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
7862 auto N = MIRBuilder.buildOr(S32, M, EShl12);
7863
7864 // B = clamp(1-E, 0, 13);
7865 auto One = MIRBuilder.buildConstant(S32, 1);
7866 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
7867 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
7868 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
7869
7870 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
7871 MIRBuilder.buildConstant(S32, 0x1000));
7872
7873 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
7874 auto D0 = MIRBuilder.buildShl(S32, D, B);
7875
7876 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
7877 D0, SigSetHigh);
7878 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
7879 D = MIRBuilder.buildOr(S32, D, D1);
7880
7881 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
7882 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
7883
7884 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
7885 V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
7886
7887 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
7888 MIRBuilder.buildConstant(S32, 3));
7889 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
7890
7891 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
7892 MIRBuilder.buildConstant(S32, 5));
7893 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
7894
7895 V1 = MIRBuilder.buildOr(S32, V0, V1);
7896 V = MIRBuilder.buildAdd(S32, V, V1);
7897
7898 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
7899 E, MIRBuilder.buildConstant(S32, 30));
7900 V = MIRBuilder.buildSelect(S32, CmpEGt30,
7901 MIRBuilder.buildConstant(S32, 0x7c00), V);
7902
7903 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
7904 E, MIRBuilder.buildConstant(S32, 1039));
7905 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
7906
7907 // Extract the sign bit.
7908 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
7909 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
7910
7911 // Insert the sign bit
7912 V = MIRBuilder.buildOr(S32, Sign, V);
7913
7914 MIRBuilder.buildTrunc(Dst, V);
7915 MI.eraseFromParent();
7916 return Legalized;
7917}
7918
7921 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
7922 const LLT S64 = LLT::scalar(64);
7923 const LLT S16 = LLT::scalar(16);
7924
7925 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
7926 return lowerFPTRUNC_F64_TO_F16(MI);
7927
7928 return UnableToLegalize;
7929}
7930
7932 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7933 LLT Ty = MRI.getType(Dst);
7934
7935 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
7936 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
7937 MI.eraseFromParent();
7938 return Legalized;
7939}
7940
7942 switch (Opc) {
7943 case TargetOpcode::G_SMIN:
7944 return CmpInst::ICMP_SLT;
7945 case TargetOpcode::G_SMAX:
7946 return CmpInst::ICMP_SGT;
7947 case TargetOpcode::G_UMIN:
7948 return CmpInst::ICMP_ULT;
7949 case TargetOpcode::G_UMAX:
7950 return CmpInst::ICMP_UGT;
7951 default:
7952 llvm_unreachable("not in integer min/max");
7953 }
7954}
7955
7957 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7958
7959 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
7960 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
7961
7962 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
7963 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
7964
7965 MI.eraseFromParent();
7966 return Legalized;
7967}
7968
7971 GSUCmp *Cmp = cast<GSUCmp>(&MI);
7972
7973 Register Dst = Cmp->getReg(0);
7974 LLT DstTy = MRI.getType(Dst);
7975 LLT SrcTy = MRI.getType(Cmp->getReg(1));
7976 LLT CmpTy = DstTy.changeElementSize(1);
7977
7978 CmpInst::Predicate LTPredicate = Cmp->isSigned()
7979 ? CmpInst::ICMP_SLT
7980 : CmpInst::ICMP_ULT;
7981 CmpInst::Predicate GTPredicate = Cmp->isSigned()
7982 ? CmpInst::ICMP_SGT
7983 : CmpInst::ICMP_UGT;
7984
7985 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
7986 auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
7987 Cmp->getRHSReg());
7988 auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
7989 Cmp->getRHSReg());
7990
7991 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
7992 auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
7993 if (TLI.shouldExpandCmpUsingSelects(getApproximateEVTForLLT(SrcTy, Ctx)) ||
7994 BC == TargetLowering::UndefinedBooleanContent) {
7995 auto One = MIRBuilder.buildConstant(DstTy, 1);
7996 auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
7997
7998 auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
7999 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8000 } else {
8001 if (BC == TargetLowering::ZeroOrNegativeOneBooleanContent)
8002 std::swap(IsGT, IsLT);
8003 // Extend boolean results to DstTy, which is at least i2, before subtracting
8004 // them.
8005 unsigned BoolExtOp =
8006 MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
8007 IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8008 IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8009 MIRBuilder.buildSub(Dst, IsGT, IsLT);
8010 }
8011
8012 MI.eraseFromParent();
8013 return Legalized;
8014}
8015
8018 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
8019 const int Src0Size = Src0Ty.getScalarSizeInBits();
8020 const int Src1Size = Src1Ty.getScalarSizeInBits();
8021
8022 auto SignBitMask = MIRBuilder.buildConstant(
8023 Src0Ty, APInt::getSignMask(Src0Size));
8024
8025 auto NotSignBitMask = MIRBuilder.buildConstant(
8026 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
8027
8028 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
8029 Register And1;
8030 if (Src0Ty == Src1Ty) {
8031 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
8032 } else if (Src0Size > Src1Size) {
8033 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
8034 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
8035 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
8036 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
8037 } else {
8038 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
8039 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
8040 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
8041 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
8042 }
8043
8044 // Be careful about setting nsz/nnan/ninf on every instruction, since the
8045 // constants are a nan and -0.0, but the final result should preserve
8046 // everything.
8047 unsigned Flags = MI.getFlags();
8048
8049 // We masked the sign bit and the not-sign bit, so these are disjoint.
8050 Flags |= MachineInstr::Disjoint;
8051
8052 MIRBuilder.buildOr(Dst, And0, And1, Flags);
8053
8054 MI.eraseFromParent();
8055 return Legalized;
8056}
8057
8060 unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
8061 TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
8062
8063 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8064 LLT Ty = MRI.getType(Dst);
8065
8066 if (!MI.getFlag(MachineInstr::FmNoNans)) {
8067 // Insert canonicalizes if it's possible we need to quiet to get correct
8068 // sNaN behavior.
8069
8070 // Note this must be done here, and not as an optimization combine in the
8071 // absence of a dedicate quiet-snan instruction as we're using an
8072 // omni-purpose G_FCANONICALIZE.
8073 if (!isKnownNeverSNaN(Src0, MRI))
8074 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
8075
8076 if (!isKnownNeverSNaN(Src1, MRI))
8077 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
8078 }
8079
8080 // If there are no nans, it's safe to simply replace this with the non-IEEE
8081 // version.
8082 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
8083 MI.eraseFromParent();
8084 return Legalized;
8085}
8086
8088 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
8089 Register DstReg = MI.getOperand(0).getReg();
8090 LLT Ty = MRI.getType(DstReg);
8091 unsigned Flags = MI.getFlags();
8092
8093 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
8094 Flags);
8095 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
8096 MI.eraseFromParent();
8097 return Legalized;
8098}
8099
8102 auto [DstReg, X] = MI.getFirst2Regs();
8103 const unsigned Flags = MI.getFlags();
8104 const LLT Ty = MRI.getType(DstReg);
8105 const LLT CondTy = Ty.changeElementSize(1);
8106
8107 // round(x) =>
8108 // t = trunc(x);
8109 // d = fabs(x - t);
8110 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
8111 // return t + o;
8112
8113 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
8114
8115 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
8116 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
8117
8118 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
8119 auto Cmp =
8120 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
8121
8122 // Could emit G_UITOFP instead
8123 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
8124 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8125 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
8126 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
8127
8128 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
8129
8130 MI.eraseFromParent();
8131 return Legalized;
8132}
8133
8135 auto [DstReg, SrcReg] = MI.getFirst2Regs();
8136 unsigned Flags = MI.getFlags();
8137 LLT Ty = MRI.getType(DstReg);
8138 const LLT CondTy = Ty.changeElementSize(1);
8139
8140 // result = trunc(src);
8141 // if (src < 0.0 && src != result)
8142 // result += -1.0.
8143
8144 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
8145 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8146
8147 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
8148 SrcReg, Zero, Flags);
8149 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
8150 SrcReg, Trunc, Flags);
8151 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
8152 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
8153
8154 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
8155 MI.eraseFromParent();
8156 return Legalized;
8157}
8158
8161 const unsigned NumOps = MI.getNumOperands();
8162 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
8163 unsigned PartSize = Src0Ty.getSizeInBits();
8164
8165 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
8166 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
8167
8168 for (unsigned I = 2; I != NumOps; ++I) {
8169 const unsigned Offset = (I - 1) * PartSize;
8170
8171 Register SrcReg = MI.getOperand(I).getReg();
8172 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
8173
8174 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
8175 MRI.createGenericVirtualRegister(WideTy);
8176
8177 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
8178 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
8179 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
8180 ResultReg = NextResult;
8181 }
8182
8183 if (DstTy.isPointer()) {
8184 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
8185 DstTy.getAddressSpace())) {
8186 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
8187 return UnableToLegalize;
8188 }
8189
8190 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
8191 }
8192
8193 MI.eraseFromParent();
8194 return Legalized;
8195}
8196
8199 const unsigned NumDst = MI.getNumOperands() - 1;
8200 Register SrcReg = MI.getOperand(NumDst).getReg();
8201 Register Dst0Reg = MI.getOperand(0).getReg();
8202 LLT DstTy = MRI.getType(Dst0Reg);
8203 if (DstTy.isPointer())
8204 return UnableToLegalize; // TODO
8205
8206 SrcReg = coerceToScalar(SrcReg);
8207 if (!SrcReg)
8208 return UnableToLegalize;
8209
8210 // Expand scalarizing unmerge as bitcast to integer and shift.
8211 LLT IntTy = MRI.getType(SrcReg);
8212
8213 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
8214
8215 const unsigned DstSize = DstTy.getSizeInBits();
8216 unsigned Offset = DstSize;
8217 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
8218 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
8219 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
8220 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
8221 }
8222
8223 MI.eraseFromParent();
8224 return Legalized;
8225}
8226
8227/// Lower a vector extract or insert by writing the vector to a stack temporary
8228/// and reloading the element or vector.
8229///
8230/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
8231/// =>
8232/// %stack_temp = G_FRAME_INDEX
8233/// G_STORE %vec, %stack_temp
8234/// %idx = clamp(%idx, %vec.getNumElements())
8235/// %element_ptr = G_PTR_ADD %stack_temp, %idx
8236/// %dst = G_LOAD %element_ptr
8239 Register DstReg = MI.getOperand(0).getReg();
8240 Register SrcVec = MI.getOperand(1).getReg();
8241 Register InsertVal;
8242 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
8243 InsertVal = MI.getOperand(2).getReg();
8244
8245 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
8246
8247 LLT VecTy = MRI.getType(SrcVec);
8248 LLT EltTy = VecTy.getElementType();
8249 unsigned NumElts = VecTy.getNumElements();
8250
8251 int64_t IdxVal;
8252 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
8253 SmallVector<Register, 8> SrcRegs;
8254 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
8255
8256 if (InsertVal) {
8257 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
8258 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
8259 } else {
8260 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
8261 }
8262
8263 MI.eraseFromParent();
8264 return Legalized;
8265 }
8266
8267 if (!EltTy.isByteSized()) { // Not implemented.
8268 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
8269 return UnableToLegalize;
8270 }
8271
8272 unsigned EltBytes = EltTy.getSizeInBytes();
8273 Align VecAlign = getStackTemporaryAlignment(VecTy);
8274 Align EltAlign;
8275
8276 MachinePointerInfo PtrInfo;
8277 auto StackTemp = createStackTemporary(
8278 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
8279 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
8280
8281 // Get the pointer to the element, and be sure not to hit undefined behavior
8282 // if the index is out of bounds.
8283 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
8284
8285 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
8286 int64_t Offset = IdxVal * EltBytes;
8287 PtrInfo = PtrInfo.getWithOffset(Offset);
8288 EltAlign = commonAlignment(VecAlign, Offset);
8289 } else {
8290 // We lose information with a variable offset.
8291 EltAlign = getStackTemporaryAlignment(EltTy);
8292 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
8293 }
8294
8295 if (InsertVal) {
8296 // Write the inserted element
8297 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
8298
8299 // Reload the whole vector.
8300 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
8301 } else {
8302 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
8303 }
8304
8305 MI.eraseFromParent();
8306 return Legalized;
8307}
8308
8311 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
8312 MI.getFirst3RegLLTs();
8313 LLT IdxTy = LLT::scalar(32);
8314
8315 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
8316 Register Undef;
8317 SmallVector<Register, 32> BuildVec;
8318 LLT EltTy = DstTy.getScalarType();
8319
8320 for (int Idx : Mask) {
8321 if (Idx < 0) {
8322 if (!Undef.isValid())
8323 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
8324 BuildVec.push_back(Undef);
8325 continue;
8326 }
8327
8328 if (Src0Ty.isScalar()) {
8329 BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
8330 } else {
8331 int NumElts = Src0Ty.getNumElements();
8332 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
8333 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
8334 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
8335 auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
8336 BuildVec.push_back(Extract.getReg(0));
8337 }
8338 }
8339
8340 if (DstTy.isScalar())
8341 MIRBuilder.buildCopy(DstReg, BuildVec[0]);
8342 else
8343 MIRBuilder.buildBuildVector(DstReg, BuildVec);
8344 MI.eraseFromParent();
8345 return Legalized;
8346}
8347
8350 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
8351 MI.getFirst4RegLLTs();
8352
8353 if (VecTy.isScalableVector())
8354 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
8355
8356 Align VecAlign = getStackTemporaryAlignment(VecTy);
8357 MachinePointerInfo PtrInfo;
8358 Register StackPtr =
8359 createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
8360 PtrInfo)
8361 .getReg(0);
8362 MachinePointerInfo ValPtrInfo =
8363 MachinePointerInfo::getUnknownStack(*MI.getMF());
8364
8365 LLT IdxTy = LLT::scalar(32);
8366 LLT ValTy = VecTy.getElementType();
8367 Align ValAlign = getStackTemporaryAlignment(ValTy);
8368
8369 auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
8370
8371 bool HasPassthru =
8372 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
8373
8374 if (HasPassthru)
8375 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
8376
8377 Register LastWriteVal;
8378 std::optional<APInt> PassthruSplatVal =
8379 isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
8380
8381 if (PassthruSplatVal.has_value()) {
8382 LastWriteVal =
8383 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
8384 } else if (HasPassthru) {
8385 auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
8386 Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
8387 {LLT::scalar(32)}, {Popcount});
8388
8389 Register LastElmtPtr =
8390 getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
8391 LastWriteVal =
8392 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
8393 .getReg(0);
8394 }
8395
8396 unsigned NumElmts = VecTy.getNumElements();
8397 for (unsigned I = 0; I < NumElmts; ++I) {
8398 auto Idx = MIRBuilder.buildConstant(IdxTy, I);
8399 auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
8400 Register ElmtPtr =
8401 getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
8402 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
8403
8404 LLT MaskITy = MaskTy.getElementType();
8405 auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
8406 if (MaskITy.getSizeInBits() > 1)
8407 MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
8408
8409 MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
8410 OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
8411
8412 if (HasPassthru && I == NumElmts - 1) {
8413 auto EndOfVector =
8414 MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
8415 auto AllLanesSelected = MIRBuilder.buildICmp(
8416 CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
8417 OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
8418 {OutPos, EndOfVector});
8419 ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
8420
8421 LastWriteVal =
8422 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
8423 .getReg(0);
8424 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
8425 }
8426 }
8427
8428 // TODO: Use StackPtr's FrameIndex alignment.
8429 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
8430
8431 MI.eraseFromParent();
8432 return Legalized;
8433}
8434
8436 Register AllocSize,
8437 Align Alignment,
8438 LLT PtrTy) {
8439 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
8440
8441 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
8442 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
8443
8444 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
8445 // have to generate an extra instruction to negate the alloc and then use
8446 // G_PTR_ADD to add the negative offset.
8447 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
8448 if (Alignment > Align(1)) {
8449 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
8450 AlignMask.negate();
8451 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
8452 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
8453 }
8454
8455 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
8456}
8457
8460 const auto &MF = *MI.getMF();
8461 const auto &TFI = *MF.getSubtarget().getFrameLowering();
8462 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
8463 return UnableToLegalize;
8464
8465 Register Dst = MI.getOperand(0).getReg();
8466 Register AllocSize = MI.getOperand(1).getReg();
8467 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
8468
8469 LLT PtrTy = MRI.getType(Dst);
8470 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
8471 Register SPTmp =
8472 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
8473
8474 MIRBuilder.buildCopy(SPReg, SPTmp);
8475 MIRBuilder.buildCopy(Dst, SPTmp);
8476
8477 MI.eraseFromParent();
8478 return Legalized;
8479}
8480
8481LegalizerHelper::LegalizeResult
8482LegalizerHelper::lowerStackSave(MachineInstr &MI) {
8483 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
8484 if (!StackPtr)
8485 return UnableToLegalize;
8486
8487 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
8488 MI.eraseFromParent();
8489 return Legalized;
8490}
8491
8492LegalizerHelper::LegalizeResult
8493LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
8494 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
8495 if (!StackPtr)
8496 return UnableToLegalize;
8497
8498 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
8499 MI.eraseFromParent();
8500 return Legalized;
8501}
8502
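// Lower G_EXTRACT either by unmerging the source vector and re-merging the
// requested elements, or by bitcasting to an integer, shifting right by the
// offset, and truncating.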
8503LegalizerHelper::LegalizeResult
8504LegalizerHelper::lowerExtract(MachineInstr &MI) {
8505 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
8506 unsigned Offset = MI.getOperand(2).getImm();
8507
8508 // Extract sub-vector or one element
8509 if (SrcTy.isVector()) {
8510 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
8511 unsigned DstSize = DstTy.getSizeInBits();
8512
8513 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
8514 (Offset + DstSize <= SrcTy.getSizeInBits())) {
8515 // Unmerge and allow access to each Src element for the artifact combiner.
8516 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
8517
8518 // Take element(s) we need to extract and copy it (merge them).
8519 SmallVector<Register, 8> SubVectorElts;
8520 for (unsigned Idx = Offset / SrcEltSize;
8521 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
8522 SubVectorElts.push_back(Unmerge.getReg(Idx));
8523 }
8524 if (SubVectorElts.size() == 1)
8525 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
8526 else
8527 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
8528
8529 MI.eraseFromParent();
8530 return Legalized;
8531 }
8532 }
8533
8534 if (DstTy.isScalar() &&
8535 (SrcTy.isScalar() ||
8536 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
8537 LLT SrcIntTy = SrcTy;
8538 if (!SrcTy.isScalar()) {
8539 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
8540 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
8541 }
8542
8543 if (Offset == 0)
8544 MIRBuilder.buildTrunc(DstReg, SrcReg);
8545 else {
8546 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
8547 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
8548 MIRBuilder.buildTrunc(DstReg, Shr);
8549 }
8550
8551 MI.eraseFromParent();
8552 return Legalized;
8553 }
8554
8555 return UnableToLegalize;
8556}
8557
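// Lower G_INSERT either by unmerging the destination vector and replacing the
// covered elements, or by masking out the insertion window in an integer view
// and OR-ing in the zero-extended, shifted source.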
8558LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
8559 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
8560 uint64_t Offset = MI.getOperand(3).getImm();
8561
8562 LLT DstTy = MRI.getType(Src);
8563 LLT InsertTy = MRI.getType(InsertSrc);
8564
8565 // Insert sub-vector or one element
8566 if (DstTy.isVector() && !InsertTy.isPointer()) {
8567 LLT EltTy = DstTy.getElementType();
8568 unsigned EltSize = EltTy.getSizeInBits();
8569 unsigned InsertSize = InsertTy.getSizeInBits();
8570
8571 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
8572 (Offset + InsertSize <= DstTy.getSizeInBits())) {
8573 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
8574 SmallVector<Register, 8> DstElts;
8575 unsigned Idx = 0;
8576 // Elements from Src before insert start Offset
8577 for (; Idx < Offset / EltSize; ++Idx) {
8578 DstElts.push_back(UnmergeSrc.getReg(Idx));
8579 }
8580
8581 // Replace elements in Src with elements from InsertSrc
8582 if (InsertTy.getSizeInBits() > EltSize) {
8583 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
8584 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
8585 ++Idx, ++i) {
8586 DstElts.push_back(UnmergeInsertSrc.getReg(i));
8587 }
8588 } else {
8589 DstElts.push_back(InsertSrc);
8590 ++Idx;
8591 }
8592
8593 // Remaining elements from Src after insert
8594 for (; Idx < DstTy.getNumElements(); ++Idx) {
8595 DstElts.push_back(UnmergeSrc.getReg(Idx));
8596 }
8597
8598 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
8599 MI.eraseFromParent();
8600 return Legalized;
8601 }
8602 }
8603
8604 if (InsertTy.isVector() ||
8605 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
8606 return UnableToLegalize;
8607
8608 const DataLayout &DL = MIRBuilder.getDataLayout();
8609 if ((DstTy.isPointer() &&
8610 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
8611 (InsertTy.isPointer() &&
8612 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
8613 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
8614 return UnableToLegalize;
8615 }
8616
8617 LLT IntDstTy = DstTy;
8618
8619 if (!DstTy.isScalar()) {
8620 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
8621 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
8622 }
8623
8624 if (!InsertTy.isScalar()) {
8625 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
8626 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
8627 }
8628
8629 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
8630 if (Offset != 0) {
8631 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
8632 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
8633 }
8634
8635 APInt MaskVal = APInt::getBitsSetWithWrap(
8636 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
8637
8638 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
8639 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
8640 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
8641
8642 MIRBuilder.buildCast(Dst, Or);
8643 MI.eraseFromParent();
8644 return Legalized;
8645}
8646
8647LegalizerHelper::LegalizeResult
8648LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
8649 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
8650 MI.getFirst4RegLLTs();
8651 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
8652
8653 LLT Ty = Dst0Ty;
8654 LLT BoolTy = Dst1Ty;
8655
8656 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
8657
8658 if (IsAdd)
8659 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
8660 else
8661 MIRBuilder.buildSub(NewDst0, LHS, RHS);
8662
8663 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
8664
8665 auto Zero = MIRBuilder.buildConstant(Ty, 0);
8666
8667 // For an addition, the result should be less than one of the operands (LHS)
8668 // if and only if the other operand (RHS) is negative, otherwise there will
8669 // be overflow.
8670 // For a subtraction, the result should be less than one of the operands
8671 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
8672 // otherwise there will be overflow.
8673 auto ResultLowerThanLHS =
8674 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
8675 auto ConditionRHS = MIRBuilder.buildICmp(
8676 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
8677
8678 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
8679
8680 MIRBuilder.buildCopy(Dst0, NewDst0);
8681 MI.eraseFromParent();
8682
8683 return Legalized;
8684}
8685
8686LegalizerHelper::LegalizeResult
8687LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
8688 auto [Res, LHS, RHS] = MI.getFirst3Regs();
8689 LLT Ty = MRI.getType(Res);
8690 bool IsSigned;
8691 bool IsAdd;
8692 unsigned BaseOp;
8693 switch (MI.getOpcode()) {
8694 default:
8695 llvm_unreachable("unexpected addsat/subsat opcode");
8696 case TargetOpcode::G_UADDSAT:
8697 IsSigned = false;
8698 IsAdd = true;
8699 BaseOp = TargetOpcode::G_ADD;
8700 break;
8701 case TargetOpcode::G_SADDSAT:
8702 IsSigned = true;
8703 IsAdd = true;
8704 BaseOp = TargetOpcode::G_ADD;
8705 break;
8706 case TargetOpcode::G_USUBSAT:
8707 IsSigned = false;
8708 IsAdd = false;
8709 BaseOp = TargetOpcode::G_SUB;
8710 break;
8711 case TargetOpcode::G_SSUBSAT:
8712 IsSigned = true;
8713 IsAdd = false;
8714 BaseOp = TargetOpcode::G_SUB;
8715 break;
8716 }
8717
8718 if (IsSigned) {
8719 // sadd.sat(a, b) ->
8720 // hi = 0x7fffffff - smax(a, 0)
8721 // lo = 0x80000000 - smin(a, 0)
8722 // a + smin(smax(lo, b), hi)
8723 // ssub.sat(a, b) ->
8724 // lo = smax(a, -1) - 0x7fffffff
8725 // hi = smin(a, -1) - 0x80000000
8726 // a - smin(smax(lo, b), hi)
8727 // TODO: AMDGPU can use a "median of 3" instruction here:
8728 // a +/- med3(lo, b, hi)
8729 uint64_t NumBits = Ty.getScalarSizeInBits();
8730 auto MaxVal =
8731 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
8732 auto MinVal =
8733 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
8734 MachineInstrBuilder Hi, Lo;
8735 if (IsAdd) {
8736 auto Zero = MIRBuilder.buildConstant(Ty, 0);
8737 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
8738 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
8739 } else {
8740 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
8741 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
8742 MaxVal);
8743 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
8744 MinVal);
8745 }
8746 auto RHSClamped =
8747 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
8748 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
8749 } else {
8750 // uadd.sat(a, b) -> a + umin(~a, b)
8751 // usub.sat(a, b) -> a - umin(a, b)
8752 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
8753 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
8754 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
8755 }
8756
8757 MI.eraseFromParent();
8758 return Legalized;
8759}
8760
8761LegalizerHelper::LegalizeResult
8762LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
8763 auto [Res, LHS, RHS] = MI.getFirst3Regs();
8764 LLT Ty = MRI.getType(Res);
8765 LLT BoolTy = Ty.changeElementSize(1);
8766 bool IsSigned;
8767 bool IsAdd;
8768 unsigned OverflowOp;
8769 switch (MI.getOpcode()) {
8770 default:
8771 llvm_unreachable("unexpected addsat/subsat opcode");
8772 case TargetOpcode::G_UADDSAT:
8773 IsSigned = false;
8774 IsAdd = true;
8775 OverflowOp = TargetOpcode::G_UADDO;
8776 break;
8777 case TargetOpcode::G_SADDSAT:
8778 IsSigned = true;
8779 IsAdd = true;
8780 OverflowOp = TargetOpcode::G_SADDO;
8781 break;
8782 case TargetOpcode::G_USUBSAT:
8783 IsSigned = false;
8784 IsAdd = false;
8785 OverflowOp = TargetOpcode::G_USUBO;
8786 break;
8787 case TargetOpcode::G_SSUBSAT:
8788 IsSigned = true;
8789 IsAdd = false;
8790 OverflowOp = TargetOpcode::G_SSUBO;
8791 break;
8792 }
8793
8794 auto OverflowRes =
8795 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
8796 Register Tmp = OverflowRes.getReg(0);
8797 Register Ov = OverflowRes.getReg(1);
8798 MachineInstrBuilder Clamp;
8799 if (IsSigned) {
8800 // sadd.sat(a, b) ->
8801 // {tmp, ov} = saddo(a, b)
8802 // ov ? (tmp >>s 31) + 0x80000000 : r
8803 // ssub.sat(a, b) ->
8804 // {tmp, ov} = ssubo(a, b)
8805 // ov ? (tmp >>s 31) + 0x80000000 : r
8806 uint64_t NumBits = Ty.getScalarSizeInBits();
8807 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
8808 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
8809 auto MinVal =
8810 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
8811 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
8812 } else {
8813 // uadd.sat(a, b) ->
8814 // {tmp, ov} = uaddo(a, b)
8815 // ov ? 0xffffffff : tmp
8816 // usub.sat(a, b) ->
8817 // {tmp, ov} = usubo(a, b)
8818 // ov ? 0 : tmp
8819 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
8820 }
8821 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
8822
8823 MI.eraseFromParent();
8824 return Legalized;
8825}
8826
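// Lower G_SSHLSAT/G_USHLSAT: perform the shift, shift the result back, and if
// the round trip does not reproduce the operand, select the saturated value
// instead of the plain shift result.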
8827LegalizerHelper::LegalizeResult
8828LegalizerHelper::lowerShlSat(MachineInstr &MI) {
8829 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
8830 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
8831 "Expected shlsat opcode!");
8832 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
8833 auto [Res, LHS, RHS] = MI.getFirst3Regs();
8834 LLT Ty = MRI.getType(Res);
8835 LLT BoolTy = Ty.changeElementSize(1);
8836
8837 unsigned BW = Ty.getScalarSizeInBits();
8838 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
8839 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
8840 : MIRBuilder.buildLShr(Ty, Result, RHS);
8841
8842 MachineInstrBuilder SatVal;
8843 if (IsSigned) {
8844 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
8845 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
8846 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
8847 MIRBuilder.buildConstant(Ty, 0));
8848 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
8849 } else {
8850 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
8851 }
8852 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
8853 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
8854
8855 MI.eraseFromParent();
8856 return Legalized;
8857}
8858
8859LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
8860 auto [Dst, Src] = MI.getFirst2Regs();
8861 const LLT Ty = MRI.getType(Src);
8862 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
8863 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
8864
8865 // Swap most and least significant byte, set remaining bytes in Res to zero.
8866 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
8867 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
8868 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
8869 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
8870
8871 // Set i-th high/low byte in Res to i-th low/high byte from Src.
8872 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
8873 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
8874 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
8875 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
8876 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
8877 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
8878 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
8879 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
8880 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
8881 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
8882 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
8883 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
8884 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
8885 }
8886 Res.getInstr()->getOperand(0).setReg(Dst);
8887
8888 MI.eraseFromParent();
8889 return Legalized;
8890}
8891
8892//{ (Src & Mask) >> N } | { (Src << N) & Mask }
8893static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
8894 MachineInstrBuilder Src, const APInt &Mask) {
8895 const LLT Ty = Dst.getLLTTy(*B.getMRI());
8896 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
8897 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
8898 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
8899 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
8900 return B.buildOr(Dst, LHS, RHS);
8901}
8902
8903LegalizerHelper::LegalizeResult
8904LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
8905 auto [Dst, Src] = MI.getFirst2Regs();
8906 const LLT Ty = MRI.getType(Src);
8907 unsigned Size = Ty.getScalarSizeInBits();
8908
8909 if (Size >= 8) {
8910 MachineInstrBuilder BSWAP =
8911 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
8912
8913 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
8914 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
8915 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
8916 MachineInstrBuilder Swap4 =
8917 SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
8918
8919 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
8920 // [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
8921 // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
8922 MachineInstrBuilder Swap2 =
8923 SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
8924
8925 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
8926 // 6|7
8927 // [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
8928 // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
8929 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
8930 } else {
8931 // Expand bitreverse for types smaller than 8 bits.
8932 MachineInstrBuilder Tmp;
8933 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
8934 MachineInstrBuilder Tmp2;
8935 if (I < J) {
8936 auto ShAmt = MIRBuilder.buildConstant(Ty, J - I);
8937 Tmp2 = MIRBuilder.buildShl(Ty, Src, ShAmt);
8938 } else {
8939 auto ShAmt = MIRBuilder.buildConstant(Ty, I - J);
8940 Tmp2 = MIRBuilder.buildLShr(Ty, Src, ShAmt);
8941 }
8942
8943 auto Mask = MIRBuilder.buildConstant(Ty, 1ULL << J);
8944 Tmp2 = MIRBuilder.buildAnd(Ty, Tmp2, Mask);
8945 if (I == 0)
8946 Tmp = Tmp2;
8947 else
8948 Tmp = MIRBuilder.buildOr(Ty, Tmp, Tmp2);
8949 }
8950 MIRBuilder.buildCopy(Dst, Tmp);
8951 }
8952
8953 MI.eraseFromParent();
8954 return Legalized;
8955}
8956
8957LegalizerHelper::LegalizeResult
8958LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
8959 MachineFunction &MF = MIRBuilder.getMF();
8960
8961 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
8962 int NameOpIdx = IsRead ? 1 : 0;
8963 int ValRegIndex = IsRead ? 0 : 1;
8964
8965 Register ValReg = MI.getOperand(ValRegIndex).getReg();
8966 const LLT Ty = MRI.getType(ValReg);
8967 const MDString *RegStr = cast<MDString>(
8968 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
8969
8970 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
8971 if (!PhysReg.isValid())
8972 return UnableToLegalize;
8973
8974 if (IsRead)
8975 MIRBuilder.buildCopy(ValReg, PhysReg);
8976 else
8977 MIRBuilder.buildCopy(PhysReg, ValReg);
8978
8979 MI.eraseFromParent();
8980 return Legalized;
8981}
8982
8983LegalizerHelper::LegalizeResult
8984LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
8985 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
8986 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
8987 Register Result = MI.getOperand(0).getReg();
8988 LLT OrigTy = MRI.getType(Result);
8989 auto SizeInBits = OrigTy.getScalarSizeInBits();
8990 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
8991
8992 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
8993 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
8994 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
8995 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
8996
8997 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
8998 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
8999 MIRBuilder.buildTrunc(Result, Shifted);
9000
9001 MI.eraseFromParent();
9002 return Legalized;
9003}
9004
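// Lower G_IS_FPCLASS by reinterpreting the value as an integer and testing the
// sign, exponent, and mantissa fields against each class requested in the
// FPClassTest mask, OR-ing the partial results together.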
9005LegalizerHelper::LegalizeResult
9006LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
9007 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9008 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
9009
9010 if (Mask == fcNone) {
9011 MIRBuilder.buildConstant(DstReg, 0);
9012 MI.eraseFromParent();
9013 return Legalized;
9014 }
9015 if (Mask == fcAllFlags) {
9016 MIRBuilder.buildConstant(DstReg, 1);
9017 MI.eraseFromParent();
9018 return Legalized;
9019 }
9020
9021 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
9022 // version
9023
9024 unsigned BitSize = SrcTy.getScalarSizeInBits();
9025 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
9026
9027 LLT IntTy = LLT::scalar(BitSize);
9028 if (SrcTy.isVector())
9029 IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
9030 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
9031
9032 // Various masks.
9033 APInt SignBit = APInt::getSignMask(BitSize);
9034 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9035 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9036 APInt ExpMask = Inf;
9037 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9038 APInt QNaNBitMask =
9039 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9040 APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
9041
9042 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
9043 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
9044 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
9045 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
9046 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
9047
9048 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
9049 auto Sign =
9050 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
9051
9052 auto Res = MIRBuilder.buildConstant(DstTy, 0);
9053 // Clang doesn't support capture of structured bindings:
9054 LLT DstTyCopy = DstTy;
9055 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
9056 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
9057 };
9058
9059 // Tests that involve more than one class should be processed first.
9060 if ((Mask & fcFinite) == fcFinite) {
9061 // finite(V) ==> abs(V) u< exp_mask
9062 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9063 ExpMaskC));
9064 Mask &= ~fcFinite;
9065 } else if ((Mask & fcFinite) == fcPosFinite) {
9066 // finite(V) && V > 0 ==> V u< exp_mask
9067 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
9068 ExpMaskC));
9069 Mask &= ~fcPosFinite;
9070 } else if ((Mask & fcFinite) == fcNegFinite) {
9071 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
9072 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9073 ExpMaskC);
9074 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
9075 appendToRes(And);
9076 Mask &= ~fcNegFinite;
9077 }
9078
9079 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
9080 // fcZero | fcSubnormal => test all exponent bits are 0
9081 // TODO: Handle sign bit specific cases
9082 // TODO: Handle inverted case
9083 if (PartialCheck == (fcZero | fcSubnormal)) {
9084 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
9085 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9086 ExpBits, ZeroC));
9087 Mask &= ~PartialCheck;
9088 }
9089 }
9090
9091 // Check for individual classes.
9092 if (FPClassTest PartialCheck = Mask & fcZero) {
9093 if (PartialCheck == fcPosZero)
9094 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9095 AsInt, ZeroC));
9096 else if (PartialCheck == fcZero)
9097 appendToRes(
9098 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
9099 else // fcNegZero
9100 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9101 AsInt, SignBitC));
9102 }
9103
9104 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
9105 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
9106 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
9107 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
9108 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
9109 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
9110 auto SubnormalRes =
9111 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
9112 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
9113 if (PartialCheck == fcNegSubnormal)
9114 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
9115 appendToRes(SubnormalRes);
9116 }
9117
9118 if (FPClassTest PartialCheck = Mask & fcInf) {
9119 if (PartialCheck == fcPosInf)
9120 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9121 AsInt, InfC));
9122 else if (PartialCheck == fcInf)
9123 appendToRes(
9124 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
9125 else { // fcNegInf
9126 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9127 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
9128 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9129 AsInt, NegInfC));
9130 }
9131 }
9132
9133 if (FPClassTest PartialCheck = Mask & fcNan) {
9134 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
9135 if (PartialCheck == fcNan) {
9136 // isnan(V) ==> abs(V) u> int(inf)
9137 appendToRes(
9138 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
9139 } else if (PartialCheck == fcQNan) {
9140 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
9141 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
9142 InfWithQnanBitC));
9143 } else { // fcSNan
9144 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
9145 // abs(V) u< (unsigned(Inf) | quiet_bit)
9146 auto IsNan =
9147 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
9148 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
9149 Abs, InfWithQnanBitC);
9150 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
9151 }
9152 }
9153
9154 if (FPClassTest PartialCheck = Mask & fcNormal) {
9155 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
9156 // (max_exp-1))
9157 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9158 auto ExpMinusOne = MIRBuilder.buildSub(
9159 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
9160 APInt MaxExpMinusOne = ExpMask - ExpLSB;
9161 auto NormalRes =
9162 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
9163 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
9164 if (PartialCheck == fcNegNormal)
9165 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
9166 else if (PartialCheck == fcPosNormal) {
9167 auto PosSign = MIRBuilder.buildXor(
9168 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InvertionMask));
9169 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
9170 }
9171 appendToRes(NormalRes);
9172 }
9173
9174 MIRBuilder.buildCopy(DstReg, Res);
9175 MI.eraseFromParent();
9176 return Legalized;
9177}
9178
9179LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
9180 // Implement G_SELECT in terms of XOR, AND, OR.
9181 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
9182 MI.getFirst4RegLLTs();
9183
9184 bool IsEltPtr = DstTy.isPointerOrPointerVector();
9185 if (IsEltPtr) {
9186 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
9187 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
9188 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
9189 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
9190 DstTy = NewTy;
9191 }
9192
9193 if (MaskTy.isScalar()) {
9194 // Turn the scalar condition into a vector condition mask if needed.
9195
9196 Register MaskElt = MaskReg;
9197
9198 // The condition was potentially zero extended before, but we want a sign
9199 // extended boolean.
9200 if (MaskTy != LLT::scalar(1))
9201 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
9202
9203 // Continue the sign extension (or truncate) to match the data type.
9204 MaskElt =
9205 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
9206
9207 if (DstTy.isVector()) {
9208 // Generate a vector splat idiom.
9209 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
9210 MaskReg = ShufSplat.getReg(0);
9211 } else {
9212 MaskReg = MaskElt;
9213 }
9214 MaskTy = DstTy;
9215 } else if (!DstTy.isVector()) {
9216 // Cannot handle the case that mask is a vector and dst is a scalar.
9217 return UnableToLegalize;
9218 }
9219
9220 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
9221 return UnableToLegalize;
9222 }
9223
9224 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
9225 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
9226 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
9227 if (IsEltPtr) {
9228 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
9229 MIRBuilder.buildIntToPtr(DstReg, Or);
9230 } else {
9231 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
9232 }
9233 MI.eraseFromParent();
9234 return Legalized;
9235}
9236
9237LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
9238 // Split DIVREM into individual instructions.
9239 unsigned Opcode = MI.getOpcode();
9240
9241 MIRBuilder.buildInstr(
9242 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
9243 : TargetOpcode::G_UDIV,
9244 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
9245 MIRBuilder.buildInstr(
9246 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
9247 : TargetOpcode::G_UREM,
9248 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
9249 MI.eraseFromParent();
9250 return Legalized;
9251}
9252
9253LegalizerHelper::LegalizeResult
9254LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
9255 // Expand %res = G_ABS %a into:
9256 // %v1 = G_ASHR %a, scalar_size-1
9257 // %v2 = G_ADD %a, %v1
9258 // %res = G_XOR %v2, %v1
9259 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
9260 Register OpReg = MI.getOperand(1).getReg();
9261 auto ShiftAmt =
9262 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
9263 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
9264 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
9265 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
9266 MI.eraseFromParent();
9267 return Legalized;
9268}
9269
9270LegalizerHelper::LegalizeResult
9271LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
9272 // Expand %res = G_ABS %a into:
9273 // %v1 = G_CONSTANT 0
9274 // %v2 = G_SUB %v1, %a
9275 // %res = G_SMAX %a, %v2
9276 Register SrcReg = MI.getOperand(1).getReg();
9277 LLT Ty = MRI.getType(SrcReg);
9278 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9279 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
9280 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
9281 MI.eraseFromParent();
9282 return Legalized;
9283}
9284
9285LegalizerHelper::LegalizeResult
9286LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
9287 Register SrcReg = MI.getOperand(1).getReg();
9288 Register DestReg = MI.getOperand(0).getReg();
9289 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
9290 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
9291 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
9292 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
9293 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
9294 MI.eraseFromParent();
9295 return Legalized;
9296}
9297
9298LegalizerHelper::LegalizeResult LegalizerHelper::lowerFAbs(MachineInstr &MI) {
9299 Register SrcReg = MI.getOperand(1).getReg();
9300 Register DstReg = MI.getOperand(0).getReg();
9301
9302 LLT Ty = MRI.getType(DstReg);
9303
9304 // Reset sign bit
9305 MIRBuilder.buildAnd(
9306 DstReg, SrcReg,
9307 MIRBuilder.buildConstant(
9308 Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits())));
9309
9310 MI.eraseFromParent();
9311 return Legalized;
9312}
9313
9314LegalizerHelper::LegalizeResult
9315LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
9316 Register SrcReg = MI.getOperand(1).getReg();
9317 LLT SrcTy = MRI.getType(SrcReg);
9318 LLT DstTy = MRI.getType(SrcReg);
9319
9320 // The source could be a scalar if the IR type was <1 x sN>.
9321 if (SrcTy.isScalar()) {
9322 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
9323 return UnableToLegalize; // FIXME: handle extension.
9324 // This can be just a plain copy.
9325 Observer.changingInstr(MI);
9326 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
9327 Observer.changedInstr(MI);
9328 return Legalized;
9329 }
9330 return UnableToLegalize;
9331}
9332
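// Lower G_VAARG: load the current va_list pointer, align it if the argument
// needs more than the minimum stack argument alignment, store back the pointer
// bumped past the argument, and load the argument itself from the list slot.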
9333LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
9334 MachineFunction &MF = *MI.getMF();
9335 const DataLayout &DL = MIRBuilder.getDataLayout();
9336 LLVMContext &Ctx = MF.getFunction().getContext();
9337 Register ListPtr = MI.getOperand(1).getReg();
9338 LLT PtrTy = MRI.getType(ListPtr);
9339
9340 // LstPtr is a pointer to the head of the list. Get the address
9341 // of the head of the list.
9342 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
9343 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
9344 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
9345 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
9346
9347 const Align A(MI.getOperand(2).getImm());
9348 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
9349 if (A > TLI.getMinStackArgumentAlignment()) {
9350 Register AlignAmt =
9351 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
9352 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
9353 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
9354 VAList = AndDst.getReg(0);
9355 }
9356
9357 // Increment the pointer, VAList, to the next vaarg
9358 // The list should be bumped by the size of the element in the current head
9359 // of the list.
9360 Register Dst = MI.getOperand(0).getReg();
9361 LLT LLTTy = MRI.getType(Dst);
9362 Type *Ty = getTypeForLLT(LLTTy, Ctx);
9363 auto IncAmt =
9364 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
9365 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
9366
9367 // Store the incremented VAList to the legalized pointer.
9368 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
9369 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
9370 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
9371 // Load the actual argument out of the pointer VAList
9372 Align EltAlignment = DL.getABITypeAlign(Ty);
9373 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
9374 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
9375 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
9376
9377 MI.eraseFromParent();
9378 return Legalized;
9379}
9380
9381static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
9382 // On Darwin, -Os means optimize for size without hurting performance, so
9383 // only really optimize for size when -Oz (MinSize) is used.
9384 if (MF.getTarget().getTargetTriple().isOSDarwin())
9385 return MF.getFunction().hasMinSize();
9386 return MF.getFunction().hasOptSize();
9387}
9388
9389// Returns a list of types to use for memory op lowering in MemOps. A partial
9390// port of findOptimalMemOpLowering in TargetLowering.
9391static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
9392 unsigned Limit, const MemOp &Op,
9393 unsigned DstAS, unsigned SrcAS,
9394 const AttributeList &FuncAttributes,
9395 const TargetLowering &TLI) {
9396 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
9397 return false;
9398
9399 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
9400
9401 if (Ty == LLT()) {
9402 // Use the largest scalar type whose alignment constraints are satisfied.
9403 // We only need to check DstAlign here as SrcAlign is always greater or
9404 // equal to DstAlign (or zero).
9405 Ty = LLT::scalar(64);
9406 if (Op.isFixedDstAlign())
9407 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
9408 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
9409 Ty = LLT::scalar(Ty.getSizeInBytes());
9410 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
9411 // FIXME: check for the largest legal type we can load/store to.
9412 }
9413
9414 unsigned NumMemOps = 0;
9415 uint64_t Size = Op.size();
9416 while (Size) {
9417 unsigned TySize = Ty.getSizeInBytes();
9418 while (TySize > Size) {
9419 // For now, only use non-vector load / store's for the left-over pieces.
9420 LLT NewTy = Ty;
9421 // FIXME: check for mem op safety and legality of the types. Not all of
9422 // SDAGisms map cleanly to GISel concepts.
9423 if (NewTy.isVector())
9424 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
9425 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
9426 unsigned NewTySize = NewTy.getSizeInBytes();
9427 assert(NewTySize > 0 && "Could not find appropriate type");
9428
9429 // If the new LLT cannot cover all of the remaining bits, then consider
9430 // issuing a (or a pair of) unaligned and overlapping load / store.
9431 unsigned Fast;
9432 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
9433 MVT VT = getMVTForLLT(Ty);
9434 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
9435 TLI.allowsMisalignedMemoryAccesses(
9436 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
9437 MachineMemOperand::MONone, &Fast) &&
9438 Fast)
9439 TySize = Size;
9440 else {
9441 Ty = NewTy;
9442 TySize = NewTySize;
9443 }
9444 }
9445
9446 if (++NumMemOps > Limit)
9447 return false;
9448
9449 MemOps.push_back(Ty);
9450 Size -= TySize;
9451 }
9452
9453 return true;
9454}
9455
9456// Get a vectorized representation of the memset value operand, GISel edition.
9457static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
9458 MachineRegisterInfo &MRI = *MIB.getMRI();
9459 unsigned NumBits = Ty.getScalarSizeInBits();
9460 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
9461 if (!Ty.isVector() && ValVRegAndVal) {
9462 APInt Scalar = ValVRegAndVal->Value.trunc(8);
9463 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
9464 return MIB.buildConstant(Ty, SplatVal).getReg(0);
9465 }
9466
9467 // Extend the byte value to the larger type, and then multiply by a magic
9468 // value 0x010101... in order to replicate it across every byte.
9469 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
9470 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
9471 return MIB.buildConstant(Ty, 0).getReg(0);
9472 }
9473
9474 LLT ExtType = Ty.getScalarType();
9475 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
9476 if (NumBits > 8) {
9477 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
9478 auto MagicMI = MIB.buildConstant(ExtType, Magic);
9479 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
9480 }
9481
9482 // For vector types create a G_BUILD_VECTOR.
9483 if (Ty.isVector())
9484 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
9485
9486 return Val;
9487}
9488
9489LegalizerHelper::LegalizeResult
9490LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
9491 uint64_t KnownLen, Align Alignment,
9492 bool IsVolatile) {
9493 auto &MF = *MI.getParent()->getParent();
9494 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9495 auto &DL = MF.getDataLayout();
9496 LLVMContext &C = MF.getFunction().getContext();
9497
9498 assert(KnownLen != 0 && "Have a zero length memset length!");
9499
9500 bool DstAlignCanChange = false;
9501 MachineFrameInfo &MFI = MF.getFrameInfo();
9502 bool OptSize = shouldLowerMemFuncForSize(MF);
9503
9504 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
9505 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
9506 DstAlignCanChange = true;
9507
9508 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
9509 std::vector<LLT> MemOps;
9510
9511 const auto &DstMMO = **MI.memoperands_begin();
9512 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
9513
9514 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
9515 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
9516
9517 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
9518 MemOp::Set(KnownLen, DstAlignCanChange,
9519 Alignment,
9520 /*IsZeroMemset=*/IsZeroVal,
9521 /*IsVolatile=*/IsVolatile),
9522 DstPtrInfo.getAddrSpace(), ~0u,
9523 MF.getFunction().getAttributes(), TLI))
9524 return UnableToLegalize;
9525
9526 if (DstAlignCanChange) {
9527 // Get an estimate of the type from the LLT.
9528 Type *IRTy = getTypeForLLT(MemOps[0], C);
9529 Align NewAlign = DL.getABITypeAlign(IRTy);
9530 if (NewAlign > Alignment) {
9531 Alignment = NewAlign;
9532 unsigned FI = FIDef->getOperand(1).getIndex();
9533 // Give the stack frame object a larger alignment if needed.
9534 if (MFI.getObjectAlign(FI) < Alignment)
9535 MFI.setObjectAlignment(FI, Alignment);
9536 }
9537 }
9538
9539 MachineIRBuilder MIB(MI);
9540 // Find the largest store and generate the bit pattern for it.
9541 LLT LargestTy = MemOps[0];
9542 for (unsigned i = 1; i < MemOps.size(); i++)
9543 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
9544 LargestTy = MemOps[i];
9545
9546 // The memset stored value is always defined as an s8, so in order to make it
9547 // work with larger store types we need to repeat the bit pattern across the
9548 // wider type.
9549 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
9550
9551 if (!MemSetValue)
9552 return UnableToLegalize;
9553
9554 // Generate the stores. For each store type in the list, we generate the
9555 // matching store of that type to the destination address.
9556 LLT PtrTy = MRI.getType(Dst);
9557 unsigned DstOff = 0;
9558 unsigned Size = KnownLen;
9559 for (unsigned I = 0; I < MemOps.size(); I++) {
9560 LLT Ty = MemOps[I];
9561 unsigned TySize = Ty.getSizeInBytes();
9562 if (TySize > Size) {
9563 // Issuing an unaligned load / store pair that overlaps with the previous
9564 // pair. Adjust the offset accordingly.
9565 assert(I == MemOps.size() - 1 && I != 0);
9566 DstOff -= TySize - Size;
9567 }
9568
9569 // If this store is smaller than the largest store see whether we can get
9570 // the smaller value for free with a truncate.
9571 Register Value = MemSetValue;
9572 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
9573 MVT VT = getMVTForLLT(Ty);
9574 MVT LargestVT = getMVTForLLT(LargestTy);
9575 if (!LargestTy.isVector() && !Ty.isVector() &&
9576 TLI.isTruncateFree(LargestVT, VT))
9577 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
9578 else
9579 Value = getMemsetValue(Val, Ty, MIB);
9580 if (!Value)
9581 return UnableToLegalize;
9582 }
9583
9584 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
9585
9586 Register Ptr = Dst;
9587 if (DstOff != 0) {
9588 auto Offset =
9589 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
9590 Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
9591 }
9592
9593 MIB.buildStore(Value, Ptr, *StoreMMO);
9594 DstOff += Ty.getSizeInBytes();
9595 Size -= TySize;
9596 }
9597
9598 MI.eraseFromParent();
9599 return Legalized;
9600}
9601
9602LegalizerHelper::LegalizeResult
9603LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
9604 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
9605
9606 auto [Dst, Src, Len] = MI.getFirst3Regs();
9607
9608 const auto *MMOIt = MI.memoperands_begin();
9609 const MachineMemOperand *MemOp = *MMOIt;
9610 bool IsVolatile = MemOp->isVolatile();
9611
9612 // See if this is a constant length copy
9613 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
9614 // FIXME: support dynamically sized G_MEMCPY_INLINE
9615 assert(LenVRegAndVal &&
9616 "inline memcpy with dynamic size is not yet supported");
9617 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
9618 if (KnownLen == 0) {
9619 MI.eraseFromParent();
9620 return Legalized;
9621 }
9622
9623 const auto &DstMMO = **MI.memoperands_begin();
9624 const auto &SrcMMO = **std::next(MI.memoperands_begin());
9625 Align DstAlign = DstMMO.getBaseAlign();
9626 Align SrcAlign = SrcMMO.getBaseAlign();
9627
9628 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
9629 IsVolatile);
9630}
9631
9632LegalizerHelper::LegalizeResult
9633LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
9634 uint64_t KnownLen, Align DstAlign,
9635 Align SrcAlign, bool IsVolatile) {
9636 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
9637 return lowerMemcpy(MI, Dst, Src, KnownLen,
9638 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
9639 IsVolatile);
9640}
9641
9642LegalizerHelper::LegalizeResult
9643LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
9644 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
9645 Align SrcAlign, bool IsVolatile) {
9646 auto &MF = *MI.getParent()->getParent();
9647 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9648 auto &DL = MF.getDataLayout();
9649 LLVMContext &C = MF.getFunction().getContext();
9650
9651 assert(KnownLen != 0 && "Have a zero length memcpy length!");
9652
9653 bool DstAlignCanChange = false;
9654 MachineFrameInfo &MFI = MF.getFrameInfo();
9655 Align Alignment = std::min(DstAlign, SrcAlign);
9656
9657 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
9658 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
9659 DstAlignCanChange = true;
9660
9661 // FIXME: infer better src pointer alignment like SelectionDAG does here.
9662 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
9663 // if the memcpy is in a tail call position.
9664
9665 std::vector<LLT> MemOps;
9666
9667 const auto &DstMMO = **MI.memoperands_begin();
9668 const auto &SrcMMO = **std::next(MI.memoperands_begin());
9669 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
9670 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
9671
9672 if (!findGISelOptimalMemOpLowering(
9673 MemOps, Limit,
9674 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
9675 IsVolatile),
9676 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
9677 MF.getFunction().getAttributes(), TLI))
9678 return UnableToLegalize;
9679
9680 if (DstAlignCanChange) {
9681 // Get an estimate of the type from the LLT.
9682 Type *IRTy = getTypeForLLT(MemOps[0], C);
9683 Align NewAlign = DL.getABITypeAlign(IRTy);
9684
9685 // Don't promote to an alignment that would require dynamic stack
9686 // realignment.
9687 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
9688 if (!TRI->hasStackRealignment(MF))
9689 if (MaybeAlign StackAlign = DL.getStackAlignment())
9690 NewAlign = std::min(NewAlign, *StackAlign);
9691
9692 if (NewAlign > Alignment) {
9693 Alignment = NewAlign;
9694 unsigned FI = FIDef->getOperand(1).getIndex();
9695 // Give the stack frame object a larger alignment if needed.
9696 if (MFI.getObjectAlign(FI) < Alignment)
9697 MFI.setObjectAlignment(FI, Alignment);
9698 }
9699 }
9700
9701 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
9702
9703 MachineIRBuilder MIB(MI);
9704 // Now we need to emit a load/store pair for each of the types we've collected.
9705 // I.e. for each type, generate a load from the source pointer of that type
9706 // width, and then generate a corresponding store of that loaded value to the
9707 // destination buffer. This can result in a sequence of loads and stores of
9708 // mixed types, depending on what the target specifies as good types to use.
9709 unsigned CurrOffset = 0;
9710 unsigned Size = KnownLen;
9711 for (auto CopyTy : MemOps) {
9712 // Issuing an unaligned load / store pair that overlaps with the previous
9713 // pair. Adjust the offset accordingly.
9714 if (CopyTy.getSizeInBytes() > Size)
9715 CurrOffset -= CopyTy.getSizeInBytes() - Size;
9716
9717 // Construct MMOs for the accesses.
9718 auto *LoadMMO =
9719 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
9720 auto *StoreMMO =
9721 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
9722
9723 // Create the load.
9724 Register LoadPtr = Src;
9725 Register Offset;
9726 if (CurrOffset != 0) {
9727 LLT SrcTy = MRI.getType(Src);
9728 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
9729 .getReg(0);
9730 LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
9731 }
9732 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
9733
9734 // Create the store.
9735 Register StorePtr = Dst;
9736 if (CurrOffset != 0) {
9737 LLT DstTy = MRI.getType(Dst);
9738 StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
9739 }
9740 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
9741 CurrOffset += CopyTy.getSizeInBytes();
9742 Size -= CopyTy.getSizeInBytes();
9743 }
9744
9745 MI.eraseFromParent();
9746 return Legalized;
9747}
9748
9749LegalizerHelper::LegalizeResult
9750LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
9751 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
9752 bool IsVolatile) {
9753 auto &MF = *MI.getParent()->getParent();
9754 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9755 auto &DL = MF.getDataLayout();
9756 LLVMContext &C = MF.getFunction().getContext();
9757
9758 assert(KnownLen != 0 && "Have a zero length memmove length!");
9759
9760 bool DstAlignCanChange = false;
9761 MachineFrameInfo &MFI = MF.getFrameInfo();
9762 bool OptSize = shouldLowerMemFuncForSize(MF);
9763 Align Alignment = std::min(DstAlign, SrcAlign);
9764
9765 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
9766 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
9767 DstAlignCanChange = true;
9768
9769 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
9770 std::vector<LLT> MemOps;
9771
9772 const auto &DstMMO = **MI.memoperands_begin();
9773 const auto &SrcMMO = **std::next(MI.memoperands_begin());
9774 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
9775 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
9776
9777 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
9778 // to a bug in its findOptimalMemOpLowering implementation. For now do the
9779 // same thing here.
9780 if (!findGISelOptimalMemOpLowering(
9781 MemOps, Limit,
9782 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
9783 /*IsVolatile*/ true),
9784 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
9785 MF.getFunction().getAttributes(), TLI))
9786 return UnableToLegalize;
9787
9788 if (DstAlignCanChange) {
9789 // Get an estimate of the type from the LLT.
9790 Type *IRTy = getTypeForLLT(MemOps[0], C);
9791 Align NewAlign = DL.getABITypeAlign(IRTy);
9792
9793 // Don't promote to an alignment that would require dynamic stack
9794 // realignment.
9795 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
9796 if (!TRI->hasStackRealignment(MF))
9797 if (MaybeAlign StackAlign = DL.getStackAlignment())
9798 NewAlign = std::min(NewAlign, *StackAlign);
9799
9800 if (NewAlign > Alignment) {
9801 Alignment = NewAlign;
9802 unsigned FI = FIDef->getOperand(1).getIndex();
9803 // Give the stack frame object a larger alignment if needed.
9804 if (MFI.getObjectAlign(FI) < Alignment)
9805 MFI.setObjectAlignment(FI, Alignment);
9806 }
9807 }
9808
9809 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
9810
9811 MachineIRBuilder MIB(MI);
9812 // Memmove requires that we perform the loads first before issuing the stores.
9813 // Apart from that, this loop is pretty much doing the same thing as the
9814 // memcpy codegen function.
9815 unsigned CurrOffset = 0;
9816 SmallVector<Register, 16> LoadVals;
9817 for (auto CopyTy : MemOps) {
9818 // Construct MMO for the load.
9819 auto *LoadMMO =
9820 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
9821
9822 // Create the load.
9823 Register LoadPtr = Src;
9824 if (CurrOffset != 0) {
9825 LLT SrcTy = MRI.getType(Src);
9826 auto Offset =
9827 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
9828 LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
9829 }
9830 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
9831 CurrOffset += CopyTy.getSizeInBytes();
9832 }
9833
9834 CurrOffset = 0;
9835 for (unsigned I = 0; I < MemOps.size(); ++I) {
9836 LLT CopyTy = MemOps[I];
9837 // Now store the values loaded.
9838 auto *StoreMMO =
9839 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
9840
9841 Register StorePtr = Dst;
9842 if (CurrOffset != 0) {
9843 LLT DstTy = MRI.getType(Dst);
9844 auto Offset =
9845 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
9846 StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
9847 }
9848 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
9849 CurrOffset += CopyTy.getSizeInBytes();
9850 }
9851 MI.eraseFromParent();
9852 return Legalized;
9853}
9854
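// Lower G_MEMCPY/G_MEMMOVE/G_MEMSET with a known constant, non-volatile length
// (and within MaxLen, if given) by dispatching to the inline expansion helpers
// above; any other case is reported as UnableToLegalize.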
9855LegalizerHelper::LegalizeResult
9856LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
9857 const unsigned Opc = MI.getOpcode();
9858 // This combine is fairly complex so it's not written with a separate
9859 // matcher function.
9860 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
9861 Opc == TargetOpcode::G_MEMSET) &&
9862 "Expected memcpy like instruction");
9863
9864 auto MMOIt = MI.memoperands_begin();
9865 const MachineMemOperand *MemOp = *MMOIt;
9866
9867 Align DstAlign = MemOp->getBaseAlign();
9868 Align SrcAlign;
9869 auto [Dst, Src, Len] = MI.getFirst3Regs();
9870
9871 if (Opc != TargetOpcode::G_MEMSET) {
9872 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
9873 MemOp = *(++MMOIt);
9874 SrcAlign = MemOp->getBaseAlign();
9875 }
9876
9877 // See if this is a constant length copy
9878 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
9879 if (!LenVRegAndVal)
9880 return UnableToLegalize;
9881 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
9882
9883 if (KnownLen == 0) {
9884 MI.eraseFromParent();
9885 return Legalized;
9886 }
9887
9888 bool IsVolatile = MemOp->isVolatile();
9889 if (Opc == TargetOpcode::G_MEMCPY_INLINE)
9890 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
9891 IsVolatile);
9892
9893 // Don't try to optimize volatile.
9894 if (IsVolatile)
9895 return UnableToLegalize;
9896
9897 if (MaxLen && KnownLen > MaxLen)
9898 return UnableToLegalize;
9899
9900 if (Opc == TargetOpcode::G_MEMCPY) {
9901 auto &MF = *MI.getParent()->getParent();
9902 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9903 bool OptSize = shouldLowerMemFuncForSize(MF);
9904 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
9905 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
9906 IsVolatile);
9907 }
9908 if (Opc == TargetOpcode::G_MEMMOVE)
9909 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
9910 if (Opc == TargetOpcode::G_MEMSET)
9911 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
9912 return UnableToLegalize;
9913}
unsigned const MachineRegisterInfo * MRI
#define Success
static const LLT S1
static const LLT S64
static const LLT S32
static const LLT S16
AMDGPU Register Bank Select
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition: Utils.h:74
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Addr
std::string Name
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, const TargetLowering &TLI, bool IsSigned=false)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
#define R2(n)
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t High
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
R600 Clause Merge
static constexpr Register SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1329
APInt bitcastToAPInt() const
Definition: APFloat.h:1346
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition: APFloat.h:1135
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition: APFloat.h:1095
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1492
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:910
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:206
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1640
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition: APInt.h:216
void negate()
Negate this APInt in place.
Definition: APInt.h:1450
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:624
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:959
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:873
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:851
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition: APInt.h:270
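Many of the legalizer's bit manipulations lean on these APInt factories. A small self-contained sketch (the concrete values are chosen only for illustration):
#include "llvm/ADT/APInt.h"
using namespace llvm;

void apintMaskExamples() {
  APInt Low8 = APInt::getLowBitsSet(32, 8);           // 0x000000FF
  APInt Splat = APInt::getSplat(64, APInt(8, 0x01));  // 0x0101010101010101
  APInt Wrap = APInt::getBitsSetWithWrap(8, 6, 2);    // bits 6,7 and 0,1 -> 0xC3
  (void)Low8; (void)Splat; (void)Wrap;
}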
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:157
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
iterator begin() const
Definition: ArrayRef.h:156
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:163
bool hasAttributes() const
Return true if the builder has IR-level attributes.
Definition: Attributes.h:1119
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
bool hasRetAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the return value.
Definition: Attributes.h:844
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:702
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:703
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:679
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:688
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:677
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:678
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:697
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:696
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:700
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:687
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:681
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:684
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:698
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:685
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:680
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:682
@ ICMP_EQ
equal
Definition: InstrTypes.h:694
@ ICMP_NE
not equal
Definition: InstrTypes.h:695
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:701
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:689
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:686
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:683
bool isSigned() const
Definition: InstrTypes.h:928
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:787
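A short sketch of the predicate helper, using the static overload of getInversePredicate; the specific pairs below merely restate the mapping described above:
#include "llvm/IR/InstrTypes.h"
#include <cassert>
using namespace llvm;

void predicateExamples() {
  assert(CmpInst::getInversePredicate(CmpInst::ICMP_SLT) == CmpInst::ICMP_SGE);
  assert(CmpInst::getInversePredicate(CmpInst::FCMP_OEQ) == CmpInst::FCMP_UNE);
}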
const APFloat & getValueAPF() const
Definition: Constants.h:314
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:148
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
Definition: DataLayout.h:348
bool isBigEndian() const
Definition: DataLayout.h:198
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition: TypeSize.h:317
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:707
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:704
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:221
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Represent a G_FCMP.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents an insert subvector.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Represents a threeway compare.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isTailCall(const MachineInstr &MI) const override
bool isEquality() const
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:264
constexpr bool isScalar() const
Definition: LowLevelType.h:146
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
Definition: LowLevelType.h:211
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:64
constexpr bool isPointerVector() const
Definition: LowLevelType.h:152
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelType.h:57
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:170
constexpr bool isByteSized() const
Definition: LowLevelType.h:260
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:190
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:277
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:183
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelType.h:218
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:270
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:100
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
Definition: LowLevelType.h:227
constexpr LLT getScalarType() const
Definition: LowLevelType.h:205
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:200
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
Definition: LowLevelType.h:124
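A minimal sketch of the LLT factories and queries the legalizer uses throughout (the types are chosen only for illustration):
#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/Support/TypeSize.h"
using namespace llvm;

void lltExamples() {
  LLT S32 = LLT::scalar(32);
  LLT V4S32 = LLT::fixed_vector(4, 32);        // <4 x s32>, 128 bits total
  LLT P0 = LLT::pointer(0, 64);                // pointer in a 64-bit address space
  LLT V2S32 = V4S32.changeElementCount(ElementCount::getFixed(2)); // <2 x s32>
  LLT S64 = S32.changeElementSize(64);         // scalar path: s64
  (void)P0; (void)V2S32; (void)S64;
}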
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
LegalizeResult lowerShlSat(MachineInstr &MI)
LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LegalizeResult lowerSITOFP(MachineInstr &MI)
LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LegalizeResult lowerBitCount(MachineInstr &MI)
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LegalizeResult lowerLoad(GAnyLoad &MI)
LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizeResult lowerFConstant(MachineInstr &MI)
LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerBitreverse(MachineInstr &MI)
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LegalizeResult lowerEXT(MachineInstr &MI)
LegalizeResult lowerStore(GStore &MI)
LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
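This is the building block behind lowerExtractInsertVectorElt above. A hedged sketch of the spill-and-reload idiom, not the exact lowering; it assumes it runs inside a LegalizerHelper member where MIRBuilder and MRI are in scope, and VecReg/IdxReg are hypothetical registers holding the vector and the index:
MachinePointerInfo PtrInfo;
LLT VecTy = MRI.getType(VecReg);
Align TmpAlign = getStackTemporaryAlignment(VecTy);
auto StackTemp = createStackTemporary(VecTy.getSizeInBytes(), TmpAlign, PtrInfo);

MachineFunction &MF = MIRBuilder.getMF();
auto *StoreMMO =
    MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, VecTy, TmpAlign);
MIRBuilder.buildStore(VecReg, StackTemp, *StoreMMO);       // spill the whole vector

Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, IdxReg);
LLT EltTy = VecTy.getElementType();
auto *LoadMMO =
    MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, EltTy, TmpAlign);
auto Elt = MIRBuilder.buildLoad(EltTy, EltPtr, *LoadMMO);  // reload just one element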
void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LegalizeResult lowerFPTOUI(MachineInstr &MI)
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LegalizeResult lowerBitcast(MachineInstr &MI)
LegalizeResult lowerMinMax(MachineInstr &MI)
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LegalizeResult lowerInsert(MachineInstr &MI)
LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LegalizeResult lowerExtract(MachineInstr &MI)
LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LegalizeResult lowerFPOWI(MachineInstr &MI)
LegalizeResult lowerFAbs(MachineInstr &MI)
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVectorReduction(MachineInstr &MI)
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
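A hedged sketch of how these results typically flow back from a target's custom legalization; MyTargetLegalizerInfo and the opcode choice are hypothetical:
bool MyTargetLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
                                           LostDebugLocObserver &LocObserver) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_CTPOP:
    // Reuse a generic expansion; Legalized means the MachineFunction changed.
    return Helper.lowerBitCount(MI) == LegalizerHelper::Legalized;
  default:
    return false; // The driver treats this as UnableToLegalize.
  }
}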
LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LegalizeResult lowerFCopySign(MachineInstr &MI)
LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LegalizeResult lowerFunnelShift(MachineInstr &MI)
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LegalizeResult lowerFMad(MachineInstr &MI)
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
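Roughly, widening a 16-bit add to 32 bits produces an extend/operate/truncate sequence. A sketch under the assumption that MI is a G_ADD on s16 and Helper is a LegalizerHelper; the MIR shown is illustrative, not a verbatim dump:
//   %2:_(s16) = G_ADD %0:_(s16), %1:_(s16)
// becomes, approximately:
//   %3:_(s32) = G_ANYEXT %0(s16)
//   %4:_(s32) = G_ANYEXT %1(s16)
//   %5:_(s32) = G_ADD %3, %4
//   %2:_(s16) = G_TRUNC %5(s32)
LegalizerHelper::LegalizeResult Res =
    Helper.widenScalar(MI, /*TypeIdx=*/0, LLT::scalar(32));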
LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFFloor(MachineInstr &MI)
LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LegalizeResult lowerFPTOSI(MachineInstr &MI)
LegalizeResult lowerUITOFP(MachineInstr &MI)
LegalizeResult lowerShuffleVector(MachineInstr &MI)
LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerMergeValues(MachineInstr &MI)
LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LegalizeResult lowerRotate(MachineInstr &MI)
LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LegalizeResult lowerDIVREM(MachineInstr &MI)
LegalizeResult lowerSelect(MachineInstr &MI)
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LegalizeResult lowerStackRestore(MachineInstr &MI)
LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerStackSave(MachineInstr &MI)
LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeResult lowerTRUNC(MachineInstr &MI)
LegalizeResult lowerBswap(MachineInstr &MI)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LegalizeResult lowerConstant(MachineInstr &MI)
void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
virtual unsigned getExtOpcodeForWideningConstant(LLT SmallTy) const
Return the opcode (SEXT/ZEXT/ANYEXT) that should be performed while widening a constant of type Small...
bool isLegalOrCustom(const LegalityQuery &Query) const
virtual bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Called for instructions with the Custom LegalizationAction.
bool isLegal(const LegalityQuery &Query) const
virtual bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition: MCInstrInfo.h:70
A single uniqued string.
Definition: Metadata.h:720
StringRef getString() const
Definition: Metadata.cpp:616
Machine Value Type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:237
iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FSUB Op0, Op1.
MachineInstrBuilder buildFPTOSI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOSI Src0.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
std::optional< MachineInstrBuilder > materializePtrAdd(Register &Res, Register Op0, const LLT ValueTy, uint64_t Value)
Materialize and insert Res = G_PTR_ADD Op0, (G_CONSTANT Value)
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not: NegOne = G_CONSTANT -1; Res = G_XOR Op0, NegOne.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FABS Op0.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildZExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_AND Op, LowBitsSet(ImmOp). Since there is no G_ZEXT_INREG like G_SEXT_INREG,...
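As the comment says, zero-extension "in register" is just an AND with a low-bits mask. A minimal sketch, assuming B is a MachineIRBuilder and Src is a hypothetical s32 register:
LLT S32 = LLT::scalar(32);
auto ZextLow8 = B.buildZExtInReg(S32, Src, 8);
// Hand-written equivalent:
auto Mask = B.buildConstant(S32, APInt::getLowBitsSet(32, 8));
auto Manual = B.buildAnd(S32, Src, Mask);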
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildInsertSubvector(const DstOp &Res, const SrcOp &Src0, const SrcOp &Src1, unsigned Index)
Build and insert Res = G_INSERT_SUBVECTOR Src0, Src1, Idx.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildFPTOUI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOUI Src0.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FPOW Src0, Src1.
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
MachineInstrBuilder buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_INTRINSIC_TRUNC Src0.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildSExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src)
Build and insert a vector splat of a scalar Src using a G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idio...
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ_ZERO_UNDEF Op0, Src0.
MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts)
Build and insert Res = G_VSCALE MinElts.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
unsigned getBoolExtOp(bool IsVec, bool IsFP) const
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation: Zero = G_CONSTANT 0; Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildSMax(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMAX Op0, Op1.
MachineInstrBuilder buildAssertZExt(const DstOp &Res, const SrcOp &Op, unsigned Size)
Build and insert Res = G_ASSERT_ZEXT Op, Size.
MachineInstrBuilder buildStrictFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_STRICT_FADD Op0, Op1.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
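A minimal round trip showing how merge-like and unmerge instructions pair up, assuming B is a MachineIRBuilder and Reg64 is a hypothetical s64 register:
LLT S32 = LLT::scalar(32);
auto Unmerge = B.buildUnmerge(S32, Reg64);   // two s32 pieces, low bits first
auto Merged = B.buildMergeLikeInstr(LLT::scalar(64),
                                    {Unmerge.getReg(0), Unmerge.getReg(1)});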
MachineInstrBuilder buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildCTTZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ_ZERO_UNDEF Op0, Src0.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineInstrBuilder buildPadVectorWithUndefElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a, b, .....
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildSMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMIN Op0, Op1.
MachineInstrBuilder buildInsert(const DstOp &Res, const SrcOp &Src, const SrcOp &Op, unsigned Index)
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_FCOPYSIGN Op0, Op1.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Elt, const SrcOp &Idx)
Build and insert Res = G_INSERT_VECTOR_ELT Val, Elt, Idx.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineInstrBuilder buildDeleteTrailingVectorElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x, y, z = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a,...
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildAtomicCmpXchg(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &CmpVal, const SrcOp &NewVal, MachineMemOperand &MMO)
Build and insert OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, MMO.
MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, ArrayRef< int > Mask)
Build and insert Res = G_SHUFFLE_VECTOR Src1, Src2, Mask.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildExtractSubvector(const DstOp &Res, const SrcOp &Src, unsigned Index)
Build and insert Res = G_EXTRACT_SUBVECTOR Src, Idx0.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildSplatVector(const DstOp &Res, const SrcOp &Val)
Build and insert Res = G_SPLAT_VECTOR Val.
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildUMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_UMIN Op0, Op1.
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_FCMP Pred, Op0, Op1.
MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FADD Op0, Op1.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:575
bool isReturn(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:946
bool isCopy() const
bool isDebugInstr() const
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:578
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:806
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void reserve(size_type N)
Definition: SmallVector.h:663
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:805
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:144
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:406
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
unsigned getMaxStoresPerMemcpy(bool OptSize) const
Get maximum # of store operations permitted for llvm.memcpy.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
virtual bool shouldExpandCmpUsingSelects(EVT VT) const
Should we expand [US]CMP nodes using two selects and two compares, or by doing arithmetic on boolean ...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getMaxStoresPerMemmove(bool OptSize) const
Get maximum # of store operations permitted for llvm.memmove.
Align getMinStackArgumentAlignment() const
Return the minimum stack alignment of an argument.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
unsigned getMaxStoresPerMemset(bool OptSize) const
Get maximum # of store operations permitted for llvm.memset.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual Register getRegisterByName(const char *RegName, LLT Ty, const MachineFunction &MF) const
Return the register ID of the name passed in.
const Triple & getTargetTriple() const
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:568
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
static Type * getX86_FP80Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static Type * getVoidTy(LLVMContext &C)
static Type * getFP128Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
Definition: LegalizerInfo.h:65
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
Definition: LegalizerInfo.h:83
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
Definition: LegalizerInfo.h:57
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegalizerInfo.h:74
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
Definition: LegalizerInfo.h:52
@ Custom
The target wants to do something special with this combination of operand and type.
Definition: LegalizerInfo.h:87
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
Definition: LegalizerInfo.h:71
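These actions are what a target requests when it builds its legalization rules. A hedged sketch of typical code inside a <Target>LegalizerInfo constructor; the opcodes and type choices are illustrative only:
using namespace TargetOpcode;
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
getActionDefinitionsBuilder(G_ADD)
    .legalFor({S32, S64})          // Legal for these types
    .widenScalarToNextPow2(0)      // WidenScalar odd sizes up to a power of two
    .clampScalar(0, S32, S64);     // NarrowScalar/WidenScalar into the clamp range
getActionDefinitionsBuilder(G_FREM)
    .libcallFor({S32, S64});       // Libcall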
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
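A small sketch of how such a lookup is used before emitting a conversion libcall; the value types are illustrative:
RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, MVT::f64);
if (LC == RTLIB::UNKNOWN_LIBCALL)
  report_fatal_error("unsupported FP extension");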
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition: STLExtras.h:854
int64_t maxIntN(int64_t N)
Gets the maximum value for an N-bit signed integer.
Definition: MathExtras.h:244
Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition: Utils.cpp:1960
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:630
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1697
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition: Utils.cpp:1508
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition: Utils.cpp:1553
LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:394
LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition: Utils.cpp:1157
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition: MathExtras.h:366
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition: Utils.cpp:485
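A minimal usage sketch, assuming B is a MachineIRBuilder, MRI its MachineRegisterInfo, and Reg128 a hypothetical s128 register to split into four s32 blocks:
SmallVector<Register, 4> Parts;
extractParts(Reg128, LLT::scalar(32), /*NumParts=*/4, Parts, B, MRI);
// Parts now holds four s32 registers covering Reg128.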
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
int64_t minIntN(int64_t N)
Gets the minimum value for an N-bit signed integer.
Definition: MathExtras.h:235
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1841
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:418
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition: Utils.h:338
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition: Alignment.h:111
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition: Utils.cpp:1245
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition: Utils.cpp:588
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:265
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:297
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:301
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:266
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:313
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
SmallVector< ISD::ArgFlagsTy, 4 > Flags
Definition: CallLowering.h:51
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
LegalizeAction Action
The action to take or the final answer.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)