1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
35#include "llvm/Support/Debug.h"
39#include <numeric>
40#include <optional>
41
42#define DEBUG_TYPE "legalizer"
43
44using namespace llvm;
45using namespace LegalizeActions;
46using namespace MIPatternMatch;
47
48/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
49///
50/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
51/// with any leftover piece as type \p LeftoverTy
52///
53/// Returns -1 in the first element of the pair if the breakdown is not
54/// satisfiable.
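/// For example, breaking OrigTy = s100 into NarrowTy = s32 pieces returns
/// {3, 1} with LeftoverTy = s4: three s32 parts plus one s4 leftover part.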
55static std::pair<int, int>
56getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
57 assert(!LeftoverTy.isValid() && "this is an out argument");
58
59 unsigned Size = OrigTy.getSizeInBits();
60 unsigned NarrowSize = NarrowTy.getSizeInBits();
61 unsigned NumParts = Size / NarrowSize;
62 unsigned LeftoverSize = Size - NumParts * NarrowSize;
63 assert(Size > NarrowSize);
64
65 if (LeftoverSize == 0)
66 return {NumParts, 0};
67
68 if (NarrowTy.isVector()) {
69 unsigned EltSize = OrigTy.getScalarSizeInBits();
70 if (LeftoverSize % EltSize != 0)
71 return {-1, -1};
72 LeftoverTy =
73 LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
74 OrigTy.getElementType());
75 } else {
76 LeftoverTy = LLT::scalar(LeftoverSize);
77 }
78
79 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
80 return std::make_pair(NumParts, NumLeftover);
81}
82
83static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
84
85 if (!Ty.isScalar())
86 return nullptr;
87
88 switch (Ty.getSizeInBits()) {
89 case 16:
90 return Type::getHalfTy(Ctx);
91 case 32:
92 return Type::getFloatTy(Ctx);
93 case 64:
94 return Type::getDoubleTy(Ctx);
95 case 80:
96 return Type::getX86_FP80Ty(Ctx);
97 case 128:
98 return Type::getFP128Ty(Ctx);
99 default:
100 return nullptr;
101 }
102}
103
104LegalizerHelper::LegalizerHelper(MachineFunction &MF,
105 GISelChangeObserver &Observer,
106 MachineIRBuilder &Builder)
107 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
108 LI(*MF.getSubtarget().getLegalizerInfo()),
109 TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
110
111LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
112 GISelChangeObserver &Observer,
113 MachineIRBuilder &B, GISelKnownBits *KB)
114 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
115 TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
116
117LegalizerHelper::LegalizeResult
118LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
119 LostDebugLocObserver &LocObserver) {
120 LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
121
122 MIRBuilder.setInstrAndDebugLoc(MI);
123
124 if (isa<GIntrinsic>(MI))
125 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
126 auto Step = LI.getAction(MI, MRI);
127 switch (Step.Action) {
128 case Legal:
129 LLVM_DEBUG(dbgs() << ".. Already legal\n");
130 return AlreadyLegal;
131 case Libcall:
132 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
133 return libcall(MI, LocObserver);
134 case NarrowScalar:
135 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
136 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
137 case WidenScalar:
138 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
139 return widenScalar(MI, Step.TypeIdx, Step.NewType);
140 case Bitcast:
141 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
142 return bitcast(MI, Step.TypeIdx, Step.NewType);
143 case Lower:
144 LLVM_DEBUG(dbgs() << ".. Lower\n");
145 return lower(MI, Step.TypeIdx, Step.NewType);
146 case FewerElements:
147 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
148 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
149 case MoreElements:
150 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
151 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
152 case Custom:
153 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
154 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
155 : UnableToLegalize;
156 default:
157 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
158 return UnableToLegalize;
159 }
160}
161
162void LegalizerHelper::insertParts(Register DstReg,
163 LLT ResultTy, LLT PartTy,
164 ArrayRef<Register> PartRegs,
165 LLT LeftoverTy,
166 ArrayRef<Register> LeftoverRegs) {
167 if (!LeftoverTy.isValid()) {
168 assert(LeftoverRegs.empty());
169
170 if (!ResultTy.isVector()) {
171 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
172 return;
173 }
174
175 if (PartTy.isVector())
176 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
177 else
178 MIRBuilder.buildBuildVector(DstReg, PartRegs);
179 return;
180 }
181
182 // Merge sub-vectors with different numbers of elements and insert into DstReg.
183 if (ResultTy.isVector()) {
184 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
185 SmallVector<Register> AllRegs;
186 for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
187 AllRegs.push_back(Reg);
188 return mergeMixedSubvectors(DstReg, AllRegs);
189 }
190
191 SmallVector<Register> GCDRegs;
192 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
193 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
194 extractGCDType(GCDRegs, GCDTy, PartReg);
195 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
196 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
197}
198
199void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
200 Register Reg) {
201 LLT Ty = MRI.getType(Reg);
202 SmallVector<Register, 0> RegElts;
203 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
204 MIRBuilder, MRI);
205 Elts.append(RegElts);
206}
207
208/// Merge \p PartRegs with different types into \p DstReg.
209void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
210 ArrayRef<Register> PartRegs) {
211 SmallVector<Register, 0> AllElts;
212 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
213 appendVectorElts(AllElts, PartRegs[i]);
214
215 Register Leftover = PartRegs[PartRegs.size() - 1];
216 if (!MRI.getType(Leftover).isVector())
217 AllElts.push_back(Leftover);
218 else
219 appendVectorElts(AllElts, Leftover);
220
221 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
222}
223
224/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
225static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
226 const MachineInstr &MI) {
227 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
228
229 const int StartIdx = Regs.size();
230 const int NumResults = MI.getNumOperands() - 1;
231 Regs.resize(Regs.size() + NumResults);
232 for (int I = 0; I != NumResults; ++I)
233 Regs[StartIdx + I] = MI.getOperand(I).getReg();
234}
235
236void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
237 LLT GCDTy, Register SrcReg) {
238 LLT SrcTy = MRI.getType(SrcReg);
239 if (SrcTy == GCDTy) {
240 // If the source already evenly divides the result type, we don't need to do
241 // anything.
242 Parts.push_back(SrcReg);
243 } else {
244 // Need to split into common type sized pieces.
245 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
246 getUnmergeResults(Parts, *Unmerge);
247 }
248}
249
250LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
251 LLT NarrowTy, Register SrcReg) {
252 LLT SrcTy = MRI.getType(SrcReg);
253 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
254 extractGCDType(Parts, GCDTy, SrcReg);
255 return GCDTy;
256}
257
258LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
259 SmallVectorImpl<Register> &VRegs,
260 unsigned PadStrategy) {
261 LLT LCMTy = getLCMType(DstTy, NarrowTy);
262
263 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
264 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
265 int NumOrigSrc = VRegs.size();
266
267 Register PadReg;
268
269 // Get a value we can use to pad the source value if the sources won't evenly
270 // cover the result type.
271 if (NumOrigSrc < NumParts * NumSubParts) {
272 if (PadStrategy == TargetOpcode::G_ZEXT)
273 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
274 else if (PadStrategy == TargetOpcode::G_ANYEXT)
275 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
276 else {
277 assert(PadStrategy == TargetOpcode::G_SEXT);
278
279 // Shift the sign bit of the low register through the high register.
280 auto ShiftAmt =
281 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
282 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
283 }
284 }
285
286 // Registers for the final merge to be produced.
287 SmallVector<Register, 4> Remerge(NumParts);
288
289 // Registers needed for intermediate merges, which will be merged into a
290 // source for Remerge.
291 SmallVector<Register, 4> SubMerge(NumSubParts);
292
293 // Once we've fully read off the end of the original source bits, we can reuse
294 // the same high bits for remaining padding elements.
295 Register AllPadReg;
296
297 // Build merges to the LCM type to cover the original result type.
298 for (int I = 0; I != NumParts; ++I) {
299 bool AllMergePartsArePadding = true;
300
301 // Build the requested merges to the requested type.
302 for (int J = 0; J != NumSubParts; ++J) {
303 int Idx = I * NumSubParts + J;
304 if (Idx >= NumOrigSrc) {
305 SubMerge[J] = PadReg;
306 continue;
307 }
308
309 SubMerge[J] = VRegs[Idx];
310
311 // There are meaningful bits here we can't reuse later.
312 AllMergePartsArePadding = false;
313 }
314
315 // If we've filled up a complete piece with padding bits, we can directly
316 // emit the natural sized constant if applicable, rather than a merge of
317 // smaller constants.
318 if (AllMergePartsArePadding && !AllPadReg) {
319 if (PadStrategy == TargetOpcode::G_ANYEXT)
320 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
321 else if (PadStrategy == TargetOpcode::G_ZEXT)
322 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
323
324 // If this is a sign extension, we can't materialize a trivial constant
325 // with the right type and have to produce a merge.
326 }
327
328 if (AllPadReg) {
329 // Avoid creating additional instructions if we're just adding additional
330 // copies of padding bits.
331 Remerge[I] = AllPadReg;
332 continue;
333 }
334
335 if (NumSubParts == 1)
336 Remerge[I] = SubMerge[0];
337 else
338 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
339
340 // In the sign extend padding case, re-use the first all-signbit merge.
341 if (AllMergePartsArePadding && !AllPadReg)
342 AllPadReg = Remerge[I];
343 }
344
345 VRegs = std::move(Remerge);
346 return LCMTy;
347}
348
349void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
350 ArrayRef<Register> RemergeRegs) {
351 LLT DstTy = MRI.getType(DstReg);
352
353 // Create the merge to the widened source, and extract the relevant bits into
354 // the result.
355
356 if (DstTy == LCMTy) {
357 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
358 return;
359 }
360
361 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
362 if (DstTy.isScalar() && LCMTy.isScalar()) {
363 MIRBuilder.buildTrunc(DstReg, Remerge);
364 return;
365 }
366
367 if (LCMTy.isVector()) {
368 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
369 SmallVector<Register, 8> UnmergeDefs(NumDefs);
370 UnmergeDefs[0] = DstReg;
371 for (unsigned I = 1; I != NumDefs; ++I)
372 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
373
374 MIRBuilder.buildUnmerge(UnmergeDefs,
375 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
376 return;
377 }
378
379 llvm_unreachable("unhandled case");
380}
381
382static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
383#define RTLIBCASE_INT(LibcallPrefix) \
384 do { \
385 switch (Size) { \
386 case 32: \
387 return RTLIB::LibcallPrefix##32; \
388 case 64: \
389 return RTLIB::LibcallPrefix##64; \
390 case 128: \
391 return RTLIB::LibcallPrefix##128; \
392 default: \
393 llvm_unreachable("unexpected size"); \
394 } \
395 } while (0)
396
397#define RTLIBCASE(LibcallPrefix) \
398 do { \
399 switch (Size) { \
400 case 32: \
401 return RTLIB::LibcallPrefix##32; \
402 case 64: \
403 return RTLIB::LibcallPrefix##64; \
404 case 80: \
405 return RTLIB::LibcallPrefix##80; \
406 case 128: \
407 return RTLIB::LibcallPrefix##128; \
408 default: \
409 llvm_unreachable("unexpected size"); \
410 } \
411 } while (0)
412
413 switch (Opcode) {
414 case TargetOpcode::G_MUL:
415 RTLIBCASE_INT(MUL_I);
416 case TargetOpcode::G_SDIV:
417 RTLIBCASE_INT(SDIV_I);
418 case TargetOpcode::G_UDIV:
419 RTLIBCASE_INT(UDIV_I);
420 case TargetOpcode::G_SREM:
421 RTLIBCASE_INT(SREM_I);
422 case TargetOpcode::G_UREM:
423 RTLIBCASE_INT(UREM_I);
424 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
425 RTLIBCASE_INT(CTLZ_I);
426 case TargetOpcode::G_FADD:
427 RTLIBCASE(ADD_F);
428 case TargetOpcode::G_FSUB:
429 RTLIBCASE(SUB_F);
430 case TargetOpcode::G_FMUL:
431 RTLIBCASE(MUL_F);
432 case TargetOpcode::G_FDIV:
433 RTLIBCASE(DIV_F);
434 case TargetOpcode::G_FEXP:
435 RTLIBCASE(EXP_F);
436 case TargetOpcode::G_FEXP2:
437 RTLIBCASE(EXP2_F);
438 case TargetOpcode::G_FEXP10:
439 RTLIBCASE(EXP10_F);
440 case TargetOpcode::G_FREM:
441 RTLIBCASE(REM_F);
442 case TargetOpcode::G_FPOW:
443 RTLIBCASE(POW_F);
444 case TargetOpcode::G_FPOWI:
445 RTLIBCASE(POWI_F);
446 case TargetOpcode::G_FMA:
447 RTLIBCASE(FMA_F);
448 case TargetOpcode::G_FSIN:
449 RTLIBCASE(SIN_F);
450 case TargetOpcode::G_FCOS:
451 RTLIBCASE(COS_F);
452 case TargetOpcode::G_FTAN:
453 RTLIBCASE(TAN_F);
454 case TargetOpcode::G_FASIN:
455 RTLIBCASE(ASIN_F);
456 case TargetOpcode::G_FACOS:
457 RTLIBCASE(ACOS_F);
458 case TargetOpcode::G_FATAN:
459 RTLIBCASE(ATAN_F);
460 case TargetOpcode::G_FSINH:
461 RTLIBCASE(SINH_F);
462 case TargetOpcode::G_FCOSH:
463 RTLIBCASE(COSH_F);
464 case TargetOpcode::G_FTANH:
465 RTLIBCASE(TANH_F);
466 case TargetOpcode::G_FLOG10:
467 RTLIBCASE(LOG10_F);
468 case TargetOpcode::G_FLOG:
469 RTLIBCASE(LOG_F);
470 case TargetOpcode::G_FLOG2:
471 RTLIBCASE(LOG2_F);
472 case TargetOpcode::G_FLDEXP:
473 RTLIBCASE(LDEXP_F);
474 case TargetOpcode::G_FCEIL:
475 RTLIBCASE(CEIL_F);
476 case TargetOpcode::G_FFLOOR:
477 RTLIBCASE(FLOOR_F);
478 case TargetOpcode::G_FMINNUM:
479 RTLIBCASE(FMIN_F);
480 case TargetOpcode::G_FMAXNUM:
481 RTLIBCASE(FMAX_F);
482 case TargetOpcode::G_FSQRT:
483 RTLIBCASE(SQRT_F);
484 case TargetOpcode::G_FRINT:
485 RTLIBCASE(RINT_F);
486 case TargetOpcode::G_FNEARBYINT:
487 RTLIBCASE(NEARBYINT_F);
488 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
489 RTLIBCASE(ROUNDEVEN_F);
490 case TargetOpcode::G_INTRINSIC_LRINT:
491 RTLIBCASE(LRINT_F);
492 case TargetOpcode::G_INTRINSIC_LLRINT:
493 RTLIBCASE(LLRINT_F);
494 }
495 llvm_unreachable("Unknown libcall function");
496}
497
498/// True if an instruction is in tail position in its caller. Intended for
499/// legalizing libcalls as tail calls when possible.
500static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
501 MachineInstr &MI,
502 const TargetInstrInfo &TII,
503 MachineRegisterInfo &MRI) {
504 MachineBasicBlock &MBB = *MI.getParent();
505 const Function &F = MBB.getParent()->getFunction();
506
507 // Conservatively require the attributes of the call to match those of
508 // the return. Ignore NoAlias and NonNull because they don't affect the
509 // call sequence.
510 AttributeList CallerAttrs = F.getAttributes();
511 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
512 .removeAttribute(Attribute::NoAlias)
513 .removeAttribute(Attribute::NonNull)
514 .hasAttributes())
515 return false;
516
517 // It's not safe to eliminate the sign / zero extension of the return value.
518 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
519 CallerAttrs.hasRetAttr(Attribute::SExt))
520 return false;
521
522 // Only tail call if the following instruction is a standard return or if we
523 // have a `thisreturn` callee, and a sequence like:
524 //
525 // G_MEMCPY %0, %1, %2
526 // $x0 = COPY %0
527 // RET_ReallyLR implicit $x0
528 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
529 if (Next != MBB.instr_end() && Next->isCopy()) {
530 if (MI.getOpcode() == TargetOpcode::G_BZERO)
531 return false;
532
533 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
534 // memcpy/etc routines return the same parameter. For others it will be the
535 // returned value.
536 Register VReg = MI.getOperand(0).getReg();
537 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
538 return false;
539
540 Register PReg = Next->getOperand(0).getReg();
541 if (!PReg.isPhysical())
542 return false;
543
544 auto Ret = next_nodbg(Next, MBB.instr_end());
545 if (Ret == MBB.instr_end() || !Ret->isReturn())
546 return false;
547
548 if (Ret->getNumImplicitOperands() != 1)
549 return false;
550
551 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
552 return false;
553
554 // Skip over the COPY that we just validated.
555 Next = Ret;
556 }
557
558 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
559 return false;
560
561 return true;
562}
563
564LegalizerHelper::LegalizeResult
565llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
566 const CallLowering::ArgInfo &Result,
567 ArrayRef<CallLowering::ArgInfo> Args,
568 const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
569 MachineInstr *MI) {
570 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
571
572 CallLowering::CallLoweringInfo Info;
573 Info.CallConv = CC;
574 Info.Callee = MachineOperand::CreateES(Name);
575 Info.OrigRet = Result;
576 if (MI)
577 Info.IsTailCall =
578 (Result.Ty->isVoidTy() ||
579 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
580 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
581 *MIRBuilder.getMRI());
582
583 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
584 if (!CLI.lowerCall(MIRBuilder, Info))
585 return LegalizerHelper::UnableToLegalize;
586
587 if (MI && Info.LoweredTailCall) {
588 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
589
590 // Check debug locations before removing the return.
591 LocObserver.checkpoint(true);
592
593 // We must have a return following the call (or debug insts) to get past
594 // isLibCallInTailPosition.
595 do {
596 MachineInstr *Next = MI->getNextNode();
597 assert(Next &&
598 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
599 "Expected instr following MI to be return or debug inst?");
600 // We lowered a tail call, so the call is now the return from the block.
601 // Delete the old return.
602 Next->eraseFromParent();
603 } while (MI->getNextNode());
604
605 // We expect to lose the debug location from the return.
606 LocObserver.checkpoint(false);
607 }
608 return LegalizerHelper::Legalized;
609}
610
611LegalizerHelper::LegalizeResult
612llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
613 const CallLowering::ArgInfo &Result,
614 ArrayRef<CallLowering::ArgInfo> Args,
615 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
616 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
617 const char *Name = TLI.getLibcallName(Libcall);
618 if (!Name)
619 return LegalizerHelper::UnableToLegalize;
620 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
621 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
622}
623
624// Useful for libcalls where all operands have the same type.
625static LegalizerHelper::LegalizeResult
626simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
627 Type *OpType, LostDebugLocObserver &LocObserver) {
628 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
629
630 // FIXME: What does the original arg index mean here?
631 SmallVector<CallLowering::ArgInfo, 3> Args;
632 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
633 Args.push_back({MO.getReg(), OpType, 0});
634 return createLibcall(MIRBuilder, Libcall,
635 {MI.getOperand(0).getReg(), OpType, 0}, Args,
636 LocObserver, &MI);
637}
638
639LegalizerHelper::LegalizeResult
640llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
641 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
642 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
643
644 SmallVector<CallLowering::ArgInfo, 3> Args;
643
645 // Add all the args, except for the last which is an imm denoting 'tail'.
646 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
647 Register Reg = MI.getOperand(i).getReg();
648
649 // Need to derive an IR type for call lowering.
650 LLT OpLLT = MRI.getType(Reg);
651 Type *OpTy = nullptr;
652 if (OpLLT.isPointer())
653 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
654 else
655 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
656 Args.push_back({Reg, OpTy, 0});
657 }
658
659 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
660 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
661 RTLIB::Libcall RTLibcall;
662 unsigned Opc = MI.getOpcode();
663 switch (Opc) {
664 case TargetOpcode::G_BZERO:
665 RTLibcall = RTLIB::BZERO;
666 break;
667 case TargetOpcode::G_MEMCPY:
668 RTLibcall = RTLIB::MEMCPY;
669 Args[0].Flags[0].setReturned();
670 break;
671 case TargetOpcode::G_MEMMOVE:
672 RTLibcall = RTLIB::MEMMOVE;
673 Args[0].Flags[0].setReturned();
674 break;
675 case TargetOpcode::G_MEMSET:
676 RTLibcall = RTLIB::MEMSET;
677 Args[0].Flags[0].setReturned();
678 break;
679 default:
680 llvm_unreachable("unsupported opcode");
681 }
682 const char *Name = TLI.getLibcallName(RTLibcall);
683
684 // Unsupported libcall on the target.
685 if (!Name) {
686 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
687 << MIRBuilder.getTII().getName(Opc) << "\n");
688 return LegalizerHelper::UnableToLegalize;
689 }
690
691 CallLowering::CallLoweringInfo Info;
692 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
693 Info.Callee = MachineOperand::CreateES(Name);
694 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
695 Info.IsTailCall =
696 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
697 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
698
699 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
700 if (!CLI.lowerCall(MIRBuilder, Info))
701 return LegalizerHelper::UnableToLegalize;
702
703 if (Info.LoweredTailCall) {
704 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
705
706 // Check debug locations before removing the return.
707 LocObserver.checkpoint(true);
708
709 // We must have a return following the call (or debug insts) to get past
710 // isLibCallInTailPosition.
711 do {
712 MachineInstr *Next = MI.getNextNode();
713 assert(Next &&
714 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
715 "Expected instr following MI to be return or debug inst?");
716 // We lowered a tail call, so the call is now the return from the block.
717 // Delete the old return.
718 Next->eraseFromParent();
719 } while (MI.getNextNode());
720
721 // We expect to lose the debug location from the return.
722 LocObserver.checkpoint(false);
723 }
724
725 return LegalizerHelper::Legalized;
726}
727
728static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
729 unsigned Opc = MI.getOpcode();
730 auto &AtomicMI = cast<GMemOperation>(MI);
731 auto &MMO = AtomicMI.getMMO();
732 auto Ordering = MMO.getMergedOrdering();
733 LLT MemType = MMO.getMemoryType();
734 uint64_t MemSize = MemType.getSizeInBytes();
735 if (MemType.isVector())
736 return RTLIB::UNKNOWN_LIBCALL;
737
738#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
739#define LCALL5(A) \
740 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
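// As an illustration, LCALL5(RTLIB::OUTLINE_ATOMIC_CAS) expands to the 1-, 2-,
// 4-, 8- and 16-byte entries, each in RELAX/ACQ/REL/ACQ_REL ordering variants
// (e.g. RTLIB::OUTLINE_ATOMIC_CAS4_ACQ), filling the [5][4] tables below.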
741 switch (Opc) {
742 case TargetOpcode::G_ATOMIC_CMPXCHG:
743 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
744 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
745 return getOutlineAtomicHelper(LC, Ordering, MemSize);
746 }
747 case TargetOpcode::G_ATOMICRMW_XCHG: {
748 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
749 return getOutlineAtomicHelper(LC, Ordering, MemSize);
750 }
751 case TargetOpcode::G_ATOMICRMW_ADD:
752 case TargetOpcode::G_ATOMICRMW_SUB: {
753 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
754 return getOutlineAtomicHelper(LC, Ordering, MemSize);
755 }
756 case TargetOpcode::G_ATOMICRMW_AND: {
757 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
758 return getOutlineAtomicHelper(LC, Ordering, MemSize);
759 }
760 case TargetOpcode::G_ATOMICRMW_OR: {
761 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
762 return getOutlineAtomicHelper(LC, Ordering, MemSize);
763 }
764 case TargetOpcode::G_ATOMICRMW_XOR: {
765 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
766 return getOutlineAtomicHelper(LC, Ordering, MemSize);
767 }
768 default:
769 return RTLIB::UNKNOWN_LIBCALL;
770 }
771#undef LCALLS
772#undef LCALL5
773}
774
775static LegalizerHelper::LegalizeResult
776createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
777 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
778
779 Type *RetTy;
780 SmallVector<Register> RetRegs;
781 SmallVector<CallLowering::ArgInfo, 3> Args;
782 unsigned Opc = MI.getOpcode();
783 switch (Opc) {
784 case TargetOpcode::G_ATOMIC_CMPXCHG:
785 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
786 Register Success;
787 LLT SuccessLLT;
788 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
789 MI.getFirst4RegLLTs();
790 RetRegs.push_back(Ret);
791 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
792 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
793 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
794 NewLLT) = MI.getFirst5RegLLTs();
795 RetRegs.push_back(Success);
796 RetTy = StructType::get(
797 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
798 }
799 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
800 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
801 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
802 break;
803 }
804 case TargetOpcode::G_ATOMICRMW_XCHG:
805 case TargetOpcode::G_ATOMICRMW_ADD:
806 case TargetOpcode::G_ATOMICRMW_SUB:
807 case TargetOpcode::G_ATOMICRMW_AND:
808 case TargetOpcode::G_ATOMICRMW_OR:
809 case TargetOpcode::G_ATOMICRMW_XOR: {
810 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
811 RetRegs.push_back(Ret);
812 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
813 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
814 Val =
815 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
816 .getReg(0);
817 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
818 Val =
819 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
820 .getReg(0);
821 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
822 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
823 break;
824 }
825 default:
826 llvm_unreachable("unsupported opcode");
827 }
828
829 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
830 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
831 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
832 const char *Name = TLI.getLibcallName(RTLibcall);
833
834 // Unsupported libcall on the target.
835 if (!Name) {
836 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
837 << MIRBuilder.getTII().getName(Opc) << "\n");
838 return LegalizerHelper::UnableToLegalize;
839 }
840
841 CallLowering::CallLoweringInfo Info;
842 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
843 Info.Callee = MachineOperand::CreateES(Name);
844 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
845
846 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
847 if (!CLI.lowerCall(MIRBuilder, Info))
848 return LegalizerHelper::UnableToLegalize;
849
850 return LegalizerHelper::Legalized;
851}
852
853static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
854 Type *FromType) {
855 auto ToMVT = MVT::getVT(ToType);
856 auto FromMVT = MVT::getVT(FromType);
857
858 switch (Opcode) {
859 case TargetOpcode::G_FPEXT:
860 return RTLIB::getFPEXT(FromMVT, ToMVT);
861 case TargetOpcode::G_FPTRUNC:
862 return RTLIB::getFPROUND(FromMVT, ToMVT);
863 case TargetOpcode::G_FPTOSI:
864 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
865 case TargetOpcode::G_FPTOUI:
866 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
867 case TargetOpcode::G_SITOFP:
868 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
869 case TargetOpcode::G_UITOFP:
870 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
871 }
872 llvm_unreachable("Unsupported libcall function");
873}
874
875static LegalizerHelper::LegalizeResult
876conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
877 Type *FromType, LostDebugLocObserver &LocObserver) {
878 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
879 return createLibcall(
880 MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType, 0},
881 {{MI.getOperand(1).getReg(), FromType, 0}}, LocObserver, &MI);
882}
883
884static RTLIB::Libcall
885getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
886 RTLIB::Libcall RTLibcall;
887 switch (MI.getOpcode()) {
888 case TargetOpcode::G_GET_FPENV:
889 RTLibcall = RTLIB::FEGETENV;
890 break;
891 case TargetOpcode::G_SET_FPENV:
892 case TargetOpcode::G_RESET_FPENV:
893 RTLibcall = RTLIB::FESETENV;
894 break;
895 case TargetOpcode::G_GET_FPMODE:
896 RTLibcall = RTLIB::FEGETMODE;
897 break;
898 case TargetOpcode::G_SET_FPMODE:
899 case TargetOpcode::G_RESET_FPMODE:
900 RTLibcall = RTLIB::FESETMODE;
901 break;
902 default:
903 llvm_unreachable("Unexpected opcode");
904 }
905 return RTLibcall;
906}
907
908// Some library functions that read FP state (fegetmode, fegetenv) write the
909// state into a region in memory. IR intrinsics that do the same operations
910// (get_fpmode, get_fpenv) return the state as an integer value. To implement these
911// intrinsics via the library functions, we need to use a temporary variable,
912// for example:
913//
914// %0:_(s32) = G_GET_FPMODE
915//
916// is transformed to:
917//
918// %1:_(p0) = G_FRAME_INDEX %stack.0
919// BL &fegetmode
920// %0:_(s32) = G_LOAD % 1
921//
922LegalizerHelper::LegalizeResult
923LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
924 MachineInstr &MI,
925 LostDebugLocObserver &LocObserver) {
926 const DataLayout &DL = MIRBuilder.getDataLayout();
927 auto &MF = MIRBuilder.getMF();
928 auto &MRI = *MIRBuilder.getMRI();
929 auto &Ctx = MF.getFunction().getContext();
930
931 // Create a temporary where the library function will put the read state.
932 Register Dst = MI.getOperand(0).getReg();
933 LLT StateTy = MRI.getType(Dst);
934 TypeSize StateSize = StateTy.getSizeInBytes();
935 Align TempAlign = getStackTemporaryAlignment(StateTy);
936 MachinePointerInfo TempPtrInfo;
937 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
938
939 // Create a call to library function, with the temporary as an argument.
940 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
941 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
942 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
943 auto Res =
944 createLibcall(MIRBuilder, RTLibcall,
945 CallLowering::ArgInfo({}, Type::getVoidTy(Ctx), 0),
946 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
947 LocObserver, nullptr);
948 if (Res != LegalizerHelper::Legalized)
949 return Res;
950
951 // Create a load from the temporary.
952 MachineMemOperand *MMO = MF.getMachineMemOperand(
953 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
954 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
955
956 return LegalizerHelper::Legalized;
957}
958
959// Similar to `createGetStateLibcall`, the function calls a library function
960// using transient space on the stack. In this case the library function reads
961// the content of the memory region.
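// For example (a sketch mirroring the G_GET_FPMODE case above):
//
// G_SET_FPMODE %0:_(s32)
//
// is transformed to:
//
// %1:_(p0) = G_FRAME_INDEX %stack.0
// G_STORE %0:_(s32), %1:_(p0)
// BL &fesetmode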
963LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
964 MachineInstr &MI,
965 LostDebugLocObserver &LocObserver) {
966 const DataLayout &DL = MIRBuilder.getDataLayout();
967 auto &MF = MIRBuilder.getMF();
968 auto &MRI = *MIRBuilder.getMRI();
969 auto &Ctx = MF.getFunction().getContext();
970
971 // Create a temporary where the library function will get the new state.
972 Register Src = MI.getOperand(0).getReg();
973 LLT StateTy = MRI.getType(Src);
974 TypeSize StateSize = StateTy.getSizeInBytes();
975 Align TempAlign = getStackTemporaryAlignment(StateTy);
976 MachinePointerInfo TempPtrInfo;
977 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
978
979 // Put the new state into the temporary.
980 MachineMemOperand *MMO = MF.getMachineMemOperand(
981 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
982 MIRBuilder.buildStore(Src, Temp, *MMO);
983
984 // Create a call to library function, with the temporary as an argument.
985 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
986 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
987 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
988 return createLibcall(MIRBuilder, RTLibcall,
989 CallLowering::ArgInfo({}, Type::getVoidTy(Ctx), 0),
990 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
991 LocObserver, nullptr);
992}
993
994/// Returns the corresponding libcall for the given Pred and
995/// the ICMP predicate that should be generated to compare with #0
996/// after the libcall.
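/// For example, FCMP_OGT maps to the OGT_F128 libcall, and the libcall's i32
/// result is then compared against 0 with ICMP_SGT to produce the final value.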
997static std::pair<RTLIB::Libcall, CmpInst::Predicate>
998getFCMPLibcallDesc(const CmpInst::Predicate Pred) {
999
1000 switch (Pred) {
1001 case CmpInst::FCMP_OEQ:
1002 return {RTLIB::OEQ_F128, CmpInst::ICMP_EQ};
1003 case CmpInst::FCMP_UNE:
1004 return {RTLIB::UNE_F128, CmpInst::ICMP_NE};
1005 case CmpInst::FCMP_OGE:
1006 return {RTLIB::OGE_F128, CmpInst::ICMP_SGE};
1007 case CmpInst::FCMP_OLT:
1008 return {RTLIB::OLT_F128, CmpInst::ICMP_SLT};
1009 case CmpInst::FCMP_OLE:
1010 return {RTLIB::OLE_F128, CmpInst::ICMP_SLE};
1011 case CmpInst::FCMP_OGT:
1012 return {RTLIB::OGT_F128, CmpInst::ICMP_SGT};
1013 case CmpInst::FCMP_UNO:
1014 return {RTLIB::UO_F128, CmpInst::ICMP_NE};
1015 default:
1016 return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
1017 }
1018}
1019
1021LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
1023 LostDebugLocObserver &LocObserver) {
1024 auto &MF = MIRBuilder.getMF();
1025 auto &Ctx = MF.getFunction().getContext();
1026 const GFCmp *Cmp = cast<GFCmp>(&MI);
1027
1028 LLT OpLLT = MRI.getType(Cmp->getLHSReg());
1029 if (OpLLT != LLT::scalar(128) || OpLLT != MRI.getType(Cmp->getRHSReg()))
1030 return UnableToLegalize;
1031
1032 Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
1033
1034 // DstReg type is s32
1035 const Register DstReg = Cmp->getReg(0);
1036 const auto Cond = Cmp->getCond();
1037
1038 // Reference:
1039 // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
1040 // Generates a libcall followed by ICMP.
1041 const auto BuildLibcall =
1042 [&](const RTLIB::Libcall Libcall, const CmpInst::Predicate ICmpPred,
1043 const DstOp &Res = LLT::scalar(32)) -> Register {
1044 // FCMP libcall always returns an i32, and needs an ICMP with #0.
1045 constexpr LLT TempLLT = LLT::scalar(32);
1046 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1047 // Generate libcall, holding result in Temp
1048 const auto Status = createLibcall(
1049 MIRBuilder, Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
1050 {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
1051 LocObserver, &MI);
1052 if (!Status)
1053 return {};
1054
1055 // Compare temp with #0 to get the final result.
1056 return MIRBuilder
1057 .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
1058 .getReg(0);
1059 };
1060
1061 // Simple case if we have a direct mapping from predicate to libcall
1062 if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond);
1063 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1064 ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
1065 if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
1066 return Legalized;
1067 }
1068 return UnableToLegalize;
1069 }
1070
1071 // No direct mapping found, should be generated as combination of libcalls.
1072
1073 switch (Cond) {
1074 case CmpInst::FCMP_UEQ: {
1075 // FCMP_UEQ: unordered or equal
1076 // Convert into (FCMP_OEQ || FCMP_UNO).
1077
1078 const auto [OeqLibcall, OeqPred] = getFCMPLibcallDesc(CmpInst::FCMP_OEQ);
1079 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred);
1080
1081 const auto [UnoLibcall, UnoPred] = getFCMPLibcallDesc(CmpInst::FCMP_UNO);
1082 const auto Uno = BuildLibcall(UnoLibcall, UnoPred);
1083 if (Oeq && Uno)
1084 MIRBuilder.buildOr(DstReg, Oeq, Uno);
1085 else
1086 return UnableToLegalize;
1087
1088 break;
1089 }
1090 case CmpInst::FCMP_ONE: {
1091 // FCMP_ONE: ordered and operands are unequal
1092 // Convert into (!FCMP_OEQ && !FCMP_UNO).
1093
1094 // We invert the predicate instead of generating a NOT
1095 // to save one instruction.
1096 // On AArch64, isel can even select the two compares into a single ccmp.
1097 const auto [OeqLibcall, OeqPred] = getFCMPLibcallDesc(CmpInst::FCMP_OEQ);
1098 const auto NotOeq =
1099 BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred));
1100
1101 const auto [UnoLibcall, UnoPred] = getFCMPLibcallDesc(CmpInst::FCMP_UNO);
1102 const auto NotUno =
1103 BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred));
1104
1105 if (NotOeq && NotUno)
1106 MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
1107 else
1108 return UnableToLegalize;
1109
1110 break;
1111 }
1112 case CmpInst::FCMP_ULT:
1113 case CmpInst::FCMP_UGE:
1114 case CmpInst::FCMP_UGT:
1115 case CmpInst::FCMP_ULE:
1116 case CmpInst::FCMP_ORD: {
1117 // Convert into: !(inverse(Pred))
1118 // E.g. FCMP_ULT becomes !FCMP_OGE
1119 // This is equivalent to the following, but saves some instructions.
1120 // MIRBuilder.buildNot(
1121 // PredTy,
1122 // MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
1123 // Op1, Op2));
1124 const auto [InversedLibcall, InversedPred] =
1125 getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond));
1126 if (!BuildLibcall(InversedLibcall,
1127 CmpInst::getInversePredicate(InversedPred), DstReg))
1128 return UnableToLegalize;
1129 break;
1130 }
1131 default:
1132 return UnableToLegalize;
1133 }
1134
1135 return Legalized;
1136}
1137
1138// The function is used to legalize operations that set default environment
1139// state. In the C library a call like `fesetmode(FE_DFL_MODE)` is used for that.
1140// On most targets supported in glibc, FE_DFL_MODE is defined as
1141// `((const femode_t *) -1)`. That assumption is used here. If it does not hold
1142// for some target, the target must provide custom lowering.
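// For example, on a target with 64-bit pointers, G_RESET_FPMODE roughly
// becomes:
//
// %0:_(s64) = G_CONSTANT i64 -1
// %1:_(p0) = G_INTTOPTR %0:_(s64)
// BL &fesetmode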
1144LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
1145 MachineInstr &MI,
1146 LostDebugLocObserver &LocObserver) {
1147 const DataLayout &DL = MIRBuilder.getDataLayout();
1148 auto &MF = MIRBuilder.getMF();
1149 auto &Ctx = MF.getFunction().getContext();
1150
1151 // Create an argument for the library function.
1152 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1153 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1154 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1155 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1156 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1157 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1158 MIRBuilder.buildIntToPtr(Dest, DefValue);
1159
1160 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1161 return createLibcall(MIRBuilder, RTLibcall,
1162 CallLowering::ArgInfo({}, Type::getVoidTy(Ctx), 0),
1163 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1164 LocObserver, &MI);
1165}
1166
1167LegalizerHelper::LegalizeResult
1168LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1169 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1170
1171 switch (MI.getOpcode()) {
1172 default:
1173 return UnableToLegalize;
1174 case TargetOpcode::G_MUL:
1175 case TargetOpcode::G_SDIV:
1176 case TargetOpcode::G_UDIV:
1177 case TargetOpcode::G_SREM:
1178 case TargetOpcode::G_UREM:
1179 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1180 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1181 unsigned Size = LLTy.getSizeInBits();
1182 Type *HLTy = IntegerType::get(Ctx, Size);
1183 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1184 if (Status != Legalized)
1185 return Status;
1186 break;
1187 }
1188 case TargetOpcode::G_FADD:
1189 case TargetOpcode::G_FSUB:
1190 case TargetOpcode::G_FMUL:
1191 case TargetOpcode::G_FDIV:
1192 case TargetOpcode::G_FMA:
1193 case TargetOpcode::G_FPOW:
1194 case TargetOpcode::G_FREM:
1195 case TargetOpcode::G_FCOS:
1196 case TargetOpcode::G_FSIN:
1197 case TargetOpcode::G_FTAN:
1198 case TargetOpcode::G_FACOS:
1199 case TargetOpcode::G_FASIN:
1200 case TargetOpcode::G_FATAN:
1201 case TargetOpcode::G_FCOSH:
1202 case TargetOpcode::G_FSINH:
1203 case TargetOpcode::G_FTANH:
1204 case TargetOpcode::G_FLOG10:
1205 case TargetOpcode::G_FLOG:
1206 case TargetOpcode::G_FLOG2:
1207 case TargetOpcode::G_FLDEXP:
1208 case TargetOpcode::G_FEXP:
1209 case TargetOpcode::G_FEXP2:
1210 case TargetOpcode::G_FEXP10:
1211 case TargetOpcode::G_FCEIL:
1212 case TargetOpcode::G_FFLOOR:
1213 case TargetOpcode::G_FMINNUM:
1214 case TargetOpcode::G_FMAXNUM:
1215 case TargetOpcode::G_FSQRT:
1216 case TargetOpcode::G_FRINT:
1217 case TargetOpcode::G_FNEARBYINT:
1218 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1219 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1220 unsigned Size = LLTy.getSizeInBits();
1221 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1222 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1223 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1224 return UnableToLegalize;
1225 }
1226 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1227 if (Status != Legalized)
1228 return Status;
1229 break;
1230 }
1231 case TargetOpcode::G_INTRINSIC_LRINT:
1232 case TargetOpcode::G_INTRINSIC_LLRINT: {
1233 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1234 unsigned Size = LLTy.getSizeInBits();
1235 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1236 Type *ITy = IntegerType::get(
1237 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1238 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1239 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1240 return UnableToLegalize;
1241 }
1242 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1243 LegalizeResult Status =
1244 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1245 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1246 if (Status != Legalized)
1247 return Status;
1248 MI.eraseFromParent();
1249 return Legalized;
1250 }
1251 case TargetOpcode::G_FPOWI: {
1252 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1253 unsigned Size = LLTy.getSizeInBits();
1254 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1255 Type *ITy = IntegerType::get(
1256 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1257 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1258 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1259 return UnableToLegalize;
1260 }
1261 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1262 std::initializer_list<CallLowering::ArgInfo> Args = {
1263 {MI.getOperand(1).getReg(), HLTy, 0},
1264 {MI.getOperand(2).getReg(), ITy, 1}};
1265 LegalizeResult Status =
1266 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1267 Args, LocObserver, &MI);
1268 if (Status != Legalized)
1269 return Status;
1270 break;
1271 }
1272 case TargetOpcode::G_FPEXT:
1273 case TargetOpcode::G_FPTRUNC: {
1274 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1275 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1276 if (!FromTy || !ToTy)
1277 return UnableToLegalize;
1278 LegalizeResult Status =
1279 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver);
1280 if (Status != Legalized)
1281 return Status;
1282 break;
1283 }
1284 case TargetOpcode::G_FCMP: {
1285 LegalizeResult Status = createFCMPLibcall(MIRBuilder, MI, LocObserver);
1286 if (Status != Legalized)
1287 return Status;
1288 MI.eraseFromParent();
1289 return Status;
1290 }
1291 case TargetOpcode::G_FPTOSI:
1292 case TargetOpcode::G_FPTOUI: {
1293 // FIXME: Support other types
1294 Type *FromTy =
1295 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1296 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1297 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1298 return UnableToLegalize;
1299 LegalizeResult Status = conversionLibcall(
1300 MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver);
1301 if (Status != Legalized)
1302 return Status;
1303 break;
1304 }
1305 case TargetOpcode::G_SITOFP:
1306 case TargetOpcode::G_UITOFP: {
1307 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1308 Type *ToTy =
1309 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1310 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1311 return UnableToLegalize;
1312 LegalizeResult Status = conversionLibcall(
1313 MI, MIRBuilder, ToTy, Type::getIntNTy(Ctx, FromSize), LocObserver);
1314 if (Status != Legalized)
1315 return Status;
1316 break;
1317 }
1318 case TargetOpcode::G_ATOMICRMW_XCHG:
1319 case TargetOpcode::G_ATOMICRMW_ADD:
1320 case TargetOpcode::G_ATOMICRMW_SUB:
1321 case TargetOpcode::G_ATOMICRMW_AND:
1322 case TargetOpcode::G_ATOMICRMW_OR:
1323 case TargetOpcode::G_ATOMICRMW_XOR:
1324 case TargetOpcode::G_ATOMIC_CMPXCHG:
1325 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1326 auto Status = createAtomicLibcall(MIRBuilder, MI);
1327 if (Status != Legalized)
1328 return Status;
1329 break;
1330 }
1331 case TargetOpcode::G_BZERO:
1332 case TargetOpcode::G_MEMCPY:
1333 case TargetOpcode::G_MEMMOVE:
1334 case TargetOpcode::G_MEMSET: {
1335 LegalizeResult Result =
1336 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1337 if (Result != Legalized)
1338 return Result;
1339 MI.eraseFromParent();
1340 return Result;
1341 }
1342 case TargetOpcode::G_GET_FPENV:
1343 case TargetOpcode::G_GET_FPMODE: {
1344 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1345 if (Result != Legalized)
1346 return Result;
1347 break;
1348 }
1349 case TargetOpcode::G_SET_FPENV:
1350 case TargetOpcode::G_SET_FPMODE: {
1351 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1352 if (Result != Legalized)
1353 return Result;
1354 break;
1355 }
1356 case TargetOpcode::G_RESET_FPENV:
1357 case TargetOpcode::G_RESET_FPMODE: {
1358 LegalizeResult Result =
1359 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1360 if (Result != Legalized)
1361 return Result;
1362 break;
1363 }
1364 }
1365
1366 MI.eraseFromParent();
1367 return Legalized;
1368}
1369
1370LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1371 unsigned TypeIdx,
1372 LLT NarrowTy) {
1373 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1374 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1375
1376 switch (MI.getOpcode()) {
1377 default:
1378 return UnableToLegalize;
1379 case TargetOpcode::G_IMPLICIT_DEF: {
1380 Register DstReg = MI.getOperand(0).getReg();
1381 LLT DstTy = MRI.getType(DstReg);
1382
1383 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1384 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1385 // FIXME: Although this would also be legal for the general case, it causes
1386 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1387 // combines not being hit). This seems to be a problem related to the
1388 // artifact combiner.
1389 if (SizeOp0 % NarrowSize != 0) {
1390 LLT ImplicitTy = NarrowTy;
1391 if (DstTy.isVector())
1392 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
1393
1394 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1395 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1396
1397 MI.eraseFromParent();
1398 return Legalized;
1399 }
1400
1401 int NumParts = SizeOp0 / NarrowSize;
1402
1403 SmallVector<Register, 2> DstRegs;
1404 for (int i = 0; i < NumParts; ++i)
1405 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1406
1407 if (DstTy.isVector())
1408 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1409 else
1410 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1411 MI.eraseFromParent();
1412 return Legalized;
1413 }
1414 case TargetOpcode::G_CONSTANT: {
1415 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1416 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1417 unsigned TotalSize = Ty.getSizeInBits();
1418 unsigned NarrowSize = NarrowTy.getSizeInBits();
1419 int NumParts = TotalSize / NarrowSize;
1420
1421 SmallVector<Register, 4> PartRegs;
1422 for (int I = 0; I != NumParts; ++I) {
1423 unsigned Offset = I * NarrowSize;
1424 auto K = MIRBuilder.buildConstant(NarrowTy,
1425 Val.lshr(Offset).trunc(NarrowSize));
1426 PartRegs.push_back(K.getReg(0));
1427 }
1428
1429 LLT LeftoverTy;
1430 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1431 SmallVector<Register, 1> LeftoverRegs;
1432 if (LeftoverBits != 0) {
1433 LeftoverTy = LLT::scalar(LeftoverBits);
1434 auto K = MIRBuilder.buildConstant(
1435 LeftoverTy,
1436 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1437 LeftoverRegs.push_back(K.getReg(0));
1438 }
1439
1440 insertParts(MI.getOperand(0).getReg(),
1441 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1442
1443 MI.eraseFromParent();
1444 return Legalized;
1445 }
1446 case TargetOpcode::G_SEXT:
1447 case TargetOpcode::G_ZEXT:
1448 case TargetOpcode::G_ANYEXT:
1449 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1450 case TargetOpcode::G_TRUNC: {
1451 if (TypeIdx != 1)
1452 return UnableToLegalize;
1453
1454 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1455 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1456 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1457 return UnableToLegalize;
1458 }
1459
1460 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1461 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1462 MI.eraseFromParent();
1463 return Legalized;
1464 }
1465 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1466 case TargetOpcode::G_FREEZE: {
1467 if (TypeIdx != 0)
1468 return UnableToLegalize;
1469
1470 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1471 // Should widen scalar first
1472 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1473 return UnableToLegalize;
1474
1475 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1476 SmallVector<Register, 8> Parts;
1477 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1478 Parts.push_back(
1479 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1480 .getReg(0));
1481 }
1482
1483 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1484 MI.eraseFromParent();
1485 return Legalized;
1486 }
1487 case TargetOpcode::G_ADD:
1488 case TargetOpcode::G_SUB:
1489 case TargetOpcode::G_SADDO:
1490 case TargetOpcode::G_SSUBO:
1491 case TargetOpcode::G_SADDE:
1492 case TargetOpcode::G_SSUBE:
1493 case TargetOpcode::G_UADDO:
1494 case TargetOpcode::G_USUBO:
1495 case TargetOpcode::G_UADDE:
1496 case TargetOpcode::G_USUBE:
1497 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1498 case TargetOpcode::G_MUL:
1499 case TargetOpcode::G_UMULH:
1500 return narrowScalarMul(MI, NarrowTy);
1501 case TargetOpcode::G_EXTRACT:
1502 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1503 case TargetOpcode::G_INSERT:
1504 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1505 case TargetOpcode::G_LOAD: {
1506 auto &LoadMI = cast<GLoad>(MI);
1507 Register DstReg = LoadMI.getDstReg();
1508 LLT DstTy = MRI.getType(DstReg);
1509 if (DstTy.isVector())
1510 return UnableToLegalize;
1511
1512 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1513 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1514 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1515 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1516 LoadMI.eraseFromParent();
1517 return Legalized;
1518 }
1519
1520 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1521 }
1522 case TargetOpcode::G_ZEXTLOAD:
1523 case TargetOpcode::G_SEXTLOAD: {
1524 auto &LoadMI = cast<GExtLoad>(MI);
1525 Register DstReg = LoadMI.getDstReg();
1526 Register PtrReg = LoadMI.getPointerReg();
1527
1528 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1529 auto &MMO = LoadMI.getMMO();
1530 unsigned MemSize = MMO.getSizeInBits().getValue();
1531
1532 if (MemSize == NarrowSize) {
1533 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1534 } else if (MemSize < NarrowSize) {
1535 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1536 } else if (MemSize > NarrowSize) {
1537 // FIXME: Need to split the load.
1538 return UnableToLegalize;
1539 }
1540
1541 if (isa<GZExtLoad>(LoadMI))
1542 MIRBuilder.buildZExt(DstReg, TmpReg);
1543 else
1544 MIRBuilder.buildSExt(DstReg, TmpReg);
1545
1546 LoadMI.eraseFromParent();
1547 return Legalized;
1548 }
1549 case TargetOpcode::G_STORE: {
1550 auto &StoreMI = cast<GStore>(MI);
1551
1552 Register SrcReg = StoreMI.getValueReg();
1553 LLT SrcTy = MRI.getType(SrcReg);
1554 if (SrcTy.isVector())
1555 return UnableToLegalize;
1556
1557 int NumParts = SizeOp0 / NarrowSize;
1558 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1559 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1560 if (SrcTy.isVector() && LeftoverBits != 0)
1561 return UnableToLegalize;
1562
1563 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1564 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1565 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1566 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1567 StoreMI.eraseFromParent();
1568 return Legalized;
1569 }
1570
1571 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1572 }
1573 case TargetOpcode::G_SELECT:
1574 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1575 case TargetOpcode::G_AND:
1576 case TargetOpcode::G_OR:
1577 case TargetOpcode::G_XOR: {
1578 // Legalize bitwise operation:
1579 // A = BinOp<Ty> B, C
1580 // into:
1581 // B1, ..., BN = G_UNMERGE_VALUES B
1582 // C1, ..., CN = G_UNMERGE_VALUES C
1583 // A1 = BinOp<Ty/N> B1, C1
1584 // ...
1585 // AN = BinOp<Ty/N> BN, CN
1586 // A = G_MERGE_VALUES A1, ..., AN
1587 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1588 }
1589 case TargetOpcode::G_SHL:
1590 case TargetOpcode::G_LSHR:
1591 case TargetOpcode::G_ASHR:
1592 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1593 case TargetOpcode::G_CTLZ:
1594 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1595 case TargetOpcode::G_CTTZ:
1596 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1597 case TargetOpcode::G_CTPOP:
1598 if (TypeIdx == 1)
1599 switch (MI.getOpcode()) {
1600 case TargetOpcode::G_CTLZ:
1601 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1602 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1603 case TargetOpcode::G_CTTZ:
1604 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1605 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1606 case TargetOpcode::G_CTPOP:
1607 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1608 default:
1609 return UnableToLegalize;
1610 }
1611
1613 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1615 return Legalized;
1616 case TargetOpcode::G_INTTOPTR:
1617 if (TypeIdx != 1)
1618 return UnableToLegalize;
1619
1621 narrowScalarSrc(MI, NarrowTy, 1);
1623 return Legalized;
1624 case TargetOpcode::G_PTRTOINT:
1625 if (TypeIdx != 0)
1626 return UnableToLegalize;
1627
1629 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1631 return Legalized;
1632 case TargetOpcode::G_PHI: {
1633 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1634 // NarrowSize.
1635 if (SizeOp0 % NarrowSize != 0)
1636 return UnableToLegalize;
1637
1638 unsigned NumParts = SizeOp0 / NarrowSize;
1639 SmallVector<Register, 2> DstRegs(NumParts);
1640 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1642 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1643 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1645 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1646 SrcRegs[i / 2], MIRBuilder, MRI);
1647 }
1648 MachineBasicBlock &MBB = *MI.getParent();
1650 for (unsigned i = 0; i < NumParts; ++i) {
1651 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1652 MachineInstrBuilder MIB =
1653 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1654 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1655 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1656 }
1658 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1660 MI.eraseFromParent();
1661 return Legalized;
1662 }
1663 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1664 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1665 if (TypeIdx != 2)
1666 return UnableToLegalize;
1667
1668 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1670 narrowScalarSrc(MI, NarrowTy, OpIdx);
1672 return Legalized;
1673 }
1674 case TargetOpcode::G_ICMP: {
1675 Register LHS = MI.getOperand(2).getReg();
1676 LLT SrcTy = MRI.getType(LHS);
1677 uint64_t SrcSize = SrcTy.getSizeInBits();
1678 CmpInst::Predicate Pred =
1679 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1680
1681 // TODO: Handle the non-equality case for weird sizes.
1682 if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
1683 return UnableToLegalize;
1684
1685 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1686 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1687 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1688 LHSLeftoverRegs, MIRBuilder, MRI))
1689 return UnableToLegalize;
1690
1691 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1692 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1693 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1694 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1695 return UnableToLegalize;
1696
1697 // We now have the LHS and RHS of the compare split into narrow-type
1698 // registers, plus potentially some leftover type.
1699 Register Dst = MI.getOperand(0).getReg();
1700 LLT ResTy = MRI.getType(Dst);
1701 if (ICmpInst::isEquality(Pred)) {
1702 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1703 // them together. For each equal part, the result should be all 0s. For
1704 // each non-equal part, we'll get at least one 1.
1705 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1706 SmallVector<Register, 4> Xors;
1707 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1708 auto LHS = std::get<0>(LHSAndRHS);
1709 auto RHS = std::get<1>(LHSAndRHS);
1710 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1711 Xors.push_back(Xor);
1712 }
1713
1714 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1715 // to the desired narrow type so that we can OR them together later.
1716 SmallVector<Register, 4> WidenedXors;
1717 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1718 auto LHS = std::get<0>(LHSAndRHS);
1719 auto RHS = std::get<1>(LHSAndRHS);
1720 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1721 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1722 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1723 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1724 Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
1725 }
1726
1727 // Now, for each part we broke up, we know if they are equal/not equal
1728 // based off the G_XOR. We can OR these all together and compare against
1729 // 0 to get the result.
1730 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1731 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1732 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1733 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1734 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1735 } else {
1736 // TODO: Handle non-power-of-two types.
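      // Illustratively (register halves named for exposition), an s64 signed
      // compare split into s32 halves uses the identity
      //   (a < b)  ==  (a_hi == b_hi) ? (a_lo u< b_lo) : (a_hi < b_hi)
      // which is what the G_SELECT built below computes.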
1737 assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
1738 assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
1739 Register LHSL = LHSPartRegs[0];
1740 Register LHSH = LHSPartRegs[1];
1741 Register RHSL = RHSPartRegs[0];
1742 Register RHSH = RHSPartRegs[1];
1743 MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1744 MachineInstrBuilder CmpHEQ =
1745          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
1746      MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
1747          ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1748 MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
1749 }
1750 MI.eraseFromParent();
1751 return Legalized;
1752 }
1753 case TargetOpcode::G_FCMP:
1754 if (TypeIdx != 0)
1755 return UnableToLegalize;
1756
1758 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1760 return Legalized;
1761
1762 case TargetOpcode::G_SEXT_INREG: {
1763 if (TypeIdx != 0)
1764 return UnableToLegalize;
1765
1766 int64_t SizeInBits = MI.getOperand(2).getImm();
1767
1768 // So long as the new type has more bits than the bits we're extending we
1769 // don't need to break it apart.
1770 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1772 // We don't lose any non-extension bits by truncating the src and
1773 // sign-extending the dst.
1774 MachineOperand &MO1 = MI.getOperand(1);
1775 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1776 MO1.setReg(TruncMIB.getReg(0));
1777
1778 MachineOperand &MO2 = MI.getOperand(0);
1779 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1781 MIRBuilder.buildSExt(MO2, DstExt);
1782 MO2.setReg(DstExt);
1784 return Legalized;
1785 }
1786
1787 // Break it apart. Components below the extension point are unmodified. The
1788 // component containing the extension point becomes a narrower SEXT_INREG.
1789 // Components above it are ashr'd from the component containing the
1790 // extension point.
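    // For example (an illustrative sketch), narrowing
    //   %d:_(s64) = G_SEXT_INREG %x:_(s64), 16
    // with s16 NarrowTy produces roughly:
    //   %p0:_(s16), %p1, %p2, %p3 = G_UNMERGE_VALUES %x
    //   %hi:_(s16) = G_ASHR %p0, 15
    //   %d:_(s64)  = G_MERGE_VALUES %p0, %hi, %hi, %hi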
1791 if (SizeOp0 % NarrowSize != 0)
1792 return UnableToLegalize;
1793 int NumParts = SizeOp0 / NarrowSize;
1794
1795 // List the registers where the destination will be scattered.
1796    SmallVector<Register, 2> DstRegs;
1797    // List the registers where the source will be split.
1798    SmallVector<Register, 2> SrcRegs;
1799
1800 // Create all the temporary registers.
1801 for (int i = 0; i < NumParts; ++i) {
1802 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1803
1804 SrcRegs.push_back(SrcReg);
1805 }
1806
1807 // Explode the big arguments into smaller chunks.
1808 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1809
1810 Register AshrCstReg =
1811 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1812 .getReg(0);
1813 Register FullExtensionReg;
1814 Register PartialExtensionReg;
1815
1816 // Do the operation on each small part.
1817 for (int i = 0; i < NumParts; ++i) {
1818 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
1819 DstRegs.push_back(SrcRegs[i]);
1820 PartialExtensionReg = DstRegs.back();
1821 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1822 assert(PartialExtensionReg &&
1823 "Expected to visit partial extension before full");
1824 if (FullExtensionReg) {
1825 DstRegs.push_back(FullExtensionReg);
1826 continue;
1827 }
1828 DstRegs.push_back(
1829 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1830 .getReg(0));
1831 FullExtensionReg = DstRegs.back();
1832 } else {
1833 DstRegs.push_back(
1834            MIRBuilder
1835                .buildInstr(
1836 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1837 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1838 .getReg(0));
1839 PartialExtensionReg = DstRegs.back();
1840 }
1841 }
1842
1843 // Gather the destination registers into the final destination.
1844 Register DstReg = MI.getOperand(0).getReg();
1845 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1846 MI.eraseFromParent();
1847 return Legalized;
1848 }
1849 case TargetOpcode::G_BSWAP:
1850 case TargetOpcode::G_BITREVERSE: {
1851 if (SizeOp0 % NarrowSize != 0)
1852 return UnableToLegalize;
1853
1855 SmallVector<Register, 2> SrcRegs, DstRegs;
1856 unsigned NumParts = SizeOp0 / NarrowSize;
1857 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
1858 MIRBuilder, MRI);
1859
1860 for (unsigned i = 0; i < NumParts; ++i) {
1861 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1862 {SrcRegs[NumParts - 1 - i]});
1863 DstRegs.push_back(DstPart.getReg(0));
1864 }
1865
1866 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1867
1869 MI.eraseFromParent();
1870 return Legalized;
1871 }
1872 case TargetOpcode::G_PTR_ADD:
1873 case TargetOpcode::G_PTRMASK: {
1874 if (TypeIdx != 1)
1875 return UnableToLegalize;
1877 narrowScalarSrc(MI, NarrowTy, 2);
1879 return Legalized;
1880 }
1881 case TargetOpcode::G_FPTOUI:
1882 case TargetOpcode::G_FPTOSI:
1883 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1884 case TargetOpcode::G_FPEXT:
1885 if (TypeIdx != 0)
1886 return UnableToLegalize;
1888 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1890 return Legalized;
1891 case TargetOpcode::G_FLDEXP:
1892 case TargetOpcode::G_STRICT_FLDEXP:
1893 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
1894 case TargetOpcode::G_VSCALE: {
1895 Register Dst = MI.getOperand(0).getReg();
1896 LLT Ty = MRI.getType(Dst);
1897
1898 // Assume VSCALE(1) fits into a legal integer
1899 const APInt One(NarrowTy.getSizeInBits(), 1);
1900 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
1901 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
1902 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
1903 MIRBuilder.buildMul(Dst, ZExt, C);
1904
1905 MI.eraseFromParent();
1906 return Legalized;
1907 }
1908 }
1909}
1910
1911Register LegalizerHelper::coerceToScalar(Register Val) {
1912  LLT Ty = MRI.getType(Val);
1913 if (Ty.isScalar())
1914 return Val;
1915
1916  const DataLayout &DL = MIRBuilder.getDataLayout();
1917  LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1918 if (Ty.isPointer()) {
1919 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1920 return Register();
1921 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1922 }
1923
1924 Register NewVal = Val;
1925
1926 assert(Ty.isVector());
1927 if (Ty.isPointerVector())
1928 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1929 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1930}
1931
1932void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1933                                     unsigned OpIdx, unsigned ExtOpcode) {
1934 MachineOperand &MO = MI.getOperand(OpIdx);
1935 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1936 MO.setReg(ExtB.getReg(0));
1937}
1938
1939void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1940                                      unsigned OpIdx) {
1941 MachineOperand &MO = MI.getOperand(OpIdx);
1942 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1943 MO.setReg(ExtB.getReg(0));
1944}
1945
1946void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1947                                     unsigned OpIdx, unsigned TruncOpcode) {
1948 MachineOperand &MO = MI.getOperand(OpIdx);
1949 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1950  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1951  MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1952 MO.setReg(DstExt);
1953}
1954
1955void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1956                                      unsigned OpIdx, unsigned ExtOpcode) {
1957 MachineOperand &MO = MI.getOperand(OpIdx);
1958 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1959  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1960  MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1961 MO.setReg(DstTrunc);
1962}
1963
1964void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1965                                            unsigned OpIdx) {
1966 MachineOperand &MO = MI.getOperand(OpIdx);
1967  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1968  Register Dst = MO.getReg();
1969 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1970 MO.setReg(DstExt);
1971  MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
1972}
1973
1974void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1975                                            unsigned OpIdx) {
1976 MachineOperand &MO = MI.getOperand(OpIdx);
1977  SmallVector<Register, 8> Regs;
1978  MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
1979}
1980
1981void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1982 MachineOperand &Op = MI.getOperand(OpIdx);
1983 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1984}
1985
1986void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1987 MachineOperand &MO = MI.getOperand(OpIdx);
1988 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1990 MIRBuilder.buildBitcast(MO, CastDst);
1991 MO.setReg(CastDst);
1992}
1993
1994LegalizerHelper::LegalizeResult
1995LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1996 LLT WideTy) {
1997 if (TypeIdx != 1)
1998 return UnableToLegalize;
1999
2000 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
2001 if (DstTy.isVector())
2002 return UnableToLegalize;
2003
2004 LLT SrcTy = MRI.getType(Src1Reg);
2005 const int DstSize = DstTy.getSizeInBits();
2006 const int SrcSize = SrcTy.getSizeInBits();
2007 const int WideSize = WideTy.getSizeInBits();
2008 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2009
2010 unsigned NumOps = MI.getNumOperands();
2011 unsigned NumSrc = MI.getNumOperands() - 1;
2012 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2013
2014 if (WideSize >= DstSize) {
2015 // Directly pack the bits in the target type.
2016 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
2017
2018 for (unsigned I = 2; I != NumOps; ++I) {
2019 const unsigned Offset = (I - 1) * PartSize;
2020
2021 Register SrcReg = MI.getOperand(I).getReg();
2022 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
2023
2024 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
2025
2026 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
2027 MRI.createGenericVirtualRegister(WideTy);
2028
2029 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
2030 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
2031 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
2032 ResultReg = NextResult;
2033 }
2034
2035 if (WideSize > DstSize)
2036 MIRBuilder.buildTrunc(DstReg, ResultReg);
2037 else if (DstTy.isPointer())
2038 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
2039
2040 MI.eraseFromParent();
2041 return Legalized;
2042 }
2043
2044 // Unmerge the original values to the GCD type, and recombine to the next
2045 // multiple greater than the original type.
2046 //
2047 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2048 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2049 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2050 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2051 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2052 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2053 // %12:_(s12) = G_MERGE_VALUES %10, %11
2054 //
2055 // Padding with undef if necessary:
2056 //
2057 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2058 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2059 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2060 // %7:_(s2) = G_IMPLICIT_DEF
2061 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2062 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2063 // %10:_(s12) = G_MERGE_VALUES %8, %9
2064
2065 const int GCD = std::gcd(SrcSize, WideSize);
2066 LLT GCDTy = LLT::scalar(GCD);
2067
2069 SmallVector<Register, 8> NewMergeRegs;
2070 SmallVector<Register, 8> Unmerges;
2071 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
2072
2073 // Decompose the original operands if they don't evenly divide.
2074 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
2075 Register SrcReg = MO.getReg();
2076 if (GCD == SrcSize) {
2077 Unmerges.push_back(SrcReg);
2078 } else {
2079 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2080 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2081 Unmerges.push_back(Unmerge.getReg(J));
2082 }
2083 }
2084
2085 // Pad with undef to the next size that is a multiple of the requested size.
2086 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
2087 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
2088 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
2089 Unmerges.push_back(UndefReg);
2090 }
2091
2092 const int PartsPerGCD = WideSize / GCD;
2093
2094 // Build merges of each piece.
2095 ArrayRef<Register> Slicer(Unmerges);
2096 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2097 auto Merge =
2098 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2099 NewMergeRegs.push_back(Merge.getReg(0));
2100 }
2101
2102 // A truncate may be necessary if the requested type doesn't evenly divide the
2103 // original result type.
2104 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
2105 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2106 } else {
2107 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2108 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2109 }
2110
2111 MI.eraseFromParent();
2112 return Legalized;
2113}
2114
2115LegalizerHelper::LegalizeResult
2116LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
2117 LLT WideTy) {
2118 if (TypeIdx != 0)
2119 return UnableToLegalize;
2120
2121 int NumDst = MI.getNumOperands() - 1;
2122 Register SrcReg = MI.getOperand(NumDst).getReg();
2123 LLT SrcTy = MRI.getType(SrcReg);
2124 if (SrcTy.isVector())
2125 return UnableToLegalize;
2126
2127 Register Dst0Reg = MI.getOperand(0).getReg();
2128 LLT DstTy = MRI.getType(Dst0Reg);
2129 if (!DstTy.isScalar())
2130 return UnableToLegalize;
2131
2132 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
2133 if (SrcTy.isPointer()) {
2134      const DataLayout &DL = MIRBuilder.getDataLayout();
2135      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
2136 LLVM_DEBUG(
2137 dbgs() << "Not casting non-integral address space integer\n");
2138 return UnableToLegalize;
2139 }
2140
2141 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
2142 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2143 }
2144
2145 // Widen SrcTy to WideTy. This does not affect the result, but since the
2146 // user requested this size, it is probably better handled than SrcTy and
2147 // should reduce the total number of legalization artifacts.
2148 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2149 SrcTy = WideTy;
2150 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2151 }
2152
2153    // There's no unmerge type to target. Directly extract the bits from the
2154    // source type.
2155 unsigned DstSize = DstTy.getSizeInBits();
2156
2157 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2158 for (int I = 1; I != NumDst; ++I) {
2159 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2160 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2161 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2162 }
2163
2164 MI.eraseFromParent();
2165 return Legalized;
2166 }
2167
2168 // Extend the source to a wider type.
2169 LLT LCMTy = getLCMType(SrcTy, WideTy);
2170
2171 Register WideSrc = SrcReg;
2172 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2173 // TODO: If this is an integral address space, cast to integer and anyext.
2174 if (SrcTy.isPointer()) {
2175 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2176 return UnableToLegalize;
2177 }
2178
2179 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2180 }
2181
2182 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2183
2184 // Create a sequence of unmerges and merges to the original results. Since we
2185 // may have widened the source, we will need to pad the results with dead defs
2186 // to cover the source register.
2187 // e.g. widen s48 to s64:
2188 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2189 //
2190 // =>
2191 // %4:_(s192) = G_ANYEXT %0:_(s96)
2192 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2193 // ; unpack to GCD type, with extra dead defs
2194 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2195 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2196 // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
2197 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2198 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2199 const LLT GCDTy = getGCDType(WideTy, DstTy);
2200 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2201 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2202
2203 // Directly unmerge to the destination without going through a GCD type
2204 // if possible
2205 if (PartsPerRemerge == 1) {
2206 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2207
2208 for (int I = 0; I != NumUnmerge; ++I) {
2209 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2210
2211 for (int J = 0; J != PartsPerUnmerge; ++J) {
2212 int Idx = I * PartsPerUnmerge + J;
2213 if (Idx < NumDst)
2214 MIB.addDef(MI.getOperand(Idx).getReg());
2215 else {
2216 // Create dead def for excess components.
2217 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2218 }
2219 }
2220
2221 MIB.addUse(Unmerge.getReg(I));
2222 }
2223 } else {
2224    SmallVector<Register, 16> Parts;
2225    for (int J = 0; J != NumUnmerge; ++J)
2226 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2227
2228 SmallVector<Register, 8> RemergeParts;
2229 for (int I = 0; I != NumDst; ++I) {
2230 for (int J = 0; J < PartsPerRemerge; ++J) {
2231 const int Idx = I * PartsPerRemerge + J;
2232 RemergeParts.emplace_back(Parts[Idx]);
2233 }
2234
2235 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2236 RemergeParts.clear();
2237 }
2238 }
2239
2240 MI.eraseFromParent();
2241 return Legalized;
2242}
2243
2244LegalizerHelper::LegalizeResult
2245LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2246 LLT WideTy) {
2247 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2248 unsigned Offset = MI.getOperand(2).getImm();
2249
2250 if (TypeIdx == 0) {
2251 if (SrcTy.isVector() || DstTy.isVector())
2252 return UnableToLegalize;
2253
2254 SrcOp Src(SrcReg);
2255 if (SrcTy.isPointer()) {
2256 // Extracts from pointers can be handled only if they are really just
2257 // simple integers.
2258      const DataLayout &DL = MIRBuilder.getDataLayout();
2259      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2260 return UnableToLegalize;
2261
2262 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2263 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2264 SrcTy = SrcAsIntTy;
2265 }
2266
2267 if (DstTy.isPointer())
2268 return UnableToLegalize;
2269
2270 if (Offset == 0) {
2271 // Avoid a shift in the degenerate case.
2272 MIRBuilder.buildTrunc(DstReg,
2273 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2274 MI.eraseFromParent();
2275 return Legalized;
2276 }
2277
2278 // Do a shift in the source type.
2279 LLT ShiftTy = SrcTy;
2280 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2281 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2282 ShiftTy = WideTy;
2283 }
2284
2285 auto LShr = MIRBuilder.buildLShr(
2286 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2287 MIRBuilder.buildTrunc(DstReg, LShr);
2288 MI.eraseFromParent();
2289 return Legalized;
2290 }
2291
2292 if (SrcTy.isScalar()) {
2294 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2296 return Legalized;
2297 }
2298
2299 if (!SrcTy.isVector())
2300 return UnableToLegalize;
2301
2302 if (DstTy != SrcTy.getElementType())
2303 return UnableToLegalize;
2304
2305 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2306 return UnableToLegalize;
2307
2309 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2310
2311 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2312 Offset);
2313 widenScalarDst(MI, WideTy.getScalarType(), 0);
2315 return Legalized;
2316}
2317
2318LegalizerHelper::LegalizeResult
2319LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2320 LLT WideTy) {
2321 if (TypeIdx != 0 || WideTy.isVector())
2322 return UnableToLegalize;
2324 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2325 widenScalarDst(MI, WideTy);
2327 return Legalized;
2328}
2329
2330LegalizerHelper::LegalizeResult
2331LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2332 LLT WideTy) {
2333 unsigned Opcode;
2334 unsigned ExtOpcode;
2335 std::optional<Register> CarryIn;
2336 switch (MI.getOpcode()) {
2337 default:
2338 llvm_unreachable("Unexpected opcode!");
2339 case TargetOpcode::G_SADDO:
2340 Opcode = TargetOpcode::G_ADD;
2341 ExtOpcode = TargetOpcode::G_SEXT;
2342 break;
2343 case TargetOpcode::G_SSUBO:
2344 Opcode = TargetOpcode::G_SUB;
2345 ExtOpcode = TargetOpcode::G_SEXT;
2346 break;
2347 case TargetOpcode::G_UADDO:
2348 Opcode = TargetOpcode::G_ADD;
2349 ExtOpcode = TargetOpcode::G_ZEXT;
2350 break;
2351 case TargetOpcode::G_USUBO:
2352 Opcode = TargetOpcode::G_SUB;
2353 ExtOpcode = TargetOpcode::G_ZEXT;
2354 break;
2355 case TargetOpcode::G_SADDE:
2356 Opcode = TargetOpcode::G_UADDE;
2357 ExtOpcode = TargetOpcode::G_SEXT;
2358 CarryIn = MI.getOperand(4).getReg();
2359 break;
2360 case TargetOpcode::G_SSUBE:
2361 Opcode = TargetOpcode::G_USUBE;
2362 ExtOpcode = TargetOpcode::G_SEXT;
2363 CarryIn = MI.getOperand(4).getReg();
2364 break;
2365 case TargetOpcode::G_UADDE:
2366 Opcode = TargetOpcode::G_UADDE;
2367 ExtOpcode = TargetOpcode::G_ZEXT;
2368 CarryIn = MI.getOperand(4).getReg();
2369 break;
2370 case TargetOpcode::G_USUBE:
2371 Opcode = TargetOpcode::G_USUBE;
2372 ExtOpcode = TargetOpcode::G_ZEXT;
2373 CarryIn = MI.getOperand(4).getReg();
2374 break;
2375 }
2376
2377 if (TypeIdx == 1) {
2378 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2379
2381 if (CarryIn)
2382 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2383 widenScalarDst(MI, WideTy, 1);
2384
2386 return Legalized;
2387 }
2388
2389 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2390 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2391 // Do the arithmetic in the larger type.
2392 Register NewOp;
2393 if (CarryIn) {
2394 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2395 NewOp = MIRBuilder
2396 .buildInstr(Opcode, {WideTy, CarryOutTy},
2397 {LHSExt, RHSExt, *CarryIn})
2398 .getReg(0);
2399 } else {
2400 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2401 }
2402 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2403 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2404 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2405 // There is no overflow if the ExtOp is the same as NewOp.
2406 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2407 // Now trunc the NewOp to the original result.
2408 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2409 MI.eraseFromParent();
2410 return Legalized;
2411}
2412
2413LegalizerHelper::LegalizeResult
2414LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2415 LLT WideTy) {
2416 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2417 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2418 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2419 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2420 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2421 // We can convert this to:
2422 // 1. Any extend iN to iM
2423 // 2. SHL by M-N
2424 // 3. [US][ADD|SUB|SHL]SAT
2425 // 4. L/ASHR by M-N
2426 //
2427 // It may be more efficient to lower this to a min and a max operation in
2428 // the higher precision arithmetic if the promoted operation isn't legal,
2429 // but this decision is up to the target's lowering request.
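  // For example (an illustrative sketch), widening an s8 G_UADDSAT to s32:
  //   %a32 = G_ANYEXT %a      ; %b32 = G_ANYEXT %b
  //   %sa  = G_SHL %a32, 24   ; %sb  = G_SHL %b32, 24
  //   %s   = G_UADDSAT %sa, %sb
  //   %res = G_TRUNC (G_LSHR %s, 24)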
2430 Register DstReg = MI.getOperand(0).getReg();
2431
2432 unsigned NewBits = WideTy.getScalarSizeInBits();
2433 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2434
2435 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2436 // must not left shift the RHS to preserve the shift amount.
2437 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2438 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2439 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2440 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2441 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2442 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2443
2444 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2445 {ShiftL, ShiftR}, MI.getFlags());
2446
2447 // Use a shift that will preserve the number of sign bits when the trunc is
2448 // folded away.
2449 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2450 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2451
2452 MIRBuilder.buildTrunc(DstReg, Result);
2453 MI.eraseFromParent();
2454 return Legalized;
2455}
2456
2457LegalizerHelper::LegalizeResult
2458LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2459 LLT WideTy) {
2460 if (TypeIdx == 1) {
2462 widenScalarDst(MI, WideTy, 1);
2464 return Legalized;
2465 }
2466
2467 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2468 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2469 LLT SrcTy = MRI.getType(LHS);
2470 LLT OverflowTy = MRI.getType(OriginalOverflow);
2471 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2472
2473 // To determine if the result overflowed in the larger type, we extend the
2474 // input to the larger type, do the multiply (checking if it overflows),
2475 // then also check the high bits of the result to see if overflow happened
2476 // there.
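  // For example (an illustrative sketch), widening an s8 G_UMULO to s16: the
  // s16 product of two zero-extended s8 values cannot itself overflow, so the
  // overflow flag reduces to checking that the high byte of the product is
  // zero:
  //   %mul:_(s16) = G_MUL (G_ZEXT %a), (G_ZEXT %b)
  //   %ov:_(s1)   = G_ICMP intpred(ne), %mul, (zero-extend-in-reg 8 of %mul)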
2477 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2478 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2479 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2480
2481 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2482 // so we don't need to check the overflow result of larger type Mulo.
2483 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2484
2485 unsigned MulOpc =
2486 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2487
2488  MachineInstrBuilder Mulo;
2489  if (WideMulCanOverflow)
2490 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2491 {LeftOperand, RightOperand});
2492 else
2493 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2494
2495 auto Mul = Mulo->getOperand(0);
2496 MIRBuilder.buildTrunc(Result, Mul);
2497
2498 MachineInstrBuilder ExtResult;
2499 // Overflow occurred if it occurred in the larger type, or if the high part
2500 // of the result does not zero/sign-extend the low part. Check this second
2501 // possibility first.
2502 if (IsSigned) {
2503 // For signed, overflow occurred when the high part does not sign-extend
2504 // the low part.
2505 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2506 } else {
2507 // Unsigned overflow occurred when the high part does not zero-extend the
2508 // low part.
2509 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2510 }
2511
2512 if (WideMulCanOverflow) {
2513 auto Overflow =
2514 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2515 // Finally check if the multiplication in the larger type itself overflowed.
2516 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2517 } else {
2518 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2519 }
2520 MI.eraseFromParent();
2521 return Legalized;
2522}
2523
2524LegalizerHelper::LegalizeResult
2525LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2526  unsigned Opcode = MI.getOpcode();
2527 switch (Opcode) {
2528 default:
2529 return UnableToLegalize;
2530 case TargetOpcode::G_ATOMICRMW_XCHG:
2531 case TargetOpcode::G_ATOMICRMW_ADD:
2532 case TargetOpcode::G_ATOMICRMW_SUB:
2533 case TargetOpcode::G_ATOMICRMW_AND:
2534 case TargetOpcode::G_ATOMICRMW_OR:
2535 case TargetOpcode::G_ATOMICRMW_XOR:
2536 case TargetOpcode::G_ATOMICRMW_MIN:
2537 case TargetOpcode::G_ATOMICRMW_MAX:
2538 case TargetOpcode::G_ATOMICRMW_UMIN:
2539 case TargetOpcode::G_ATOMICRMW_UMAX:
2540 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2542 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2543 widenScalarDst(MI, WideTy, 0);
2545 return Legalized;
2546 case TargetOpcode::G_ATOMIC_CMPXCHG:
2547 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2549 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2550 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2551 widenScalarDst(MI, WideTy, 0);
2553 return Legalized;
2554 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2555 if (TypeIdx == 0) {
2557 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2558 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2559 widenScalarDst(MI, WideTy, 0);
2561 return Legalized;
2562 }
2563 assert(TypeIdx == 1 &&
2564 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2566 widenScalarDst(MI, WideTy, 1);
2568 return Legalized;
2569 case TargetOpcode::G_EXTRACT:
2570 return widenScalarExtract(MI, TypeIdx, WideTy);
2571 case TargetOpcode::G_INSERT:
2572 return widenScalarInsert(MI, TypeIdx, WideTy);
2573 case TargetOpcode::G_MERGE_VALUES:
2574 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2575 case TargetOpcode::G_UNMERGE_VALUES:
2576 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2577 case TargetOpcode::G_SADDO:
2578 case TargetOpcode::G_SSUBO:
2579 case TargetOpcode::G_UADDO:
2580 case TargetOpcode::G_USUBO:
2581 case TargetOpcode::G_SADDE:
2582 case TargetOpcode::G_SSUBE:
2583 case TargetOpcode::G_UADDE:
2584 case TargetOpcode::G_USUBE:
2585 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2586 case TargetOpcode::G_UMULO:
2587 case TargetOpcode::G_SMULO:
2588 return widenScalarMulo(MI, TypeIdx, WideTy);
2589 case TargetOpcode::G_SADDSAT:
2590 case TargetOpcode::G_SSUBSAT:
2591 case TargetOpcode::G_SSHLSAT:
2592 case TargetOpcode::G_UADDSAT:
2593 case TargetOpcode::G_USUBSAT:
2594 case TargetOpcode::G_USHLSAT:
2595 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2596 case TargetOpcode::G_CTTZ:
2597 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2598 case TargetOpcode::G_CTLZ:
2599 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2600 case TargetOpcode::G_CTPOP: {
2601 if (TypeIdx == 0) {
2603 widenScalarDst(MI, WideTy, 0);
2605 return Legalized;
2606 }
2607
2608 Register SrcReg = MI.getOperand(1).getReg();
2609
2610 // First extend the input.
2611 unsigned ExtOpc = Opcode == TargetOpcode::G_CTTZ ||
2612 Opcode == TargetOpcode::G_CTTZ_ZERO_UNDEF
2613 ? TargetOpcode::G_ANYEXT
2614 : TargetOpcode::G_ZEXT;
2615 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2616 LLT CurTy = MRI.getType(SrcReg);
2617 unsigned NewOpc = Opcode;
2618 if (NewOpc == TargetOpcode::G_CTTZ) {
2619 // The count is the same in the larger type except if the original
2620 // value was zero. This can be handled by setting the bit just off
2621 // the top of the original type.
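      // For example (an illustrative sketch), widening an s8 G_CTTZ to s32:
      // OR the any-extended input with 0x100 (the bit just above the original
      // width), so a zero input yields cttz = 8, then use G_CTTZ_ZERO_UNDEF on
      // the s32 value.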
2622 auto TopBit =
2623          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2624      MIBSrc = MIRBuilder.buildOr(
2625 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2626 // Now we know the operand is non-zero, use the more relaxed opcode.
2627 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2628 }
2629
2630 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2631
2632 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2633 // An optimization where the result is the CTLZ after the left shift by
2634      // the difference in width between WideTy and CurTy, that is,
2635 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2636 // Result = ctlz MIBSrc
2637 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2638 MIRBuilder.buildConstant(WideTy, SizeDiff));
2639 }
2640
2641 // Perform the operation at the larger size.
2642 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2643 // This is already the correct result for CTPOP and CTTZs
2644 if (Opcode == TargetOpcode::G_CTLZ) {
2645 // The correct result is NewOp - (Difference in widety and current ty).
2646 MIBNewOp = MIRBuilder.buildSub(
2647 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2648 }
2649
2650 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2651 MI.eraseFromParent();
2652 return Legalized;
2653 }
2654 case TargetOpcode::G_BSWAP: {
2656 Register DstReg = MI.getOperand(0).getReg();
2657
2658 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2659 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2660 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2661 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2662
2663 MI.getOperand(0).setReg(DstExt);
2664
2665    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2666
2667 LLT Ty = MRI.getType(DstReg);
2668 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2669 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2670 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2671
2672 MIRBuilder.buildTrunc(DstReg, ShrReg);
2674 return Legalized;
2675 }
2676 case TargetOpcode::G_BITREVERSE: {
2678
2679 Register DstReg = MI.getOperand(0).getReg();
2680 LLT Ty = MRI.getType(DstReg);
2681 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2682
2683 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2684 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2685 MI.getOperand(0).setReg(DstExt);
2686    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2687
2688 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2689 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2690 MIRBuilder.buildTrunc(DstReg, Shift);
2692 return Legalized;
2693 }
2694 case TargetOpcode::G_FREEZE:
2695 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2697 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2698 widenScalarDst(MI, WideTy);
2700 return Legalized;
2701
2702 case TargetOpcode::G_ABS:
2704 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2705 widenScalarDst(MI, WideTy);
2707 return Legalized;
2708
2709 case TargetOpcode::G_ADD:
2710 case TargetOpcode::G_AND:
2711 case TargetOpcode::G_MUL:
2712 case TargetOpcode::G_OR:
2713 case TargetOpcode::G_XOR:
2714 case TargetOpcode::G_SUB:
2715 case TargetOpcode::G_SHUFFLE_VECTOR:
2716    // Perform the operation at the larger width (any extension is fine here;
2717    // the high bits don't affect the result) and then truncate the result back
2718    // to the original type.
2720 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2721 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2722 widenScalarDst(MI, WideTy);
2724 return Legalized;
2725
2726 case TargetOpcode::G_SBFX:
2727 case TargetOpcode::G_UBFX:
2729
2730 if (TypeIdx == 0) {
2731 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2732 widenScalarDst(MI, WideTy);
2733 } else {
2734 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2735 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2736 }
2737
2739 return Legalized;
2740
2741 case TargetOpcode::G_SHL:
2743
2744 if (TypeIdx == 0) {
2745 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2746 widenScalarDst(MI, WideTy);
2747 } else {
2748 assert(TypeIdx == 1);
2749 // The "number of bits to shift" operand must preserve its value as an
2750 // unsigned integer:
2751 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2752 }
2753
2755 return Legalized;
2756
2757 case TargetOpcode::G_ROTR:
2758 case TargetOpcode::G_ROTL:
2759 if (TypeIdx != 1)
2760 return UnableToLegalize;
2761
2763 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2765 return Legalized;
2766
2767 case TargetOpcode::G_SDIV:
2768 case TargetOpcode::G_SREM:
2769 case TargetOpcode::G_SMIN:
2770 case TargetOpcode::G_SMAX:
2772 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2773 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2774 widenScalarDst(MI, WideTy);
2776 return Legalized;
2777
2778 case TargetOpcode::G_SDIVREM:
2780 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2781 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2782 widenScalarDst(MI, WideTy);
2783 widenScalarDst(MI, WideTy, 1);
2785 return Legalized;
2786
2787 case TargetOpcode::G_ASHR:
2788 case TargetOpcode::G_LSHR:
2790
2791 if (TypeIdx == 0) {
2792 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
2793 : TargetOpcode::G_ZEXT;
2794
2795 widenScalarSrc(MI, WideTy, 1, CvtOp);
2796 widenScalarDst(MI, WideTy);
2797 } else {
2798 assert(TypeIdx == 1);
2799 // The "number of bits to shift" operand must preserve its value as an
2800 // unsigned integer:
2801 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2802 }
2803
2805 return Legalized;
2806 case TargetOpcode::G_UDIV:
2807 case TargetOpcode::G_UREM:
2808 case TargetOpcode::G_UMIN:
2809 case TargetOpcode::G_UMAX:
2811 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2812 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2813 widenScalarDst(MI, WideTy);
2815 return Legalized;
2816
2817 case TargetOpcode::G_UDIVREM:
2819 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2820 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2821 widenScalarDst(MI, WideTy);
2822 widenScalarDst(MI, WideTy, 1);
2824 return Legalized;
2825
2826 case TargetOpcode::G_SELECT:
2828 if (TypeIdx == 0) {
2829 // Perform operation at larger width (any extension is fine here, high
2830 // bits don't affect the result) and then truncate the result back to the
2831 // original type.
2832 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2833 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2834 widenScalarDst(MI, WideTy);
2835 } else {
2836 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
2837 // Explicit extension is required here since high bits affect the result.
2838 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
2839 }
2841 return Legalized;
2842
2843 case TargetOpcode::G_FPTOSI:
2844 case TargetOpcode::G_FPTOUI:
2845 case TargetOpcode::G_INTRINSIC_LRINT:
2846 case TargetOpcode::G_INTRINSIC_LLRINT:
2847 case TargetOpcode::G_IS_FPCLASS:
2849
2850 if (TypeIdx == 0)
2851 widenScalarDst(MI, WideTy);
2852 else
2853 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2854
2856 return Legalized;
2857 case TargetOpcode::G_SITOFP:
2859
2860 if (TypeIdx == 0)
2861 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2862 else
2863 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2864
2866 return Legalized;
2867 case TargetOpcode::G_UITOFP:
2869
2870 if (TypeIdx == 0)
2871 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2872 else
2873 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2874
2876 return Legalized;
2877 case TargetOpcode::G_LOAD:
2878 case TargetOpcode::G_SEXTLOAD:
2879 case TargetOpcode::G_ZEXTLOAD:
2881 widenScalarDst(MI, WideTy);
2883 return Legalized;
2884
2885 case TargetOpcode::G_STORE: {
2886 if (TypeIdx != 0)
2887 return UnableToLegalize;
2888
2889 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2890 if (!Ty.isScalar())
2891 return UnableToLegalize;
2892
2894
2895 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
2896 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2897 widenScalarSrc(MI, WideTy, 0, ExtType);
2898
2900 return Legalized;
2901 }
2902 case TargetOpcode::G_CONSTANT: {
2903 MachineOperand &SrcMO = MI.getOperand(1);
2904    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2905    unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
2906 MRI.getType(MI.getOperand(0).getReg()));
2907 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2908 ExtOpc == TargetOpcode::G_ANYEXT) &&
2909 "Illegal Extend");
2910 const APInt &SrcVal = SrcMO.getCImm()->getValue();
2911 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2912 ? SrcVal.sext(WideTy.getSizeInBits())
2913 : SrcVal.zext(WideTy.getSizeInBits());
2915 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
2916
2917 widenScalarDst(MI, WideTy);
2919 return Legalized;
2920 }
2921 case TargetOpcode::G_FCONSTANT: {
2922 // To avoid changing the bits of the constant due to extension to a larger
2923 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
2924 MachineOperand &SrcMO = MI.getOperand(1);
2925 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
2927 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
2928 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
2929 MI.eraseFromParent();
2930 return Legalized;
2931 }
2932 case TargetOpcode::G_IMPLICIT_DEF: {
2934 widenScalarDst(MI, WideTy);
2936 return Legalized;
2937 }
2938 case TargetOpcode::G_BRCOND:
2940 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
2942 return Legalized;
2943
2944 case TargetOpcode::G_FCMP:
2946 if (TypeIdx == 0)
2947 widenScalarDst(MI, WideTy);
2948 else {
2949 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2950 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
2951 }
2953 return Legalized;
2954
2955 case TargetOpcode::G_ICMP:
2957 if (TypeIdx == 0)
2958 widenScalarDst(MI, WideTy);
2959 else {
2960 unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
2961 MI.getOperand(1).getPredicate()))
2962 ? TargetOpcode::G_SEXT
2963 : TargetOpcode::G_ZEXT;
2964 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
2965 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
2966 }
2968 return Legalized;
2969
2970 case TargetOpcode::G_PTR_ADD:
2971 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
2973 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2975 return Legalized;
2976
2977 case TargetOpcode::G_PHI: {
2978 assert(TypeIdx == 0 && "Expecting only Idx 0");
2979
2981 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
2982 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2984 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
2985 }
2986
2987 MachineBasicBlock &MBB = *MI.getParent();
2989 widenScalarDst(MI, WideTy);
2991 return Legalized;
2992 }
2993 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2994 if (TypeIdx == 0) {
2995 Register VecReg = MI.getOperand(1).getReg();
2996 LLT VecTy = MRI.getType(VecReg);
2998
2999      widenScalarSrc(
3000          MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
3001 TargetOpcode::G_ANYEXT);
3002
3003 widenScalarDst(MI, WideTy, 0);
3005 return Legalized;
3006 }
3007
3008 if (TypeIdx != 2)
3009 return UnableToLegalize;
3011 // TODO: Probably should be zext
3012 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3014 return Legalized;
3015 }
3016 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3017 if (TypeIdx == 0) {
3019 const LLT WideEltTy = WideTy.getElementType();
3020
3021 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3022 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3023 widenScalarDst(MI, WideTy, 0);
3025 return Legalized;
3026 }
3027
3028 if (TypeIdx == 1) {
3030
3031 Register VecReg = MI.getOperand(1).getReg();
3032 LLT VecTy = MRI.getType(VecReg);
3033 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
3034
3035 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3036 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3037 widenScalarDst(MI, WideVecTy, 0);
3039 return Legalized;
3040 }
3041
3042 if (TypeIdx == 2) {
3044 // TODO: Probably should be zext
3045 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
3047 return Legalized;
3048 }
3049
3050 return UnableToLegalize;
3051 }
3052 case TargetOpcode::G_FADD:
3053 case TargetOpcode::G_FMUL:
3054 case TargetOpcode::G_FSUB:
3055 case TargetOpcode::G_FMA:
3056 case TargetOpcode::G_FMAD:
3057 case TargetOpcode::G_FNEG:
3058 case TargetOpcode::G_FABS:
3059 case TargetOpcode::G_FCANONICALIZE:
3060 case TargetOpcode::G_FMINNUM:
3061 case TargetOpcode::G_FMAXNUM:
3062 case TargetOpcode::G_FMINNUM_IEEE:
3063 case TargetOpcode::G_FMAXNUM_IEEE:
3064 case TargetOpcode::G_FMINIMUM:
3065 case TargetOpcode::G_FMAXIMUM:
3066 case TargetOpcode::G_FDIV:
3067 case TargetOpcode::G_FREM:
3068 case TargetOpcode::G_FCEIL:
3069 case TargetOpcode::G_FFLOOR:
3070 case TargetOpcode::G_FCOS:
3071 case TargetOpcode::G_FSIN:
3072 case TargetOpcode::G_FTAN:
3073 case TargetOpcode::G_FACOS:
3074 case TargetOpcode::G_FASIN:
3075 case TargetOpcode::G_FATAN:
3076 case TargetOpcode::G_FCOSH:
3077 case TargetOpcode::G_FSINH:
3078 case TargetOpcode::G_FTANH:
3079 case TargetOpcode::G_FLOG10:
3080 case TargetOpcode::G_FLOG:
3081 case TargetOpcode::G_FLOG2:
3082 case TargetOpcode::G_FRINT:
3083 case TargetOpcode::G_FNEARBYINT:
3084 case TargetOpcode::G_FSQRT:
3085 case TargetOpcode::G_FEXP:
3086 case TargetOpcode::G_FEXP2:
3087 case TargetOpcode::G_FEXP10:
3088 case TargetOpcode::G_FPOW:
3089 case TargetOpcode::G_INTRINSIC_TRUNC:
3090 case TargetOpcode::G_INTRINSIC_ROUND:
3091 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3092 assert(TypeIdx == 0);
3094
3095 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3096 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3097
3098 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3100 return Legalized;
3101 case TargetOpcode::G_FPOWI:
3102 case TargetOpcode::G_FLDEXP:
3103 case TargetOpcode::G_STRICT_FLDEXP: {
3104 if (TypeIdx == 0) {
3105 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3106 return UnableToLegalize;
3107
3109 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3110 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3112 return Legalized;
3113 }
3114
3115 if (TypeIdx == 1) {
3116 // For some reason SelectionDAG tries to promote to a libcall without
3117 // actually changing the integer type for promotion.
3119 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3121 return Legalized;
3122 }
3123
3124 return UnableToLegalize;
3125 }
3126 case TargetOpcode::G_FFREXP: {
3128
3129 if (TypeIdx == 0) {
3130 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3131 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3132 } else {
3133 widenScalarDst(MI, WideTy, 1);
3134 }
3135
3137 return Legalized;
3138 }
3139 case TargetOpcode::G_INTTOPTR:
3140 if (TypeIdx != 1)
3141 return UnableToLegalize;
3142
3144 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3146 return Legalized;
3147 case TargetOpcode::G_PTRTOINT:
3148 if (TypeIdx != 0)
3149 return UnableToLegalize;
3150
3152 widenScalarDst(MI, WideTy, 0);
3154 return Legalized;
3155 case TargetOpcode::G_BUILD_VECTOR: {
3157
3158 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3159 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3160 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3161
3162 // Avoid changing the result vector type if the source element type was
3163 // requested.
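    // For example (an illustrative sketch), with TypeIdx == 1 and WideTy = s32,
    //   %v:_(<4 x s8>) = G_BUILD_VECTOR %a:_(s8), ...
    // keeps its <4 x s8> result: the operands are any-extended to s32 and the
    // opcode becomes G_BUILD_VECTOR_TRUNC, which truncates each lane back.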
3164 if (TypeIdx == 1) {
3165 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3166 } else {
3167 widenScalarDst(MI, WideTy, 0);
3168 }
3169
3171 return Legalized;
3172 }
3173 case TargetOpcode::G_SEXT_INREG:
3174 if (TypeIdx != 0)
3175 return UnableToLegalize;
3176
3178 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3179 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3181 return Legalized;
3182 case TargetOpcode::G_PTRMASK: {
3183 if (TypeIdx != 1)
3184 return UnableToLegalize;
3186 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3188 return Legalized;
3189 }
3190 case TargetOpcode::G_VECREDUCE_FADD:
3191 case TargetOpcode::G_VECREDUCE_FMUL:
3192 case TargetOpcode::G_VECREDUCE_FMIN:
3193 case TargetOpcode::G_VECREDUCE_FMAX:
3194 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3195 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3196 if (TypeIdx != 0)
3197 return UnableToLegalize;
3199 Register VecReg = MI.getOperand(1).getReg();
3200 LLT VecTy = MRI.getType(VecReg);
3201 LLT WideVecTy = VecTy.isVector()
3202 ? LLT::vector(VecTy.getElementCount(), WideTy)
3203 : WideTy;
3204 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3205 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3207 return Legalized;
3208 }
3209 case TargetOpcode::G_VSCALE: {
3210 MachineOperand &SrcMO = MI.getOperand(1);
3211    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3212    const APInt &SrcVal = SrcMO.getCImm()->getValue();
3213 // The CImm is always a signed value
3214 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3216 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3217 widenScalarDst(MI, WideTy);
3219 return Legalized;
3220 }
3221 case TargetOpcode::G_SPLAT_VECTOR: {
3222 if (TypeIdx != 1)
3223 return UnableToLegalize;
3224
3226 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3228 return Legalized;
3229 }
3230 }
3231}
3232
3233static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3234                             MachineIRBuilder &B, Register Src, LLT Ty) {
3235 auto Unmerge = B.buildUnmerge(Ty, Src);
3236 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3237 Pieces.push_back(Unmerge.getReg(I));
3238}
3239
3240static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3241 MachineIRBuilder &MIRBuilder) {
3242 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3243 MachineFunction &MF = MIRBuilder.getMF();
3244 const DataLayout &DL = MIRBuilder.getDataLayout();
3245 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3246 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3247 LLT DstLLT = MRI.getType(DstReg);
3248
3249 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3250
3251 auto Addr = MIRBuilder.buildConstantPool(
3252 AddrPtrTy,
3253 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3254
3255 MachineMemOperand *MMO =
3256      MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3257                              MachineMemOperand::MOLoad, DstLLT, Alignment);
3258
3259 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3260}
3261
3262LegalizerHelper::LegalizeResult
3263LegalizerHelper::lowerConstant(MachineInstr &MI) {
3264  const MachineOperand &ConstOperand = MI.getOperand(1);
3265 const Constant *ConstantVal = ConstOperand.getCImm();
3266
3267 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3268 MI.eraseFromParent();
3269
3270 return Legalized;
3271}
3272
3273LegalizerHelper::LegalizeResult
3274LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3275  const MachineOperand &ConstOperand = MI.getOperand(1);
3276 const Constant *ConstantVal = ConstOperand.getFPImm();
3277
3278 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3279 MI.eraseFromParent();
3280
3281 return Legalized;
3282}
3283
3284LegalizerHelper::LegalizeResult
3285LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3286  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3287 if (SrcTy.isVector()) {
3288 LLT SrcEltTy = SrcTy.getElementType();
3289    SmallVector<Register, 8> SrcRegs;
3290
3291 if (DstTy.isVector()) {
3292 int NumDstElt = DstTy.getNumElements();
3293 int NumSrcElt = SrcTy.getNumElements();
3294
3295 LLT DstEltTy = DstTy.getElementType();
3296 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3297 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3298
3299 // If there's an element size mismatch, insert intermediate casts to match
3300 // the result element type.
3301 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3302 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3303 //
3304 // =>
3305 //
3306      // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3307      // %4:_(<2 x s8>) = G_BITCAST %2
3308      // %5:_(<2 x s8>) = G_BITCAST %3
3309      // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3310 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
3311 SrcPartTy = SrcEltTy;
3312 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3313 //
3314 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3315 //
3316 // =>
3317 //
3318      // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3319      // %4:_(s16) = G_BITCAST %2
3320      // %5:_(s16) = G_BITCAST %3
3321      // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3322 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
3323 DstCastTy = DstEltTy;
3324 }
3325
3326 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3327 for (Register &SrcReg : SrcRegs)
3328 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3329 } else
3330 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3331
3332 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3333 MI.eraseFromParent();
3334 return Legalized;
3335 }
3336
3337 if (DstTy.isVector()) {
3338    SmallVector<Register, 8> SrcRegs;
3339    getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3340 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3341 MI.eraseFromParent();
3342 return Legalized;
3343 }
3344
3345 return UnableToLegalize;
3346}
3347
3348/// Figure out the bit offset into a register when coercing a vector index for
3349/// the wide element type. This is only for the case when promoting vector to
3350/// one with larger elements.
3351///
3352///
3353/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3354/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
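///
/// For example (an illustrative sketch), indexing element 5 of a <8 x s16>
/// vector viewed as <4 x s32>: Log2(32/16) = 1, so %offset_idx = 5 & 1 = 1 and
/// %offset_bits = 1 << 4 = 16, i.e. the element occupies the high half of wide
/// element 2.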
3355static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3356                                                   Register Idx,
3357 unsigned NewEltSize,
3358 unsigned OldEltSize) {
3359 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3360 LLT IdxTy = B.getMRI()->getType(Idx);
3361
3362 // Now figure out the amount we need to shift to get the target bits.
3363 auto OffsetMask = B.buildConstant(
3364 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3365 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3366 return B.buildShl(IdxTy, OffsetIdx,
3367 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3368}
3369
3370/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3371/// is casting to a vector with a smaller element size, perform multiple element
3372/// extracts and merge the results. If this is coercing to a vector with larger
3373/// elements, index the bitcasted vector and extract the target element with bit
3374/// operations. This is intended to force the indexing in the native register
3375/// size for architectures that can dynamically index the register file.
3376LegalizerHelper::LegalizeResult
3377LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3378                                         LLT CastTy) {
3379 if (TypeIdx != 1)
3380 return UnableToLegalize;
3381
3382 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3383
3384 LLT SrcEltTy = SrcVecTy.getElementType();
3385 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3386 unsigned OldNumElts = SrcVecTy.getNumElements();
3387
3388 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3389 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3390
3391 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3392 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3393 if (NewNumElts > OldNumElts) {
3394 // Decreasing the vector element size
3395 //
3396 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3397 // =>
3398 // v4i32:castx = bitcast x:v2i64
3399 //
3400 // i64 = bitcast
3401 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3402 // (i32 (extract_vector_elt castx, (2 * y + 1)))
3403 //
3404 if (NewNumElts % OldNumElts != 0)
3405 return UnableToLegalize;
3406
3407 // Type of the intermediate result vector.
3408 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3409 LLT MidTy =
3410 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3411
3412 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3413
3414 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3415 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3416
3417 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3418 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3419 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3420 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3421 NewOps[I] = Elt.getReg(0);
3422 }
3423
3424 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3425 MIRBuilder.buildBitcast(Dst, NewVec);
3426 MI.eraseFromParent();
3427 return Legalized;
3428 }
3429
3430 if (NewNumElts < OldNumElts) {
3431 if (NewEltSize % OldEltSize != 0)
3432 return UnableToLegalize;
3433
3434 // This only depends on powers of 2 because we use bit tricks to figure out
3435 // the bit offset we need to shift to get the target element. A general
3436 // expansion could emit division/multiply.
3437 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3438 return UnableToLegalize;
3439
3440 // Increasing the vector element size.
3441 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3442 //
3443 // =>
3444 //
3445 // %cast = G_BITCAST %vec
3446 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3447 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3448 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3449 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3450 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3451 // %elt = G_TRUNC %elt_bits
3452
3453 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3454 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3455
3456 // Divide to get the index in the wider element type.
3457 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3458
3459 Register WideElt = CastVec;
3460 if (CastTy.isVector()) {
3461 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3462 ScaledIdx).getReg(0);
3463 }
3464
3465 // Compute the bit offset into the register of the target element.
3466    Register OffsetBits = getBitcastWiderVectorElementOffset(
3467        MIRBuilder, Idx, NewEltSize, OldEltSize);
3468
3469 // Shift the wide element to get the target element.
3470 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3471 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3472 MI.eraseFromParent();
3473 return Legalized;
3474 }
3475
3476 return UnableToLegalize;
3477}
3478
3479/// Emit code to insert \p InsertReg into \p TargetRet at \p OffsetBits in \p
3480/// TargetReg, while preserving other bits in \p TargetReg.
3481///
3482/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
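///
/// For example (an illustrative sketch), inserting an s8 value into an s32
/// register at OffsetBits = 16: the mask is 0xFF << 16, so the result is
/// (TargetReg & ~0x00FF0000) | (G_ZEXT(InsertReg) << 16).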
3483static Register buildBitFieldInsert(MachineIRBuilder &B,
3484                                    Register TargetReg, Register InsertReg,
3485 Register OffsetBits) {
3486 LLT TargetTy = B.getMRI()->getType(TargetReg);
3487 LLT InsertTy = B.getMRI()->getType(InsertReg);
3488 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3489 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3490
3491 // Produce a bitmask of the value to insert
3492 auto EltMask = B.buildConstant(
3493 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3494 InsertTy.getSizeInBits()));
3495 // Shift it into position
3496 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3497 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3498
3499 // Clear out the bits in the wide element
3500 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3501
3502 // The value to insert was zero-extended, so its bits outside the field are
3503 // already zero; OR it into the masked wide element.
3504 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3505}
3506
3507/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3508/// is increasing the element size, perform the indexing in the target element
3509/// type, and use bit operations to insert at the element position. This is
3510/// intended for architectures that can dynamically index the register file and
3511/// want to force indexing in the native register size.
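 ///
 /// The value is inserted by extracting the containing wide element, splicing
 /// the new bits in with shift/mask operations (buildBitFieldInsert), and
 /// writing the wide element back into the cast vector.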
3512 LegalizerHelper::LegalizeResult
3513 LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3514 LLT CastTy) {
3515 if (TypeIdx != 0)
3516 return UnableToLegalize;
3517
3518 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3519 MI.getFirst4RegLLTs();
3520 LLT VecTy = DstTy;
3521
3522 LLT VecEltTy = VecTy.getElementType();
3523 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3524 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3525 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3526
3527 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3528 unsigned OldNumElts = VecTy.getNumElements();
3529
3530 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3531 if (NewNumElts < OldNumElts) {
3532 if (NewEltSize % OldEltSize != 0)
3533 return UnableToLegalize;
3534
3535 // This only depends on powers of 2 because we use bit tricks to figure out
3536 // the bit offset we need to shift to get the target element. A general
3537 // expansion could emit division/multiply.
3538 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3539 return UnableToLegalize;
3540
3541 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3542 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3543
3544 // Divide to get the index in the wider element type.
3545 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3546
3547 Register ExtractedElt = CastVec;
3548 if (CastTy.isVector()) {
3549 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3550 ScaledIdx).getReg(0);
3551 }
3552
3553 // Compute the bit offset into the register of the target element.
3554 Register OffsetBits = getBitcastWiderVectorElementOffset(
3555 MIRBuilder, Idx, NewEltSize, OldEltSize);
3556
3557 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3558 Val, OffsetBits);
3559 if (CastTy.isVector()) {
3560 InsertedElt = MIRBuilder.buildInsertVectorElement(
3561 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3562 }
3563
3564 MIRBuilder.buildBitcast(Dst, InsertedElt);
3565 MI.eraseFromParent();
3566 return Legalized;
3567 }
3568
3569 return UnableToLegalize;
3570}
3571
3572// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3573 // those whose operands are smaller than a legal type.
3574//
3575// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3576//
3577// ===>
3578//
3579// s32 = G_BITCAST <4 x s8>
3580// s32 = G_BITCAST <4 x s8>
3581// s32 = G_BITCAST <4 x s8>
3582// s32 = G_BITCAST <4 x s8>
3583// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3584// <16 x s8> = G_BITCAST <4 x s32>
3585 LegalizerHelper::LegalizeResult
3586 LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
3587 LLT CastTy) {
3588 // Convert it to CONCAT instruction
3589 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3590 if (!ConcatMI) {
3591 return UnableToLegalize;
3592 }
3593
3594 // Check if bitcast is Legal
3595 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3596 LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
3597
3598 // Check if the build vector is Legal
3599 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3600 return UnableToLegalize;
3601 }
3602
3603 // Bitcast the sources
3604 SmallVector<Register> BitcastRegs;
3605 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3606 BitcastRegs.push_back(
3607 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3608 .getReg(0));
3609 }
3610
3611 // Build the scalar values into a vector
3612 Register BuildReg =
3613 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
3614 MIRBuilder.buildBitcast(DstReg, BuildReg);
3615
3616 MI.eraseFromParent();
3617 return Legalized;
3618}
3619
3620 LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
3621 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
3622 Register DstReg = LoadMI.getDstReg();
3623 Register PtrReg = LoadMI.getPointerReg();
3624 LLT DstTy = MRI.getType(DstReg);
3625 MachineMemOperand &MMO = LoadMI.getMMO();
3626 LLT MemTy = MMO.getMemoryType();
3627 MachineFunction &MF = MIRBuilder.getMF();
3628
3629 unsigned MemSizeInBits = MemTy.getSizeInBits();
3630 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
3631
3632 if (MemSizeInBits != MemStoreSizeInBits) {
3633 if (MemTy.isVector())
3634 return UnableToLegalize;
3635
3636 // Promote to a byte-sized load if not loading an integral number of
3637 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
3638 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
3639 MachineMemOperand *NewMMO =
3640 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
3641
3642 Register LoadReg = DstReg;
3643 LLT LoadTy = DstTy;
3644
3645 // If this wasn't already an extending load, we need to widen the result
3646 // register to avoid creating a load with a narrower result than the source.
3647 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
3648 LoadTy = WideMemTy;
3649 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
3650 }
3651
3652 if (isa<GSExtLoad>(LoadMI)) {
3653 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3654 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
3655 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
3656 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3657 // The extra bits are guaranteed to be zero, since we stored them that
3658 // way. A zext load from Wide thus automatically gives zext from MemVT.
3659 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
3660 } else {
3661 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
3662 }
3663
3664 if (DstTy != LoadTy)
3665 MIRBuilder.buildTrunc(DstReg, LoadReg);
3666
3667 LoadMI.eraseFromParent();
3668 return Legalized;
3669 }
3670
3671 // Big endian lowering not implemented.
3672 if (MIRBuilder.getDataLayout().isBigEndian())
3673 return UnableToLegalize;
3674
3675 // This load needs splitting into power of 2 sized loads.
3676 //
3677 // Our strategy here is to generate anyextending loads for the smaller
3678 // types up to next power-2 result type, and then combine the two larger
3679 // result values together, before truncating back down to the non-pow-2
3680 // type.
3681 // E.g. v1 = i24 load =>
3682 // v2 = i32 zextload (2 byte)
3683 // v3 = i32 load (1 byte)
3684 // v4 = i32 shl v3, 16
3685 // v5 = i32 or v4, v2
3686 // v1 = i24 trunc v5
3687 // By doing this we generate the correct truncate which should get
3688 // combined away as an artifact with a matching extend.
3689
3690 uint64_t LargeSplitSize, SmallSplitSize;
3691
3692 if (!isPowerOf2_32(MemSizeInBits)) {
3693 // This load needs splitting into power of 2 sized loads.
3694 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
3695 SmallSplitSize = MemSizeInBits - LargeSplitSize;
3696 } else {
3697 // This is already a power of 2, but we still need to split this in half.
3698 //
3699 // Assume we're being asked to decompose an unaligned load.
3700 // TODO: If this requires multiple splits, handle them all at once.
3701 auto &Ctx = MF.getFunction().getContext();
3702 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3703 return UnableToLegalize;
3704
3705 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3706 }
3707
3708 if (MemTy.isVector()) {
3709 // TODO: Handle vector extloads
3710 if (MemTy != DstTy)
3711 return UnableToLegalize;
3712
3713 // TODO: We can do better than scalarizing the vector and at least split it
3714 // in half.
3715 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
3716 }
3717
3718 MachineMemOperand *LargeMMO =
3719 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3720 MachineMemOperand *SmallMMO =
3721 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3722
3723 LLT PtrTy = MRI.getType(PtrReg);
3724 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
3725 LLT AnyExtTy = LLT::scalar(AnyExtSize);
3726 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
3727 PtrReg, *LargeMMO);
3728
3729 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
3730 LargeSplitSize / 8);
3731 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
3732 auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
3733 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
3734 SmallPtr, *SmallMMO);
3735
3736 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
3737 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
3738
3739 if (AnyExtTy == DstTy)
3740 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
3741 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
3742 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3743 MIRBuilder.buildTrunc(DstReg, {Or});
3744 } else {
3745 assert(DstTy.isPointer() && "expected pointer");
3746 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3747
3748 // FIXME: We currently consider this to be illegal for non-integral address
3749 // spaces, but we still need a way to reinterpret the bits.
3750 MIRBuilder.buildIntToPtr(DstReg, Or);
3751 }
3752
3753 LoadMI.eraseFromParent();
3754 return Legalized;
3755}
3756
3757 LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
3758 // Lower a non-power of 2 store into multiple pow-2 stores.
3759 // E.g. split an i24 store into an i16 store + i8 store.
3760 // We do this by first extending the stored value to the next largest power
3761 // of 2 type, and then using truncating stores to store the components.
3762 // As with G_LOAD, doing this generates an extend that can be
3763 // artifact-combined away instead of leaving behind extracts.
3764 Register SrcReg = StoreMI.getValueReg();
3765 Register PtrReg = StoreMI.getPointerReg();
3766 LLT SrcTy = MRI.getType(SrcReg);
3767 MachineFunction &MF = MIRBuilder.getMF();
3768 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
3769 LLT MemTy = MMO.getMemoryType();
3770
3771 unsigned StoreWidth = MemTy.getSizeInBits();
3772 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
3773
3774 if (StoreWidth != StoreSizeInBits) {
3775 if (SrcTy.isVector())
3776 return UnableToLegalize;
3777
3778 // Promote to a byte-sized store with upper bits zero if not
3779 // storing an integral number of bytes. For example, promote
3780 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
3781 LLT WideTy = LLT::scalar(StoreSizeInBits);
3782
3783 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
3784 // Avoid creating a store with a narrower source than result.
3785 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
3786 SrcTy = WideTy;
3787 }
3788
3789 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
3790
3791 MachineMemOperand *NewMMO =
3792 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
3793 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
3794 StoreMI.eraseFromParent();
3795 return Legalized;
3796 }
3797
3798 if (MemTy.isVector()) {
3799 // TODO: Handle vector trunc stores
3800 if (MemTy != SrcTy)
3801 return UnableToLegalize;
3802
3803 // TODO: We can do better than scalarizing the vector and at least split it
3804 // in half.
3805 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
3806 }
3807
3808 unsigned MemSizeInBits = MemTy.getSizeInBits();
3809 uint64_t LargeSplitSize, SmallSplitSize;
3810
3811 if (!isPowerOf2_32(MemSizeInBits)) {
3812 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
3813 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
3814 } else {
3815 auto &Ctx = MF.getFunction().getContext();
3816 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3817 return UnableToLegalize; // Don't know what we're being asked to do.
3818
3819 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3820 }
3821
3822 // Extend to the next pow-2. If this store was itself the result of lowering,
3823 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
3824 // that's wider than the stored size.
3825 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
3826 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
3827
3828 if (SrcTy.isPointer()) {
3829 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
3830 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
3831 }
3832
3833 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
3834
3835 // Obtain the smaller value by shifting away the larger value.
3836 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
3837 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
3838
3839 // Generate the PtrAdd and truncating stores.
3840 LLT PtrTy = MRI.getType(PtrReg);
3841 auto OffsetCst = MIRBuilder.buildConstant(
3842 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
3843 auto SmallPtr =
3844 MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
3845
3846 MachineMemOperand *LargeMMO =
3847 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3848 MachineMemOperand *SmallMMO =
3849 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3850 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
3851 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
3852 StoreMI.eraseFromParent();
3853 return Legalized;
3854}
3855
3856 LegalizerHelper::LegalizeResult
3857 LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
3858 switch (MI.getOpcode()) {
3859 case TargetOpcode::G_LOAD: {
3860 if (TypeIdx != 0)
3861 return UnableToLegalize;
3862 MachineMemOperand &MMO = **MI.memoperands_begin();
3863
3864 // Not sure how to interpret a bitcast of an extending load.
3865 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3866 return UnableToLegalize;
3867
3868 Observer.changingInstr(MI);
3869 bitcastDst(MI, CastTy, 0);
3870 MMO.setType(CastTy);
3871 // The range metadata is no longer valid when reinterpreted as a different
3872 // type.
3873 MMO.clearRanges();
3874 Observer.changedInstr(MI);
3875 return Legalized;
3876 }
3877 case TargetOpcode::G_STORE: {
3878 if (TypeIdx != 0)
3879 return UnableToLegalize;
3880
3881 MachineMemOperand &MMO = **MI.memoperands_begin();
3882
3883 // Not sure how to interpret a bitcast of a truncating store.
3884 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3885 return UnableToLegalize;
3886
3887 Observer.changingInstr(MI);
3888 bitcastSrc(MI, CastTy, 0);
3889 MMO.setType(CastTy);
3890 Observer.changedInstr(MI);
3891 return Legalized;
3892 }
3893 case TargetOpcode::G_SELECT: {
3894 if (TypeIdx != 0)
3895 return UnableToLegalize;
3896
3897 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
3898 LLVM_DEBUG(
3899 dbgs() << "bitcast action not implemented for vector select\n");
3900 return UnableToLegalize;
3901 }
3902
3903 Observer.changingInstr(MI);
3904 bitcastSrc(MI, CastTy, 2);
3905 bitcastSrc(MI, CastTy, 3);
3906 bitcastDst(MI, CastTy, 0);
3907 Observer.changedInstr(MI);
3908 return Legalized;
3909 }
3910 case TargetOpcode::G_AND:
3911 case TargetOpcode::G_OR:
3912 case TargetOpcode::G_XOR: {
3913 Observer.changingInstr(MI);
3914 bitcastSrc(MI, CastTy, 1);
3915 bitcastSrc(MI, CastTy, 2);
3916 bitcastDst(MI, CastTy, 0);
3917 Observer.changedInstr(MI);
3918 return Legalized;
3919 }
3920 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3921 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
3922 case TargetOpcode::G_INSERT_VECTOR_ELT:
3923 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
3924 case TargetOpcode::G_CONCAT_VECTORS:
3925 return bitcastConcatVector(MI, TypeIdx, CastTy);
3926 default:
3927 return UnableToLegalize;
3928 }
3929}
3930
3931// Legalize an instruction by changing the opcode in place.
3932void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
3933 Observer.changingInstr(MI);
3934 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
3935 Observer.changedInstr(MI);
3936}
3937
3938 LegalizerHelper::LegalizeResult
3939 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
3940 using namespace TargetOpcode;
3941
3942 switch(MI.getOpcode()) {
3943 default:
3944 return UnableToLegalize;
3945 case TargetOpcode::G_FCONSTANT:
3946 return lowerFConstant(MI);
3947 case TargetOpcode::G_BITCAST:
3948 return lowerBitcast(MI);
3949 case TargetOpcode::G_SREM:
3950 case TargetOpcode::G_UREM: {
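 // Lower the remainder as LHS - (LHS / RHS) * RHS using the matching
 // signed/unsigned division.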
3951 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3952 auto Quot =
3953 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
3954 {MI.getOperand(1), MI.getOperand(2)});
3955
3956 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
3957 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
3958 MI.eraseFromParent();
3959 return Legalized;
3960 }
3961 case TargetOpcode::G_SADDO:
3962 case TargetOpcode::G_SSUBO:
3963 return lowerSADDO_SSUBO(MI);
3964 case TargetOpcode::G_UMULH:
3965 case TargetOpcode::G_SMULH:
3966 return lowerSMULH_UMULH(MI);
3967 case TargetOpcode::G_SMULO:
3968 case TargetOpcode::G_UMULO: {
3969 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
3970 // result.
3971 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
3972 LLT Ty = MRI.getType(Res);
3973
3974 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
3975 ? TargetOpcode::G_SMULH
3976 : TargetOpcode::G_UMULH;
3977
3978 Observer.changingInstr(MI);
3979 const auto &TII = MIRBuilder.getTII();
3980 MI.setDesc(TII.get(TargetOpcode::G_MUL));
3981 MI.removeOperand(1);
3982 Observer.changedInstr(MI);
3983
3984 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
3985 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3986
3987 // Move insert point forward so we can use the Res register if needed.
3988 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3989
3990 // For *signed* multiply, overflow is detected by checking:
3991 // (hi != (lo >> bitwidth-1))
3992 if (Opcode == TargetOpcode::G_SMULH) {
3993 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
3994 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
3995 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
3996 } else {
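 // For *unsigned* multiply, overflow occurred iff the high half is nonzero.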
3997 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
3998 }
3999 return Legalized;
4000 }
4001 case TargetOpcode::G_FNEG: {
4002 auto [Res, SubByReg] = MI.getFirst2Regs();
4003 LLT Ty = MRI.getType(Res);
4004
4005 // TODO: Handle vector types once we are able to
4006 // represent them.
4007 if (Ty.isVector())
4008 return UnableToLegalize;
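 // fneg is just a flip of the sign bit: Res = SubByReg ^ SignMask.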
4009 auto SignMask =
4010 MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
4011 MIRBuilder.buildXor(Res, SubByReg, SignMask);
4012 MI.eraseFromParent();
4013 return Legalized;
4014 }
4015 case TargetOpcode::G_FSUB:
4016 case TargetOpcode::G_STRICT_FSUB: {
4017 auto [Res, LHS, RHS] = MI.getFirst3Regs();
4018 LLT Ty = MRI.getType(Res);
4019
4020 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4021 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4022
4023 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4024 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4025 else
4026 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4027
4028 MI.eraseFromParent();
4029 return Legalized;
4030 }
4031 case TargetOpcode::G_FMAD:
4032 return lowerFMad(MI);
4033 case TargetOpcode::G_FFLOOR:
4034 return lowerFFloor(MI);
4035 case TargetOpcode::G_LROUND:
4036 case TargetOpcode::G_LLROUND: {
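 // lround/llround rounds to the nearest integer (ties away from zero) and then
 // converts to an integer, so lower to G_INTRINSIC_ROUND followed by G_FPTOSI.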
4037 Register DstReg = MI.getOperand(0).getReg();
4038 Register SrcReg = MI.getOperand(1).getReg();
4039 LLT SrcTy = MRI.getType(SrcReg);
4040 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4041 {SrcReg});
4042 MIRBuilder.buildFPTOSI(DstReg, Round);
4043 MI.eraseFromParent();
4044 return Legalized;
4045 }
4046 case TargetOpcode::G_INTRINSIC_ROUND:
4047 return lowerIntrinsicRound(MI);
4048 case TargetOpcode::G_FRINT: {
4049 // Since round even is the assumed rounding mode for unconstrained FP
4050 // operations, rint and roundeven are the same operation.
4051 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4052 return Legalized;
4053 }
4054 case TargetOpcode::G_INTRINSIC_LRINT:
4055 case TargetOpcode::G_INTRINSIC_LLRINT: {
4056 Register DstReg = MI.getOperand(0).getReg();
4057 Register SrcReg = MI.getOperand(1).getReg();
4058 LLT SrcTy = MRI.getType(SrcReg);
4059 auto Round =
4060 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4061 MIRBuilder.buildFPTOSI(DstReg, Round);
4062 MI.eraseFromParent();
4063 return Legalized;
4064 }
4065 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
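 // Expand to a plain G_ATOMIC_CMPXCHG plus an ICmp of the loaded value against
 // the expected value to produce the success flag.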
4066 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
4067 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4068 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4069 **MI.memoperands_begin());
4070 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4071 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4072 MI.eraseFromParent();
4073 return Legalized;
4074 }
4075 case TargetOpcode::G_LOAD:
4076 case TargetOpcode::G_SEXTLOAD:
4077 case TargetOpcode::G_ZEXTLOAD:
4078 return lowerLoad(cast<GAnyLoad>(MI));
4079 case TargetOpcode::G_STORE:
4080 return lowerStore(cast<GStore>(MI));
4081 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4082 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4083 case TargetOpcode::G_CTLZ:
4084 case TargetOpcode::G_CTTZ:
4085 case TargetOpcode::G_CTPOP:
4086 return lowerBitCount(MI);
4087 case G_UADDO: {
4088 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
4089
4090 Register NewRes = MRI.cloneVirtualRegister(Res);
4091
4092 MIRBuilder.buildAdd(NewRes, LHS, RHS);
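 // Unsigned addition overflowed iff the truncated sum wrapped around, i.e. it
 // is strictly less than one of the operands.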
4093 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4094
4095 MIRBuilder.buildCopy(Res, NewRes);
4096
4097 MI.eraseFromParent();
4098 return Legalized;
4099 }
4100 case G_UADDE: {
4101 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
4102 const LLT CondTy = MRI.getType(CarryOut);
4103 const LLT Ty = MRI.getType(Res);
4104
4105 Register NewRes = MRI.cloneVirtualRegister(Res);
4106
4107 // Initial add of the two operands.
4108 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
4109
4110 // Initial check for carry.
4111 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4112
4113 // Add the sum and the carry.
4114 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
4115 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4116
4117 // Second check for carry. We can only carry if the initial sum is all 1s
4118 // and the carry is set, resulting in a new sum of 0.
4119 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4120 auto ResEqZero =
4121 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
4122 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4123 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
4124
4125 MIRBuilder.buildCopy(Res, NewRes);
4126
4127 MI.eraseFromParent();
4128 return Legalized;
4129 }
4130 case G_USUBO: {
4131 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
4132
4133 MIRBuilder.buildSub(Res, LHS, RHS);
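 // Unsigned subtraction borrows iff LHS < RHS.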
4134 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
4135
4136 MI.eraseFromParent();
4137 return Legalized;
4138 }
4139 case G_USUBE: {
4140 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
4141 const LLT CondTy = MRI.getType(BorrowOut);
4142 const LLT Ty = MRI.getType(Res);
4143
4144 // Initial subtract of the two operands.
4145 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
4146
4147 // Initial check for borrow.
4148 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4149
4150 // Subtract the borrow from the first subtract.
4151 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
4152 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4153
4154 // Second check for borrow. We can only borrow if the initial difference is
4155 // 0 and the borrow is set, resulting in a new difference of all 1s.
4156 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4157 auto TmpResEqZero =
4158 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4159 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4160 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4161
4162 MI.eraseFromParent();
4163 return Legalized;
4164 }
4165 case G_UITOFP:
4166 return lowerUITOFP(MI);
4167 case G_SITOFP:
4168 return lowerSITOFP(MI);
4169 case G_FPTOUI:
4170 return lowerFPTOUI(MI);
4171 case G_FPTOSI:
4172 return lowerFPTOSI(MI);
4173 case G_FPTRUNC:
4174 return lowerFPTRUNC(MI);
4175 case G_FPOWI:
4176 return lowerFPOWI(MI);
4177 case G_SMIN:
4178 case G_SMAX:
4179 case G_UMIN:
4180 case G_UMAX:
4181 return lowerMinMax(MI);
4182 case G_SCMP:
4183 case G_UCMP:
4184 return lowerThreewayCompare(MI);
4185 case G_FCOPYSIGN:
4186 return lowerFCopySign(MI);
4187 case G_FMINNUM:
4188 case G_FMAXNUM:
4189 return lowerFMinNumMaxNum(MI);
4190 case G_MERGE_VALUES:
4191 return lowerMergeValues(MI);
4192 case G_UNMERGE_VALUES:
4193 return lowerUnmergeValues(MI);
4194 case TargetOpcode::G_SEXT_INREG: {
4195 assert(MI.getOperand(2).isImm() && "Expected immediate");
4196 int64_t SizeInBits = MI.getOperand(2).getImm();
4197
4198 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4199 LLT DstTy = MRI.getType(DstReg);
4200 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4201
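 // Sign-extend from SizeInBits by shifting the value up to the sign bit and
 // arithmetic-shifting it back down.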
4202 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4203 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4204 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4205 MI.eraseFromParent();
4206 return Legalized;
4207 }
4208 case G_EXTRACT_VECTOR_ELT:
4209 case G_INSERT_VECTOR_ELT:
4210 return lowerExtractInsertVectorElt(MI);
4211 case G_SHUFFLE_VECTOR:
4212 return lowerShuffleVector(MI);
4213 case G_VECTOR_COMPRESS:
4214 return lowerVECTOR_COMPRESS(MI);
4215 case G_DYN_STACKALLOC:
4216 return lowerDynStackAlloc(MI);
4217 case G_STACKSAVE:
4218 return lowerStackSave(MI);
4219 case G_STACKRESTORE:
4220 return lowerStackRestore(MI);
4221 case G_EXTRACT:
4222 return lowerExtract(MI);
4223 case G_INSERT:
4224 return lowerInsert(MI);
4225 case G_BSWAP:
4226 return lowerBswap(MI);
4227 case G_BITREVERSE:
4228 return lowerBitreverse(MI);
4229 case G_READ_REGISTER:
4230 case G_WRITE_REGISTER:
4231 return lowerReadWriteRegister(MI);
4232 case G_UADDSAT:
4233 case G_USUBSAT: {
4234 // Try to make a reasonable guess about which lowering strategy to use. The
4235 // target can override this with custom lowering and calling the
4236 // implementation functions.
4237 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4238 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4239 return lowerAddSubSatToMinMax(MI);
4240 return lowerAddSubSatToAddoSubo(MI);
4241 }
4242 case G_SADDSAT:
4243 case G_SSUBSAT: {
4244 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4245
4246 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4247 // since it's a shorter expansion. However, we would need to figure out the
4248 // preferred boolean type for the carry out for the query.
4249 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4250 return lowerAddSubSatToMinMax(MI);
4251 return lowerAddSubSatToAddoSubo(MI);
4252 }
4253 case G_SSHLSAT:
4254 case G_USHLSAT:
4255 return lowerShlSat(MI);
4256 case G_ABS:
4257 return lowerAbsToAddXor(MI);
4258 case G_SELECT:
4259 return lowerSelect(MI);
4260 case G_IS_FPCLASS:
4261 return lowerISFPCLASS(MI);
4262 case G_SDIVREM:
4263 case G_UDIVREM:
4264 return lowerDIVREM(MI);
4265 case G_FSHL:
4266 case G_FSHR:
4267 return lowerFunnelShift(MI);
4268 case G_ROTL:
4269 case G_ROTR:
4270 return lowerRotate(MI);
4271 case G_MEMSET:
4272 case G_MEMCPY:
4273 case G_MEMMOVE:
4274 return lowerMemCpyFamily(MI);
4275 case G_MEMCPY_INLINE:
4276 return lowerMemcpyInline(MI);
4277 case G_ZEXT:
4278 case G_SEXT:
4279 case G_ANYEXT:
4280 return lowerEXT(MI);
4281 case G_TRUNC:
4282 return lowerTRUNC(MI);
4283 GISEL_VECREDUCE_CASES_NONSEQ
4284 return lowerVectorReduction(MI);
4285 case G_VAARG:
4286 return lowerVAArg(MI);
4287 }
4288}
4289
4290 Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4291 Align MinAlign) const {
4292 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4293 // datalayout for the preferred alignment. Also there should be a target hook
4294 // for this to allow targets to reduce the alignment and ignore the
4295 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4296 // the type.
4297 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4298}
4299
4300 MachineInstrBuilder
4301 LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4302 MachinePointerInfo &PtrInfo) {
4303 MachineFunction &MF = MIRBuilder.getMF();
4304 const DataLayout &DL = MIRBuilder.getDataLayout();
4305 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4306
4307 unsigned AddrSpace = DL.getAllocaAddrSpace();
4308 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4309
4310 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4311 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4312}
4313
4314 static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
4315 LLT VecTy) {
4316 LLT IdxTy = B.getMRI()->getType(IdxReg);
4317 unsigned NElts = VecTy.getNumElements();
4318
4319 int64_t IdxVal;
4320 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4321 if (IdxVal < VecTy.getNumElements())
4322 return IdxReg;
4323 // If a constant index would be out of bounds, clamp it as well.
4324 }
4325
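 // For a power-of-2 element count the clamp is a simple mask; otherwise clamp
 // with an unsigned min against the last valid index.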
4326 if (isPowerOf2_32(NElts)) {
4327 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4328 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4329 }
4330
4331 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4332 .getReg(0);
4333}
4334
4335 Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4336 Register Index) {
4337 LLT EltTy = VecTy.getElementType();
4338
4339 // Calculate the element offset and add it to the pointer.
4340 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4341 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4342 "Converting bits to bytes lost precision");
4343
4344 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4345
4346 // Convert index to the correct size for the address space.
4347 const DataLayout &DL = MIRBuilder.getDataLayout();
4348 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4349 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4350 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4351 if (IdxTy != MRI.getType(Index))
4352 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4353
4354 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4355 MIRBuilder.buildConstant(IdxTy, EltSize));
4356
4357 LLT PtrTy = MRI.getType(VecPtr);
4358 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4359}
4360
4361#ifndef NDEBUG
4362 /// Check that all vector operands have the same number of elements. Other
4363 /// operands should be listed in \p NonVecOpIndices.
4364 static bool hasSameNumEltsOnAllVectorOperands(
4365 GenericMachineInstr &MI, MachineRegisterInfo &MRI,
4366 std::initializer_list<unsigned> NonVecOpIndices) {
4367 if (MI.getNumMemOperands() != 0)
4368 return false;
4369
4370 LLT VecTy = MRI.getType(MI.getReg(0));
4371 if (!VecTy.isVector())
4372 return false;
4373 unsigned NumElts = VecTy.getNumElements();
4374
4375 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4376 MachineOperand &Op = MI.getOperand(OpIdx);
4377 if (!Op.isReg()) {
4378 if (!is_contained(NonVecOpIndices, OpIdx))
4379 return false;
4380 continue;
4381 }
4382
4383 LLT Ty = MRI.getType(Op.getReg());
4384 if (!Ty.isVector()) {
4385 if (!is_contained(NonVecOpIndices, OpIdx))
4386 return false;
4387 continue;
4388 }
4389
4390 if (Ty.getNumElements() != NumElts)
4391 return false;
4392 }
4393
4394 return true;
4395}
4396#endif
4397
4398 /// Fill \p DstOps with DstOps whose combined number of elements equals that of
4399 /// \p Ty. These DstOps have either scalar type when \p NumElts = 1 or are
4400 /// vectors with \p NumElts elements. When Ty.getNumElements() is not a multiple
4401 /// of \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
4402static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
4403 unsigned NumElts) {
4404 LLT LeftoverTy;
4405 assert(Ty.isVector() && "Expected vector type");
4406 LLT EltTy = Ty.getElementType();
4407 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
4408 int NumParts, NumLeftover;
4409 std::tie(NumParts, NumLeftover) =
4410 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
4411
4412 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
4413 for (int i = 0; i < NumParts; ++i) {
4414 DstOps.push_back(NarrowTy);
4415 }
4416
4417 if (LeftoverTy.isValid()) {
4418 assert(NumLeftover == 1 && "expected exactly one leftover");
4419 DstOps.push_back(LeftoverTy);
4420 }
4421}
4422
4423/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
4424/// made from \p Op depending on operand type.
4425static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
4426 MachineOperand &Op) {
4427 for (unsigned i = 0; i < N; ++i) {
4428 if (Op.isReg())
4429 Ops.push_back(Op.getReg());
4430 else if (Op.isImm())
4431 Ops.push_back(Op.getImm());
4432 else if (Op.isPredicate())
4433 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
4434 else
4435 llvm_unreachable("Unsupported type");
4436 }
4437}
4438
4439// Handle splitting vector operations which need to have the same number of
4440// elements in each type index, but each type index may have a different element
4441// type.
4442//
4443// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
4444// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4445// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4446//
4447// Also handles some irregular breakdown cases, e.g.
4448// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
4449// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4450// s64 = G_SHL s64, s32
4451 LegalizerHelper::LegalizeResult
4452 LegalizerHelper::fewerElementsVectorMultiEltType(
4453 GenericMachineInstr &MI, unsigned NumElts,
4454 std::initializer_list<unsigned> NonVecOpIndices) {
4455 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
4456 "Non-compatible opcode or not specified non-vector operands");
4457 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4458
4459 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4460 unsigned NumDefs = MI.getNumDefs();
4461
4462 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
4463 // Build instructions with DstOps to use instruction found by CSE directly.
4464 // CSE copies found instruction into given vreg when building with vreg dest.
4465 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
4466 // Output registers will be taken from created instructions.
4467 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
4468 for (unsigned i = 0; i < NumDefs; ++i) {
4469 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
4470 }
4471
4472 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
4473 // Operands listed in NonVecOpIndices will be used as is without splitting;
4474 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
4475 // scalar condition (op 1), immediate in sext_inreg (op 2).
4476 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
4477 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4478 ++UseIdx, ++UseNo) {
4479 if (is_contained(NonVecOpIndices, UseIdx)) {
4480 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
4481 MI.getOperand(UseIdx));
4482 } else {
4483 SmallVector<Register, 8> SplitPieces;
4484 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
4485 MRI);
4486 for (auto Reg : SplitPieces)
4487 InputOpsPieces[UseNo].push_back(Reg);
4488 }
4489 }
4490
4491 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4492
4493 // Take i-th piece of each input operand split and build sub-vector/scalar
4494 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
4495 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4496 SmallVector<DstOp, 2> Defs;
4497 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4498 Defs.push_back(OutputOpsPieces[DstNo][i]);
4499
4500 SmallVector<SrcOp, 3> Uses;
4501 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
4502 Uses.push_back(InputOpsPieces[InputNo][i]);
4503
4504 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
4505 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4506 OutputRegs[DstNo].push_back(I.getReg(DstNo));
4507 }
4508
4509 // Merge small outputs into MI's output for each def operand.
4510 if (NumLeftovers) {
4511 for (unsigned i = 0; i < NumDefs; ++i)
4512 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
4513 } else {
4514 for (unsigned i = 0; i < NumDefs; ++i)
4515 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
4516 }
4517
4518 MI.eraseFromParent();
4519 return Legalized;
4520}
4521
4522 LegalizerHelper::LegalizeResult
4523 LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
4524 unsigned NumElts) {
4525 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4526
4527 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4528 unsigned NumDefs = MI.getNumDefs();
4529
4530 SmallVector<DstOp, 8> OutputOpsPieces;
4531 SmallVector<Register, 8> OutputRegs;
4532 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
4533
4534 // Instructions that perform the register split will be inserted in the basic
4535 // block where the register is defined (the basic block is in the next operand).
4536 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
4537 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4538 UseIdx += 2, ++UseNo) {
4539 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
4540 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
4541 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
4542 MIRBuilder, MRI);
4543 }
4544
4545 // Build PHIs with fewer elements.
4546 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4547 MIRBuilder.setInsertPt(*MI.getParent(), MI);
4548 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4549 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
4550 Phi.addDef(
4551 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
4552 OutputRegs.push_back(Phi.getReg(0));
4553
4554 for (unsigned j = 0; j < NumInputs / 2; ++j) {
4555 Phi.addUse(InputOpsPieces[j][i]);
4556 Phi.add(MI.getOperand(1 + j * 2 + 1));
4557 }
4558 }
4559
4560 // Set the insert point after the existing PHIs
4561 MachineBasicBlock &MBB = *MI.getParent();
4562 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
4563
4564 // Merge small outputs into MI's def.
4565 if (NumLeftovers) {
4566 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
4567 } else {
4568 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
4569 }
4570
4571 MI.eraseFromParent();
4572 return Legalized;
4573}
4574
4575 LegalizerHelper::LegalizeResult
4576 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
4577 unsigned TypeIdx,
4578 LLT NarrowTy) {
4579 const int NumDst = MI.getNumOperands() - 1;
4580 const Register SrcReg = MI.getOperand(NumDst).getReg();
4581 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4582 LLT SrcTy = MRI.getType(SrcReg);
4583
4584 if (TypeIdx != 1 || NarrowTy == DstTy)
4585 return UnableToLegalize;
4586
4587 // Requires compatible types. Otherwise SrcReg should have been defined by a
4588 // merge-like instruction that would get artifact-combined. Most likely the
4589 // instruction that defines SrcReg has to perform a more/fewer-elements
4590 // legalization compatible with NarrowTy.
4591 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
4592 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4593
4594 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
4595 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
4596 return UnableToLegalize;
4597
4598 // This is most likely DstTy (smaller than register size) packed in SrcTy
4599 // (larger than register size) and since the unmerge was not combined it will be
4600 // lowered to bit-sequence extracts from a register. Unpack SrcTy to NarrowTy
4601 // (register size) pieces first. Then unpack each of the NarrowTy pieces to DstTy.
4602
4603 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
4604 //
4605 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
4606 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
4607 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
4608 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
4609 const int NumUnmerge = Unmerge->getNumOperands() - 1;
4610 const int PartsPerUnmerge = NumDst / NumUnmerge;
4611
4612 for (int I = 0; I != NumUnmerge; ++I) {
4613 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
4614
4615 for (int J = 0; J != PartsPerUnmerge; ++J)
4616 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
4617 MIB.addUse(Unmerge.getReg(I));
4618 }
4619
4620 MI.eraseFromParent();
4621 return Legalized;
4622}
4623
4624 LegalizerHelper::LegalizeResult
4625 LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
4626 LLT NarrowTy) {
4627 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
4628 // Requires compatible types. Otherwise the user of DstReg did not perform an
4629 // unmerge that should have been artifact-combined. Most likely the instruction
4630 // that uses DstReg has to do a more/fewer-elements legalization compatible with NarrowTy.
4631 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
4632 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4633 if (NarrowTy == SrcTy)
4634 return UnableToLegalize;
4635
4636 // This attempts to lower part of an LCMTy merge/unmerge sequence. Its intended
4637 // use is for old MIR tests. Since the switch to more/fewer-elements legalization
4638 // it should no longer be possible to generate MIR like this when starting from
4639 // LLVM IR, because the LCMTy approach was replaced with merge/unmerge to vector elements.
4640 if (TypeIdx == 1) {
4641 assert(SrcTy.isVector() && "Expected vector types");
4642 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4643 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
4644 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
4645 return UnableToLegalize;
4646 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
4647 //
4648 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
4649 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
4650 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
4651 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
4652 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
4653 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
4654
4655 SmallVector<Register, 8> Elts;
4656 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
4657 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
4658 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
4659 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
4660 Elts.push_back(Unmerge.getReg(j));
4661 }
4662
4663 SmallVector<Register, 8> NarrowTyElts;
4664 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
4665 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
4666 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
4667 ++i, Offset += NumNarrowTyElts) {
4668 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
4669 NarrowTyElts.push_back(
4670 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
4671 }
4672
4673 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
4674 MI.eraseFromParent();
4675 return Legalized;
4676 }
4677
4678 assert(TypeIdx == 0 && "Bad type index");
4679 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
4680 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
4681 return UnableToLegalize;
4682
4683 // This is most likely SrcTy (smaller than register size) packed in DstTy
4684 // (larger than register size) and since the merge was not combined it will be
4685 // lowered to bit-sequence packing into a register. Merge SrcTy to NarrowTy
4686 // (register size) pieces first. Then merge each of the NarrowTy pieces to DstTy.
4687
4688 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
4689 //
4690 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
4691 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
4692 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
4693 SmallVector<Register, 8> NarrowTyElts;
4694 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
4695 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
4696 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
4697 for (unsigned i = 0; i < NumParts; ++i) {
4698 SmallVector<Register, 8> Sources;
4699 for (unsigned j = 0; j < NumElts; ++j)
4700 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
4701 NarrowTyElts.push_back(
4702 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
4703 }
4704
4705 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
4706 MI.eraseFromParent();
4707 return Legalized;
4708}
4709
4710 LegalizerHelper::LegalizeResult
4711 LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
4712 unsigned TypeIdx,
4713 LLT NarrowVecTy) {
4714 auto [DstReg, SrcVec] = MI.getFirst2Regs();
4715 Register InsertVal;
4716 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
4717
4718 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
4719 if (IsInsert)
4720 InsertVal = MI.getOperand(2).getReg();
4721
4722 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
4723
4724 // TODO: Handle total scalarization case.
4725 if (!NarrowVecTy.isVector())
4726 return UnableToLegalize;
4727
4728 LLT VecTy = MRI.getType(SrcVec);
4729
4730 // If the index is a constant, we can really break this down as you would
4731 // expect, and index into the target size pieces.
4732 int64_t IdxVal;
4733 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
4734 if (MaybeCst) {
4735 IdxVal = MaybeCst->Value.getSExtValue();
4736 // Avoid out of bounds indexing the pieces.
4737 if (IdxVal >= VecTy.getNumElements()) {
4738 MIRBuilder.buildUndef(DstReg);
4739 MI.eraseFromParent();
4740 return Legalized;
4741 }
4742
4743 SmallVector<Register, 8> VecParts;
4744 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
4745
4746 // Build a sequence of NarrowTy pieces in VecParts for this operand.
4747 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
4748 TargetOpcode::G_ANYEXT);
4749
4750 unsigned NewNumElts = NarrowVecTy.getNumElements();
4751
4752 LLT IdxTy = MRI.getType(Idx);
4753 int64_t PartIdx = IdxVal / NewNumElts;
4754 auto NewIdx =
4755 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
4756
4757 if (IsInsert) {
4758 LLT PartTy = MRI.getType(VecParts[PartIdx]);
4759
4760 // Use the adjusted index to insert into one of the subvectors.
4761 auto InsertPart = MIRBuilder.buildInsertVectorElement(
4762 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
4763 VecParts[PartIdx] = InsertPart.getReg(0);
4764
4765 // Recombine the inserted subvector with the others to reform the result
4766 // vector.
4767 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
4768 } else {
4769 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
4770 }
4771
4772 MI.eraseFromParent();
4773 return Legalized;
4774 }
4775
4776 // With a variable index, we can't perform the operation in a smaller type, so
4777 // we're forced to expand this.
4778 //
4779 // TODO: We could emit a chain of compare/select to figure out which piece to
4780 // index.
4781 return UnableToLegalize;
4782}
4783
4784 LegalizerHelper::LegalizeResult
4785 LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
4786 LLT NarrowTy) {
4787 // FIXME: Don't know how to handle secondary types yet.
4788 if (TypeIdx != 0)
4789 return UnableToLegalize;
4790
4791 // This implementation doesn't work for atomics. Give up instead of doing
4792 // something invalid.
4793 if (LdStMI.isAtomic())
4794 return UnableToLegalize;
4795
4796 bool IsLoad = isa<GLoad>(LdStMI);
4797 Register ValReg = LdStMI.getReg(0);
4798 Register AddrReg = LdStMI.getPointerReg();
4799 LLT ValTy = MRI.getType(ValReg);
4800
4801 // FIXME: Do we need a distinct NarrowMemory legalize action?
4802 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
4803 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
4804 return UnableToLegalize;
4805 }
4806
4807 int NumParts = -1;
4808 int NumLeftover = -1;
4809 LLT LeftoverTy;
4810 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
4811 if (IsLoad) {
4812 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
4813 } else {
4814 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
4815 NarrowLeftoverRegs, MIRBuilder, MRI)) {
4816 NumParts = NarrowRegs.size();
4817 NumLeftover = NarrowLeftoverRegs.size();
4818 }
4819 }
4820
4821 if (NumParts == -1)
4822 return UnableToLegalize;
4823
4824 LLT PtrTy = MRI.getType(AddrReg);
4825 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
4826
4827 unsigned TotalSize = ValTy.getSizeInBits();
4828
4829 // Split the load/store into PartTy sized pieces starting at Offset. If this
4830 // is a load, return the new registers in ValRegs. For a store, each element
4831 // of ValRegs should be PartTy. Returns the next offset that needs to be
4832 // handled.
4833 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
4834 auto MMO = LdStMI.getMMO();
4835 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
4836 unsigned NumParts, unsigned Offset) -> unsigned {
4837 MachineFunction &MF = MIRBuilder.getMF();
4838 unsigned PartSize = PartTy.getSizeInBits();
4839 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
4840 ++Idx) {
4841 unsigned ByteOffset = Offset / 8;
4842 Register NewAddrReg;
4843
4844 MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
4845
4846 MachineMemOperand *NewMMO =
4847 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
4848
4849 if (IsLoad) {
4850 Register Dst = MRI.createGenericVirtualRegister(PartTy);
4851 ValRegs.push_back(Dst);
4852 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
4853 } else {
4854 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
4855 }
4856 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
4857 }
4858
4859 return Offset;
4860 };
4861
4862 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
4863 unsigned HandledOffset =
4864 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
4865
4866 // Handle the rest of the register if this isn't an even type breakdown.
4867 if (LeftoverTy.isValid())
4868 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
4869
4870 if (IsLoad) {
4871 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
4872 LeftoverTy, NarrowLeftoverRegs);
4873 }
4874
4875 LdStMI.eraseFromParent();
4876 return Legalized;
4877}
4878
4879 LegalizerHelper::LegalizeResult
4880 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
4881 LLT NarrowTy) {
4882 using namespace TargetOpcode;
4883 GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
4884 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
4885
4886 switch (MI.getOpcode()) {
4887 case G_IMPLICIT_DEF:
4888 case G_TRUNC:
4889 case G_AND:
4890 case G_OR:
4891 case G_XOR:
4892 case G_ADD:
4893 case G_SUB:
4894 case G_MUL:
4895 case G_PTR_ADD:
4896 case G_SMULH:
4897 case G_UMULH:
4898 case G_FADD:
4899 case G_FMUL:
4900 case G_FSUB:
4901 case G_FNEG:
4902 case G_FABS:
4903 case G_FCANONICALIZE:
4904 case G_FDIV:
4905 case G_FREM:
4906 case G_FMA:
4907 case G_FMAD:
4908 case G_FPOW:
4909 case G_FEXP:
4910 case G_FEXP2:
4911 case G_FEXP10:
4912 case G_FLOG:
4913 case G_FLOG2:
4914 case G_FLOG10:
4915 case G_FLDEXP:
4916 case G_FNEARBYINT:
4917 case G_FCEIL:
4918 case G_FFLOOR:
4919 case G_FRINT:
4920 case G_INTRINSIC_LRINT:
4921 case G_INTRINSIC_LLRINT:
4922 case G_INTRINSIC_ROUND:
4923 case G_INTRINSIC_ROUNDEVEN:
4924 case G_INTRINSIC_TRUNC:
4925 case G_FCOS:
4926 case G_FSIN:
4927 case G_FTAN:
4928 case G_FACOS:
4929 case G_FASIN:
4930 case G_FATAN:
4931 case G_FCOSH:
4932 case G_FSINH:
4933 case G_FTANH:
4934 case G_FSQRT:
4935 case G_BSWAP:
4936 case G_BITREVERSE:
4937 case G_SDIV:
4938 case G_UDIV:
4939 case G_SREM:
4940 case G_UREM:
4941 case G_SDIVREM:
4942 case G_UDIVREM:
4943 case G_SMIN:
4944 case G_SMAX:
4945 case G_UMIN:
4946 case G_UMAX:
4947 case G_ABS:
4948 case G_FMINNUM:
4949 case G_FMAXNUM:
4950 case G_FMINNUM_IEEE:
4951 case G_FMAXNUM_IEEE:
4952 case G_FMINIMUM:
4953 case G_FMAXIMUM:
4954 case G_FSHL:
4955 case G_FSHR:
4956 case G_ROTL:
4957 case G_ROTR:
4958 case G_FREEZE:
4959 case G_SADDSAT:
4960 case G_SSUBSAT:
4961 case G_UADDSAT:
4962 case G_USUBSAT:
4963 case G_UMULO:
4964 case G_SMULO:
4965 case G_SHL:
4966 case G_LSHR:
4967 case G_ASHR:
4968 case G_SSHLSAT:
4969 case G_USHLSAT:
4970 case G_CTLZ:
4971 case G_CTLZ_ZERO_UNDEF:
4972 case G_CTTZ:
4973 case G_CTTZ_ZERO_UNDEF:
4974 case G_CTPOP:
4975 case G_FCOPYSIGN:
4976 case G_ZEXT:
4977 case G_SEXT:
4978 case G_ANYEXT:
4979 case G_FPEXT:
4980 case G_FPTRUNC:
4981 case G_SITOFP:
4982 case G_UITOFP:
4983 case G_FPTOSI:
4984 case G_FPTOUI:
4985 case G_INTTOPTR:
4986 case G_PTRTOINT:
4987 case G_ADDRSPACE_CAST:
4988 case G_UADDO:
4989 case G_USUBO:
4990 case G_UADDE:
4991 case G_USUBE:
4992 case G_SADDO:
4993 case G_SSUBO:
4994 case G_SADDE:
4995 case G_SSUBE:
4996 case G_STRICT_FADD:
4997 case G_STRICT_FSUB:
4998 case G_STRICT_FMUL:
4999 case G_STRICT_FMA:
5000 case G_STRICT_FLDEXP:
5001 case G_FFREXP:
5002 return fewerElementsVectorMultiEltType(GMI, NumElts);
5003 case G_ICMP:
5004 case G_FCMP:
5005 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
5006 case G_IS_FPCLASS:
5007 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
5008 case G_SELECT:
5009 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
5010 return fewerElementsVectorMultiEltType(GMI, NumElts);
5011 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
5012 case G_PHI:
5013 return fewerElementsVectorPhi(GMI, NumElts);
5014 case G_UNMERGE_VALUES:
5015 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
5016 case G_BUILD_VECTOR:
5017 assert(TypeIdx == 0 && "not a vector type index");
5018 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5019 case G_CONCAT_VECTORS:
5020 if (TypeIdx != 1) // TODO: This probably does work as expected already.
5021 return UnableToLegalize;
5022 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5023 case G_EXTRACT_VECTOR_ELT:
5024 case G_INSERT_VECTOR_ELT:
5025 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
5026 case G_LOAD:
5027 case G_STORE:
5028 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
5029 case G_SEXT_INREG:
5030 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
5031 GISEL_VECREDUCE_CASES_NONSEQ
5032 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
5033 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5034 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5035 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
5036 case G_SHUFFLE_VECTOR:
5037 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
5038 case G_FPOWI:
5039 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
5040 case G_BITCAST:
5041 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
5042 case G_INTRINSIC_FPTRUNC_ROUND:
5043 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
5044 default:
5045 return UnableToLegalize;
5046 }
5047}
5048
5049 LegalizerHelper::LegalizeResult
5050 LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx,
5051 LLT NarrowTy) {
5052 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5053 "Not a bitcast operation");
5054
5055 if (TypeIdx != 0)
5056 return UnableToLegalize;
5057
5058 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5059
5060 unsigned SrcScalSize = SrcTy.getScalarSizeInBits();
5061 LLT SrcNarrowTy =
5062 LLT::fixed_vector(NarrowTy.getSizeInBits() / SrcScalSize, SrcScalSize);
5063
5064 // Split the Src and Dst Reg into smaller registers
5065 SmallVector<Register> SrcVRegs, BitcastVRegs;
5066 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5067 return UnableToLegalize;
5068
5069 // Build new smaller bitcast instructions
5070 // Not supporting Leftover types for now but will have to
5071 for (unsigned i = 0; i < SrcVRegs.size(); i++)
5072 BitcastVRegs.push_back(
5073 MIRBuilder.buildBitcast(NarrowTy, SrcVRegs[i]).getReg(0));
5074
5075 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5076 MI.eraseFromParent();
5077 return Legalized;
5078}
5079
5080 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
5081 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5082 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5083 if (TypeIdx != 0)
5084 return UnableToLegalize;
5085
5086 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5087 MI.getFirst3RegLLTs();
5088 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5089 // The shuffle should be canonicalized by now.
5090 if (DstTy != Src1Ty)
5091 return UnableToLegalize;
5092 if (DstTy != Src2Ty)
5093 return UnableToLegalize;
5094
5095 if (!isPowerOf2_32(DstTy.getNumElements()))
5096 return UnableToLegalize;
5097
5098 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5099 // Further legalization attempts will be needed to split it further.
5100 NarrowTy =
5101 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
5102 unsigned NewElts = NarrowTy.getNumElements();
5103
5104 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
5105 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
5106 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
5107 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5108 SplitSrc2Regs[1]};
5109
5110 Register Hi, Lo;
5111
5112 // If Lo or Hi uses elements from at most two of the four input vectors, then
5113 // express it as a vector shuffle of those two inputs. Otherwise extract the
5114 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
5115 SmallVector<int, 16> Ops;
5116 for (unsigned High = 0; High < 2; ++High) {
5117 Register &Output = High ? Hi : Lo;
5118
5119 // Build a shuffle mask for the output, discovering on the fly which
5120 // input vectors to use as shuffle operands (recorded in InputUsed).
5121 // If building a suitable shuffle vector proves too hard, then bail
5122 // out with useBuildVector set.
5123 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
5124 unsigned FirstMaskIdx = High * NewElts;
5125 bool UseBuildVector = false;
5126 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5127 // The mask element. This indexes into the input.
5128 int Idx = Mask[FirstMaskIdx + MaskOffset];
5129
5130 // The input vector this mask element indexes into.
5131 unsigned Input = (unsigned)Idx / NewElts;
5132
5133 if (Input >= std::size(Inputs)) {
5134 // The mask element does not index into any input vector.
5135 Ops.push_back(-1);
5136 continue;
5137 }
5138
5139 // Turn the index into an offset from the start of the input vector.
5140 Idx -= Input * NewElts;
5141
5142 // Find or create a shuffle vector operand to hold this input.
5143 unsigned OpNo;
5144 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5145 if (InputUsed[OpNo] == Input) {
5146 // This input vector is already an operand.
5147 break;
5148 } else if (InputUsed[OpNo] == -1U) {
5149 // Create a new operand for this input vector.
5150 InputUsed[OpNo] = Input;
5151 break;
5152 }
5153 }
5154
5155 if (OpNo >= std::size(InputUsed)) {
5156 // More than two input vectors used! Give up on trying to create a
5157 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5158 UseBuildVector = true;
5159 break;
5160 }
5161
5162 // Add the mask index for the new shuffle vector.
5163 Ops.push_back(Idx + OpNo * NewElts);
5164 }
5165
5166 if (UseBuildVector) {
5167 LLT EltTy = NarrowTy.getElementType();
5168 SmallVector<Register, 16> SVOps;
5169
5170 // Extract the input elements by hand.
5171 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5172 // The mask element. This indexes into the input.
5173 int Idx = Mask[FirstMaskIdx + MaskOffset];
5174
5175 // The input vector this mask element indexes into.
5176 unsigned Input = (unsigned)Idx / NewElts;
5177
5178 if (Input >= std::size(Inputs)) {
5179 // The mask element is "undef" or indexes off the end of the input.
5180 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5181 continue;
5182 }
5183
5184 // Turn the index into an offset from the start of the input vector.
5185 Idx -= Input * NewElts;
5186
5187 // Extract the vector element by hand.
5188 SVOps.push_back(MIRBuilder
5189 .buildExtractVectorElement(
5190 EltTy, Inputs[Input],
5191 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5192 .getReg(0));
5193 }
5194
5195 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5196 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5197 } else if (InputUsed[0] == -1U) {
5198 // No input vectors were used! The result is undefined.
5199 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5200 } else {
5201 Register Op0 = Inputs[InputUsed[0]];
5202 // If only one input was used, use an undefined vector for the other.
5203 Register Op1 = InputUsed[1] == -1U
5204 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5205 : Inputs[InputUsed[1]];
5206 // At least one input vector was used. Create a new shuffle vector.
5207 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5208 }
5209
5210 Ops.clear();
5211 }
5212
5213 MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
5214 MI.eraseFromParent();
5215 return Legalized;
5216}
5217
5218LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
5219 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5220 auto &RdxMI = cast<GVecReduce>(MI);
5221
5222 if (TypeIdx != 1)
5223 return UnableToLegalize;
5224
5225 // The semantics of the normal non-sequential reductions allow us to freely
5226 // re-associate the operation.
5227 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5228
5229 if (NarrowTy.isVector() &&
5230 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5231 return UnableToLegalize;
5232
5233 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5234 SmallVector<Register> SplitSrcs;
5235 // If NarrowTy is a scalar then we're being asked to scalarize.
5236 const unsigned NumParts =
5237 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5238 : SrcTy.getNumElements();
5239
5240 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5241 if (NarrowTy.isScalar()) {
5242 if (DstTy != NarrowTy)
5243 return UnableToLegalize; // FIXME: handle implicit extensions.
5244
5245 if (isPowerOf2_32(NumParts)) {
5246 // Generate a tree of scalar operations to reduce the critical path.
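// Illustrative example: with 4 scalar parts p0..p3 and an add reduction, the
// tree below emits (p0 + p1) and (p2 + p3) in one level and then adds the two
// partial sums, giving a depth of 2 instead of 3 for a linear chain.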
5247 SmallVector<Register> PartialResults;
5248 unsigned NumPartsLeft = NumParts;
5249 while (NumPartsLeft > 1) {
5250 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5251 PartialResults.emplace_back(
5252 MIRBuilder
5253 .buildInstr(ScalarOpc, {NarrowTy},
5254 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5255 .getReg(0));
5256 }
5257 SplitSrcs = PartialResults;
5258 PartialResults.clear();
5259 NumPartsLeft = SplitSrcs.size();
5260 }
5261 assert(SplitSrcs.size() == 1);
5262 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5263 MI.eraseFromParent();
5264 return Legalized;
5265 }
5266 // If we can't generate a tree, then just do sequential operations.
5267 Register Acc = SplitSrcs[0];
5268 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5269 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5270 .getReg(0);
5271 MIRBuilder.buildCopy(DstReg, Acc);
5272 MI.eraseFromParent();
5273 return Legalized;
5274 }
5275 SmallVector<Register> PartialReductions;
5276 for (unsigned Part = 0; Part < NumParts; ++Part) {
5277 PartialReductions.push_back(
5278 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5279 .getReg(0));
5280 }
5281
5282 // If the types involved are powers of 2, we can generate intermediate vector
5283 // ops, before generating a final reduction operation.
5284 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5285 isPowerOf2_32(NarrowTy.getNumElements())) {
5286 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5287 }
5288
5289 Register Acc = PartialReductions[0];
5290 for (unsigned Part = 1; Part < NumParts; ++Part) {
5291 if (Part == NumParts - 1) {
5292 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5293 {Acc, PartialReductions[Part]});
5294 } else {
5295 Acc = MIRBuilder
5296 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5297 .getReg(0);
5298 }
5299 }
5300 MI.eraseFromParent();
5301 return Legalized;
5302}
5303
5304LegalizerHelper::LegalizeResult
5305LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
5306 unsigned int TypeIdx,
5307 LLT NarrowTy) {
5308 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5309 MI.getFirst3RegLLTs();
5310 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5311 DstTy != NarrowTy)
5312 return UnableToLegalize;
5313
5314 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5315 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5316 "Unexpected vecreduce opcode");
5317 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5318 ? TargetOpcode::G_FADD
5319 : TargetOpcode::G_FMUL;
5320
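// Illustrative note: sequential (ordered) FP reductions must not be
// re-associated, so for a 4-element source the loop below emits
// ((((ScalarReg op e0) op e1) op e2) op e3) strictly in element order.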
5321 SmallVector<Register> SplitSrcs;
5322 unsigned NumParts = SrcTy.getNumElements();
5323 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5324 Register Acc = ScalarReg;
5325 for (unsigned i = 0; i < NumParts; i++)
5326 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5327 .getReg(0);
5328
5329 MIRBuilder.buildCopy(DstReg, Acc);
5330 MI.eraseFromParent();
5331 return Legalized;
5332}
5333
5334LegalizerHelper::LegalizeResult
5335LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5336 LLT SrcTy, LLT NarrowTy,
5337 unsigned ScalarOpc) {
5338 SmallVector<Register> SplitSrcs;
5339 // Split the sources into NarrowTy size pieces.
5340 extractParts(SrcReg, NarrowTy,
5341 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5342 MIRBuilder, MRI);
5343 // We're going to do a tree reduction using vector operations until we have
5344 // one NarrowTy size value left.
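// Illustrative example: an add reduction of <8 x s32> with NarrowTy =
// <2 x s32> is split into 4 pieces; two levels of vector G_ADDs reduce them
// to a single <2 x s32> value, which then feeds the original G_VECREDUCE_ADD.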
5345 while (SplitSrcs.size() > 1) {
5346 SmallVector<Register> PartialRdxs;
5347 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5348 Register LHS = SplitSrcs[Idx];
5349 Register RHS = SplitSrcs[Idx + 1];
5350 // Create the intermediate vector op.
5351 Register Res =
5352 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
5353 PartialRdxs.push_back(Res);
5354 }
5355 SplitSrcs = std::move(PartialRdxs);
5356 }
5357 // Finally generate the requested NarrowTy based reduction.
5358 Observer.changingInstr(MI);
5359 MI.getOperand(1).setReg(SplitSrcs[0]);
5360 Observer.changedInstr(MI);
5361 return Legalized;
5362}
5363
5364LegalizerHelper::LegalizeResult
5365LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
5366 const LLT HalfTy, const LLT AmtTy) {
5367
5368 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5369 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5370 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5371
5372 if (Amt.isZero()) {
5373 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
5374 MI.eraseFromParent();
5375 return Legalized;
5376 }
5377
5378 LLT NVT = HalfTy;
5379 unsigned NVTBits = HalfTy.getSizeInBits();
5380 unsigned VTBits = 2 * NVTBits;
5381
5382 SrcOp Lo(Register(0)), Hi(Register(0));
5383 if (MI.getOpcode() == TargetOpcode::G_SHL) {
5384 if (Amt.ugt(VTBits)) {
5385 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5386 } else if (Amt.ugt(NVTBits)) {
5387 Lo = MIRBuilder.buildConstant(NVT, 0);
5388 Hi = MIRBuilder.buildShl(NVT, InL,
5389 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5390 } else if (Amt == NVTBits) {
5391 Lo = MIRBuilder.buildConstant(NVT, 0);
5392 Hi = InL;
5393 } else {
5394 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
5395 auto OrLHS =
5396 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
5397 auto OrRHS = MIRBuilder.buildLShr(
5398 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5399 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5400 }
5401 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5402 if (Amt.ugt(VTBits)) {
5403 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5404 } else if (Amt.ugt(NVTBits)) {
5405 Lo = MIRBuilder.buildLShr(NVT, InH,
5406 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5407 Hi = MIRBuilder.buildConstant(NVT, 0);
5408 } else if (Amt == NVTBits) {
5409 Lo = InH;
5410 Hi = MIRBuilder.buildConstant(NVT, 0);
5411 } else {
5412 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5413
5414 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5415 auto OrRHS = MIRBuilder.buildShl(
5416 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5417
5418 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5419 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
5420 }
5421 } else {
5422 if (Amt.ugt(VTBits)) {
5423 Hi = Lo = MIRBuilder.buildAShr(
5424 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5425 } else if (Amt.ugt(NVTBits)) {
5426 Lo = MIRBuilder.buildAShr(NVT, InH,
5427 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5428 Hi = MIRBuilder.buildAShr(NVT, InH,
5429 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5430 } else if (Amt == NVTBits) {
5431 Lo = InH;
5432 Hi = MIRBuilder.buildAShr(NVT, InH,
5433 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5434 } else {
5435 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5436
5437 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5438 auto OrRHS = MIRBuilder.buildShl(
5439 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5440
5441 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5442 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
5443 }
5444 }
5445
5446 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
5447 MI.eraseFromParent();
5448
5449 return Legalized;
5450}
5451
5452// TODO: Optimize if constant shift amount.
5453LegalizerHelper::LegalizeResult
5454LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
5455 LLT RequestedTy) {
5456 if (TypeIdx == 1) {
5457 Observer.changingInstr(MI);
5458 narrowScalarSrc(MI, RequestedTy, 2);
5459 Observer.changedInstr(MI);
5460 return Legalized;
5461 }
5462
5463 Register DstReg = MI.getOperand(0).getReg();
5464 LLT DstTy = MRI.getType(DstReg);
5465 if (DstTy.isVector())
5466 return UnableToLegalize;
5467
5468 Register Amt = MI.getOperand(2).getReg();
5469 LLT ShiftAmtTy = MRI.getType(Amt);
5470 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
5471 if (DstEltSize % 2 != 0)
5472 return UnableToLegalize;
5473
5474 // Ignore the input type. We can only go to exactly half the size of the
5475 // input. If that isn't small enough, the resulting pieces will be further
5476 // legalized.
5477 const unsigned NewBitSize = DstEltSize / 2;
5478 const LLT HalfTy = LLT::scalar(NewBitSize);
5479 const LLT CondTy = LLT::scalar(1);
5480
5481 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
5482 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
5483 ShiftAmtTy);
5484 }
5485
5486 // TODO: Expand with known bits.
5487
5488 // Handle the fully general expansion by an unknown amount.
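// Illustrative note: e.g. for a 128-bit G_SHL narrowed to 64-bit halves,
// Amt < 64 gives Lo = InL << Amt and Hi = (InH << Amt) | (InL >> (64 - Amt));
// Amt >= 64 gives Lo = 0 and Hi = InL << (Amt - 64). The selects below pick
// between these, and the IsZero select keeps the unshifted input half because
// the (NewBits - Amt) sub-shift would be out of range when Amt == 0.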
5489 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
5490
5491 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5492 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5493 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5494
5495 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
5496 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
5497
5498 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
5499 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
5500 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
5501
5502 Register ResultRegs[2];
5503 switch (MI.getOpcode()) {
5504 case TargetOpcode::G_SHL: {
5505 // Short: ShAmt < NewBitSize
5506 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
5507
5508 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
5509 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
5510 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
5511
5512 // Long: ShAmt >= NewBitSize
5513 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
5514 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
5515
5516 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
5517 auto Hi = MIRBuilder.buildSelect(
5518 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
5519
5520 ResultRegs[0] = Lo.getReg(0);
5521 ResultRegs[1] = Hi.getReg(0);
5522 break;
5523 }
5524 case TargetOpcode::G_LSHR:
5525 case TargetOpcode::G_ASHR: {
5526 // Short: ShAmt < NewBitSize
5527 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
5528
5529 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
5530 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
5531 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
5532
5533 // Long: ShAmt >= NewBitSize
5534 MachineInstrBuilder HiL;
5535 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5536 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
5537 } else {
5538 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
5539 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
5540 }
5541 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
5542 {InH, AmtExcess}); // Lo from Hi part.
5543
5544 auto Lo = MIRBuilder.buildSelect(
5545 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
5546
5547 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
5548
5549 ResultRegs[0] = Lo.getReg(0);
5550 ResultRegs[1] = Hi.getReg(0);
5551 break;
5552 }
5553 default:
5554 llvm_unreachable("not a shift");
5555 }
5556
5557 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
5558 MI.eraseFromParent();
5559 return Legalized;
5560}
5561
5562LegalizerHelper::LegalizeResult
5563LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
5564 LLT MoreTy) {
5565 assert(TypeIdx == 0 && "Expecting only Idx 0");
5566
5567 Observer.changingInstr(MI);
5568 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
5569 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
5570 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5571 moreElementsVectorSrc(MI, MoreTy, I);
5572 }
5573
5574 MachineBasicBlock &MBB = *MI.getParent();
5575 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
5576 moreElementsVectorDst(MI, MoreTy, 0);
5577 Observer.changedInstr(MI);
5578 return Legalized;
5579}
5580
5581MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
5582 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
5583 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
5584
5585 switch (Opcode) {
5586 default:
5587 llvm_unreachable(
5588 "getNeutralElementForVecReduce called with invalid opcode!");
5589 case TargetOpcode::G_VECREDUCE_ADD:
5590 case TargetOpcode::G_VECREDUCE_OR:
5591 case TargetOpcode::G_VECREDUCE_XOR:
5592 case TargetOpcode::G_VECREDUCE_UMAX:
5593 return MIRBuilder.buildConstant(Ty, 0);
5594 case TargetOpcode::G_VECREDUCE_MUL:
5595 return MIRBuilder.buildConstant(Ty, 1);
5596 case TargetOpcode::G_VECREDUCE_AND:
5597 case TargetOpcode::G_VECREDUCE_UMIN:
5598 return MIRBuilder.buildConstant(Ty,
5599 APInt::getAllOnes(Ty.getSizeInBits()));
5600 case TargetOpcode::G_VECREDUCE_SMAX:
5601 return MIRBuilder.buildConstant(Ty,
5602 APInt::getSignedMinValue(Ty.getSizeInBits()));
5603 case TargetOpcode::G_VECREDUCE_SMIN:
5604 return MIRBuilder.buildConstant(Ty,
5605 APInt::getSignedMaxValue(Ty.getSizeInBits()));
5606 case TargetOpcode::G_VECREDUCE_FADD:
5607 return MIRBuilder.buildFConstant(Ty, -0.0);
5608 case TargetOpcode::G_VECREDUCE_FMUL:
5609 return MIRBuilder.buildFConstant(Ty, 1.0);
5610 case TargetOpcode::G_VECREDUCE_FMINIMUM:
5611 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
5612 assert(false && "getNeutralElementForVecReduce unimplemented for "
5613 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
5614 }
5615 llvm_unreachable("switch expected to return!");
5616}
5617
5620 LLT MoreTy) {
5621 unsigned Opc = MI.getOpcode();
5622 switch (Opc) {
5623 case TargetOpcode::G_IMPLICIT_DEF:
5624 case TargetOpcode::G_LOAD: {
5625 if (TypeIdx != 0)
5626 return UnableToLegalize;
5627 Observer.changingInstr(MI);
5628 moreElementsVectorDst(MI, MoreTy, 0);
5629 Observer.changedInstr(MI);
5630 return Legalized;
5631 }
5632 case TargetOpcode::G_STORE:
5633 if (TypeIdx != 0)
5634 return UnableToLegalize;
5635 Observer.changingInstr(MI);
5636 moreElementsVectorSrc(MI, MoreTy, 0);
5637 Observer.changedInstr(MI);
5638 return Legalized;
5639 case TargetOpcode::G_AND:
5640 case TargetOpcode::G_OR:
5641 case TargetOpcode::G_XOR:
5642 case TargetOpcode::G_ADD:
5643 case TargetOpcode::G_SUB:
5644 case TargetOpcode::G_MUL:
5645 case TargetOpcode::G_FADD:
5646 case TargetOpcode::G_FSUB:
5647 case TargetOpcode::G_FMUL:
5648 case TargetOpcode::G_FDIV:
5649 case TargetOpcode::G_FCOPYSIGN:
5650 case TargetOpcode::G_UADDSAT:
5651 case TargetOpcode::G_USUBSAT:
5652 case TargetOpcode::G_SADDSAT:
5653 case TargetOpcode::G_SSUBSAT:
5654 case TargetOpcode::G_SMIN:
5655 case TargetOpcode::G_SMAX:
5656 case TargetOpcode::G_UMIN:
5657 case TargetOpcode::G_UMAX:
5658 case TargetOpcode::G_FMINNUM:
5659 case TargetOpcode::G_FMAXNUM:
5660 case TargetOpcode::G_FMINNUM_IEEE:
5661 case TargetOpcode::G_FMAXNUM_IEEE:
5662 case TargetOpcode::G_FMINIMUM:
5663 case TargetOpcode::G_FMAXIMUM:
5664 case TargetOpcode::G_STRICT_FADD:
5665 case TargetOpcode::G_STRICT_FSUB:
5666 case TargetOpcode::G_STRICT_FMUL:
5667 case TargetOpcode::G_SHL:
5668 case TargetOpcode::G_ASHR:
5669 case TargetOpcode::G_LSHR: {
5670 Observer.changingInstr(MI);
5671 moreElementsVectorSrc(MI, MoreTy, 1);
5672 moreElementsVectorSrc(MI, MoreTy, 2);
5673 moreElementsVectorDst(MI, MoreTy, 0);
5674 Observer.changedInstr(MI);
5675 return Legalized;
5676 }
5677 case TargetOpcode::G_FMA:
5678 case TargetOpcode::G_STRICT_FMA:
5679 case TargetOpcode::G_FSHR:
5680 case TargetOpcode::G_FSHL: {
5681 Observer.changingInstr(MI);
5682 moreElementsVectorSrc(MI, MoreTy, 1);
5683 moreElementsVectorSrc(MI, MoreTy, 2);
5684 moreElementsVectorSrc(MI, MoreTy, 3);
5685 moreElementsVectorDst(MI, MoreTy, 0);
5686 Observer.changedInstr(MI);
5687 return Legalized;
5688 }
5689 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
5690 case TargetOpcode::G_EXTRACT:
5691 if (TypeIdx != 1)
5692 return UnableToLegalize;
5693 Observer.changingInstr(MI);
5694 moreElementsVectorSrc(MI, MoreTy, 1);
5695 Observer.changedInstr(MI);
5696 return Legalized;
5697 case TargetOpcode::G_INSERT:
5698 case TargetOpcode::G_INSERT_VECTOR_ELT:
5699 case TargetOpcode::G_FREEZE:
5700 case TargetOpcode::G_FNEG:
5701 case TargetOpcode::G_FABS:
5702 case TargetOpcode::G_FSQRT:
5703 case TargetOpcode::G_FCEIL:
5704 case TargetOpcode::G_FFLOOR:
5705 case TargetOpcode::G_FNEARBYINT:
5706 case TargetOpcode::G_FRINT:
5707 case TargetOpcode::G_INTRINSIC_ROUND:
5708 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
5709 case TargetOpcode::G_INTRINSIC_TRUNC:
5710 case TargetOpcode::G_BSWAP:
5711 case TargetOpcode::G_FCANONICALIZE:
5712 case TargetOpcode::G_SEXT_INREG:
5713 case TargetOpcode::G_ABS:
5714 if (TypeIdx != 0)
5715 return UnableToLegalize;
5716 Observer.changingInstr(MI);
5717 moreElementsVectorSrc(MI, MoreTy, 1);
5718 moreElementsVectorDst(MI, MoreTy, 0);
5719 Observer.changedInstr(MI);
5720 return Legalized;
5721 case TargetOpcode::G_SELECT: {
5722 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
5723 if (TypeIdx == 1) {
5724 if (!CondTy.isScalar() ||
5725 DstTy.getElementCount() != MoreTy.getElementCount())
5726 return UnableToLegalize;
5727
5728 // This is turning a scalar select of vectors into a vector
5729 // select. Broadcast the select condition.
5730 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
5731 Observer.changingInstr(MI);
5732 MI.getOperand(1).setReg(ShufSplat.getReg(0));
5733 Observer.changedInstr(MI);
5734 return Legalized;
5735 }
5736
5737 if (CondTy.isVector())
5738 return UnableToLegalize;
5739
5740 Observer.changingInstr(MI);
5741 moreElementsVectorSrc(MI, MoreTy, 2);
5742 moreElementsVectorSrc(MI, MoreTy, 3);
5743 moreElementsVectorDst(MI, MoreTy, 0);
5744 Observer.changedInstr(MI);
5745 return Legalized;
5746 }
5747 case TargetOpcode::G_UNMERGE_VALUES:
5748 return UnableToLegalize;
5749 case TargetOpcode::G_PHI:
5750 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
5751 case TargetOpcode::G_SHUFFLE_VECTOR:
5752 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
5753 case TargetOpcode::G_BUILD_VECTOR: {
5754 SmallVector<SrcOp, 8> Elts;
5755 for (auto Op : MI.uses()) {
5756 Elts.push_back(Op.getReg());
5757 }
5758
5759 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
5760 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
5761 }
5762
5763 MIRBuilder.buildDeleteTrailingVectorElements(
5764 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
5765 MI.eraseFromParent();
5766 return Legalized;
5767 }
5768 case TargetOpcode::G_SEXT:
5769 case TargetOpcode::G_ZEXT:
5770 case TargetOpcode::G_ANYEXT:
5771 case TargetOpcode::G_TRUNC:
5772 case TargetOpcode::G_FPTRUNC:
5773 case TargetOpcode::G_FPEXT:
5774 case TargetOpcode::G_FPTOSI:
5775 case TargetOpcode::G_FPTOUI:
5776 case TargetOpcode::G_SITOFP:
5777 case TargetOpcode::G_UITOFP: {
5778 Observer.changingInstr(MI);
5779 LLT SrcExtTy;
5780 LLT DstExtTy;
5781 if (TypeIdx == 0) {
5782 DstExtTy = MoreTy;
5783 SrcExtTy = LLT::fixed_vector(
5784 MoreTy.getNumElements(),
5785 MRI.getType(MI.getOperand(1).getReg()).getElementType());
5786 } else {
5787 DstExtTy = LLT::fixed_vector(
5788 MoreTy.getNumElements(),
5789 MRI.getType(MI.getOperand(0).getReg()).getElementType());
5790 SrcExtTy = MoreTy;
5791 }
5792 moreElementsVectorSrc(MI, SrcExtTy, 1);
5793 moreElementsVectorDst(MI, DstExtTy, 0);
5794 Observer.changedInstr(MI);
5795 return Legalized;
5796 }
5797 case TargetOpcode::G_ICMP:
5798 case TargetOpcode::G_FCMP: {
5799 if (TypeIdx != 1)
5800 return UnableToLegalize;
5801
5802 Observer.changingInstr(MI);
5803 moreElementsVectorSrc(MI, MoreTy, 2);
5804 moreElementsVectorSrc(MI, MoreTy, 3);
5805 LLT CondTy = LLT::fixed_vector(
5806 MoreTy.getNumElements(),
5807 MRI.getType(MI.getOperand(0).getReg()).getElementType());
5808 moreElementsVectorDst(MI, CondTy, 0);
5809 Observer.changedInstr(MI);
5810 return Legalized;
5811 }
5812 case TargetOpcode::G_BITCAST: {
5813 if (TypeIdx != 0)
5814 return UnableToLegalize;
5815
5816 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
5817 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5818
5819 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
5820 if (coefficient % DstTy.getNumElements() != 0)
5821 return UnableToLegalize;
5822
5823 coefficient = coefficient / DstTy.getNumElements();
5824
5825 LLT NewTy = SrcTy.changeElementCount(
5826 ElementCount::get(coefficient, MoreTy.isScalable()));
5827 Observer.changingInstr(MI);
5828 moreElementsVectorSrc(MI, NewTy, 1);
5829 moreElementsVectorDst(MI, MoreTy, 0);
5830 Observer.changedInstr(MI);
5831 return Legalized;
5832 }
5833 case TargetOpcode::G_VECREDUCE_FADD:
5834 case TargetOpcode::G_VECREDUCE_FMUL:
5835 case TargetOpcode::G_VECREDUCE_ADD:
5836 case TargetOpcode::G_VECREDUCE_MUL:
5837 case TargetOpcode::G_VECREDUCE_AND:
5838 case TargetOpcode::G_VECREDUCE_OR:
5839 case TargetOpcode::G_VECREDUCE_XOR:
5840 case TargetOpcode::G_VECREDUCE_SMAX:
5841 case TargetOpcode::G_VECREDUCE_SMIN:
5842 case TargetOpcode::G_VECREDUCE_UMAX:
5843 case TargetOpcode::G_VECREDUCE_UMIN: {
5844 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
5845 MachineOperand &MO = MI.getOperand(1);
5846 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
5847 auto NeutralElement = getNeutralElementForVecReduce(
5848 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
5849
5850 LLT IdxTy(TLI.getVectorIdxTy(MIRBuilder.getDataLayout()));
5851 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
5852 i != e; i++) {
5853 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
5854 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
5855 NeutralElement, Idx);
5856 }
5857
5858 Observer.changingInstr(MI);
5859 MO.setReg(NewVec.getReg(0));
5860 Observer.changedInstr(MI);
5861 return Legalized;
5862 }
5863
5864 default:
5865 return UnableToLegalize;
5866 }
5867}
5868
5869LegalizerHelper::LegalizeResult
5870LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
5871 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5872 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5873 unsigned MaskNumElts = Mask.size();
5874 unsigned SrcNumElts = SrcTy.getNumElements();
5875 LLT DestEltTy = DstTy.getElementType();
5876
5877 if (MaskNumElts == SrcNumElts)
5878 return Legalized;
5879
5880 if (MaskNumElts < SrcNumElts) {
5881 // Extend mask to match new destination vector size with
5882 // undef values.
5883 SmallVector<int, 16> NewMask(Mask);
5884 for (unsigned I = MaskNumElts; I < SrcNumElts; ++I)
5885 NewMask.push_back(-1);
5886
5887 moreElementsVectorDst(MI, SrcTy, 0);
5888 MIRBuilder.setInstrAndDebugLoc(MI);
5889 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
5890 MI.getOperand(1).getReg(),
5891 MI.getOperand(2).getReg(), NewMask);
5892 MI.eraseFromParent();
5893
5894 return Legalized;
5895 }
5896
5897 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
5898 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
5899 LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
5900
5901 // Create new source vectors by concatenating the initial
5902 // source vectors with undefined vectors of the same size.
5903 auto Undef = MIRBuilder.buildUndef(SrcTy);
5904 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
5905 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
5906 MOps1[0] = MI.getOperand(1).getReg();
5907 MOps2[0] = MI.getOperand(2).getReg();
5908
5909 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
5910 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
5911
5912 // Readjust mask for new input vector length.
5913 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
5914 for (unsigned I = 0; I != MaskNumElts; ++I) {
5915 int Idx = Mask[I];
5916 if (Idx >= static_cast<int>(SrcNumElts))
5917 Idx += PaddedMaskNumElts - SrcNumElts;
5918 MappedOps[I] = Idx;
5919 }
5920
5921 // If we got more elements than required, extract subvector.
5922 if (MaskNumElts != PaddedMaskNumElts) {
5923 auto Shuffle =
5924 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
5925
5926 SmallVector<Register, 16> Elts(MaskNumElts);
5927 for (unsigned I = 0; I < MaskNumElts; ++I) {
5928 Elts[I] =
5929 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
5930 .getReg(0);
5931 }
5932 MIRBuilder.buildBuildVector(DstReg, Elts);
5933 } else {
5934 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
5935 }
5936
5937 MI.eraseFromParent();
5938 return Legalized;
5939}
5940
5941LegalizerHelper::LegalizeResult
5942LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
5943 unsigned int TypeIdx, LLT MoreTy) {
5944 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
5945 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5946 unsigned NumElts = DstTy.getNumElements();
5947 unsigned WidenNumElts = MoreTy.getNumElements();
5948
5949 if (DstTy.isVector() && Src1Ty.isVector() &&
5950 DstTy.getNumElements() != Src1Ty.getNumElements()) {
5951 return equalizeVectorShuffleLengths(MI);
5952 }
5953
5954 if (TypeIdx != 0)
5955 return UnableToLegalize;
5956
5957 // Expect a canonicalized shuffle.
5958 if (DstTy != Src1Ty || DstTy != Src2Ty)
5959 return UnableToLegalize;
5960
5961 moreElementsVectorSrc(MI, MoreTy, 1);
5962 moreElementsVectorSrc(MI, MoreTy, 2);
5963
5964 // Adjust mask based on new input vector length.
5965 SmallVector<int, 16> NewMask;
5966 for (unsigned I = 0; I != NumElts; ++I) {
5967 int Idx = Mask[I];
5968 if (Idx < static_cast<int>(NumElts))
5969 NewMask.push_back(Idx);
5970 else
5971 NewMask.push_back(Idx - NumElts + WidenNumElts);
5972 }
5973 for (unsigned I = NumElts; I != WidenNumElts; ++I)
5974 NewMask.push_back(-1);
5975 moreElementsVectorDst(MI, MoreTy, 0);
5976 MIRBuilder.setInstrAndDebugLoc(MI);
5977 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
5978 MI.getOperand(1).getReg(),
5979 MI.getOperand(2).getReg(), NewMask);
5980 MI.eraseFromParent();
5981 return Legalized;
5982}
5983
5984void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
5985 ArrayRef<Register> Src1Regs,
5986 ArrayRef<Register> Src2Regs,
5987 LLT NarrowTy) {
5988 MachineIRBuilder &B = MIRBuilder;
5989 unsigned SrcParts = Src1Regs.size();
5990 unsigned DstParts = DstRegs.size();
5991
5992 unsigned DstIdx = 0; // Low bits of the result.
5993 Register FactorSum =
5994 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
5995 DstRegs[DstIdx] = FactorSum;
5996
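// Illustrative example: for two 2-part sources (a1:a0) * (b1:b0), the loop
// below computes DstRegs[0] = lo(a0*b0) and
// DstRegs[1] = lo(a1*b0) + lo(a0*b1) + hi(a0*b0), i.e. schoolbook
// multiplication where each result part sums the low halves of products
// landing on it, the high halves carried up from the part below, and the
// accumulated carries.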
5997 unsigned CarrySumPrevDstIdx;
5998 SmallVector<Register, 4> Factors;
5999
6000 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
6001 // Collect low parts of muls for DstIdx.
6002 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
6003 i <= std::min(DstIdx, SrcParts - 1); ++i) {
6004 MachineInstrBuilder Mul =
6005 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
6006 Factors.push_back(Mul.getReg(0));
6007 }
6008 // Collect high parts of muls from previous DstIdx.
6009 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
6010 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
6011 MachineInstrBuilder Umulh =
6012 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
6013 Factors.push_back(Umulh.getReg(0));
6014 }
6015 // Add CarrySum from additions calculated for previous DstIdx.
6016 if (DstIdx != 1) {
6017 Factors.push_back(CarrySumPrevDstIdx);
6018 }
6019
6020 Register CarrySum;
6021 // Add all factors and accumulate all carries into CarrySum.
6022 if (DstIdx != DstParts - 1) {
6023 MachineInstrBuilder Uaddo =
6024 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
6025 FactorSum = Uaddo.getReg(0);
6026 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
6027 for (unsigned i = 2; i < Factors.size(); ++i) {
6028 MachineInstrBuilder Uaddo =
6029 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
6030 FactorSum = Uaddo.getReg(0);
6031 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
6032 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
6033 }
6034 } else {
6035 // Since value for the next index is not calculated, neither is CarrySum.
6036 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
6037 for (unsigned i = 2; i < Factors.size(); ++i)
6038 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
6039 }
6040
6041 CarrySumPrevDstIdx = CarrySum;
6042 DstRegs[DstIdx] = FactorSum;
6043 Factors.clear();
6044 }
6045}
6046
6047LegalizerHelper::LegalizeResult
6048LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
6049 LLT NarrowTy) {
6050 if (TypeIdx != 0)
6051 return UnableToLegalize;
6052
6053 Register DstReg = MI.getOperand(0).getReg();
6054 LLT DstType = MRI.getType(DstReg);
6055 // FIXME: add support for vector types
6056 if (DstType.isVector())
6057 return UnableToLegalize;
6058
6059 unsigned Opcode = MI.getOpcode();
6060 unsigned OpO, OpE, OpF;
6061 switch (Opcode) {
6062 case TargetOpcode::G_SADDO:
6063 case TargetOpcode::G_SADDE:
6064 case TargetOpcode::G_UADDO:
6065 case TargetOpcode::G_UADDE:
6066 case TargetOpcode::G_ADD:
6067 OpO = TargetOpcode::G_UADDO;
6068 OpE = TargetOpcode::G_UADDE;
6069 OpF = TargetOpcode::G_UADDE;
6070 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
6071 OpF = TargetOpcode::G_SADDE;
6072 break;
6073 case TargetOpcode::G_SSUBO:
6074 case TargetOpcode::G_SSUBE:
6075 case TargetOpcode::G_USUBO:
6076 case TargetOpcode::G_USUBE:
6077 case TargetOpcode::G_SUB:
6078 OpO = TargetOpcode::G_USUBO;
6079 OpE = TargetOpcode::G_USUBE;
6080 OpF = TargetOpcode::G_USUBE;
6081 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
6082 OpF = TargetOpcode::G_SSUBE;
6083 break;
6084 default:
6085 llvm_unreachable("Unexpected add/sub opcode!");
6086 }
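// Illustrative note: e.g. a 128-bit G_ADD narrowed to 64-bit parts becomes a
// G_UADDO for the low part followed by a G_UADDE for the high part consuming
// the carry; for G_SADDO/G_SADDE only the final (most significant) part uses
// the signed variant so the overflow flag is computed correctly.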
6087
6088 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
6089 unsigned NumDefs = MI.getNumExplicitDefs();
6090 Register Src1 = MI.getOperand(NumDefs).getReg();
6091 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
6092 Register CarryDst, CarryIn;
6093 if (NumDefs == 2)
6094 CarryDst = MI.getOperand(1).getReg();
6095 if (MI.getNumOperands() == NumDefs + 3)
6096 CarryIn = MI.getOperand(NumDefs + 2).getReg();
6097
6098 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
6099 LLT LeftoverTy, DummyTy;
6100 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
6101 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
6102 MIRBuilder, MRI);
6103 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
6104 MRI);
6105
6106 int NarrowParts = Src1Regs.size();
6107 for (int I = 0, E = Src1Left.size(); I != E; ++I) {
6108 Src1Regs.push_back(Src1Left[I]);
6109 Src2Regs.push_back(Src2Left[I]);
6110 }
6111 DstRegs.reserve(Src1Regs.size());
6112
6113 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
6114 Register DstReg =
6115 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
6116 Register CarryOut;
6117 // Forward the final carry-out to the destination register
6118 if (i == e - 1 && CarryDst)
6119 CarryOut = CarryDst;
6120
6121 if (!CarryIn) {
6122 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
6123 {Src1Regs[i], Src2Regs[i]});
6124 } else if (i == e - 1) {
6125 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
6126 {Src1Regs[i], Src2Regs[i], CarryIn});
6127 } else {
6128 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
6129 {Src1Regs[i], Src2Regs[i], CarryIn});
6130 }
6131
6132 DstRegs.push_back(DstReg);
6133 CarryIn = CarryOut;
6134 }
6135 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
6136 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
6137 ArrayRef(DstRegs).drop_front(NarrowParts));
6138
6139 MI.eraseFromParent();
6140 return Legalized;
6141}
6142
6143LegalizerHelper::LegalizeResult
6144LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
6145 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
6146
6147 LLT Ty = MRI.getType(DstReg);
6148 if (Ty.isVector())
6149 return UnableToLegalize;
6150
6151 unsigned Size = Ty.getSizeInBits();
6152 unsigned NarrowSize = NarrowTy.getSizeInBits();
6153 if (Size % NarrowSize != 0)
6154 return UnableToLegalize;
6155
6156 unsigned NumParts = Size / NarrowSize;
6157 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
6158 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
6159
6160 SmallVector<Register, 2> Src1Parts, Src2Parts;
6161 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
6162 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
6163 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
6164 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
6165
6166 // Take only high half of registers if this is high mul.
6167 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
6168 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6169 MI.eraseFromParent();
6170 return Legalized;
6171}
6172
6173LegalizerHelper::LegalizeResult
6174LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
6175 LLT NarrowTy) {
6176 if (TypeIdx != 0)
6177 return UnableToLegalize;
6178
6179 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
6180
6181 Register Src = MI.getOperand(1).getReg();
6182 LLT SrcTy = MRI.getType(Src);
6183
6184 // If all finite floats fit into the narrowed integer type, we can just swap
6185 // out the result type. This is practically only useful for conversions from
6186 // half to at least 16-bits, so just handle the one case.
6187 if (SrcTy.getScalarType() != LLT::scalar(16) ||
6188 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
6189 return UnableToLegalize;
6190
6191 Observer.changingInstr(MI);
6192 narrowScalarDst(MI, NarrowTy, 0,
6193 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
6194 Observer.changedInstr(MI);
6195 return Legalized;
6196}
6197
6198LegalizerHelper::LegalizeResult
6199LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
6200 LLT NarrowTy) {
6201 if (TypeIdx != 1)
6202 return UnableToLegalize;
6203
6204 uint64_t NarrowSize = NarrowTy.getSizeInBits();
6205
6206 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
6207 // FIXME: add support for when SizeOp1 isn't an exact multiple of
6208 // NarrowSize.
6209 if (SizeOp1 % NarrowSize != 0)
6210 return UnableToLegalize;
6211 int NumParts = SizeOp1 / NarrowSize;
6212
6213 SmallVector<Register, 2> SrcRegs, DstRegs;
6215 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
6216 MIRBuilder, MRI);
6217
6218 Register OpReg = MI.getOperand(0).getReg();
6219 uint64_t OpStart = MI.getOperand(2).getImm();
6220 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
6221 for (int i = 0; i < NumParts; ++i) {
6222 unsigned SrcStart = i * NarrowSize;
6223
6224 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
6225 // No part of the extract uses this subregister, ignore it.
6226 continue;
6227 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6228 // The entire subregister is extracted, forward the value.
6229 DstRegs.push_back(SrcRegs[i]);
6230 continue;
6231 }
6232
6233 // OpSegStart is where this destination segment would start in OpReg if it
6234 // extended infinitely in both directions.
6235 int64_t ExtractOffset;
6236 uint64_t SegSize;
6237 if (OpStart < SrcStart) {
6238 ExtractOffset = 0;
6239 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
6240 } else {
6241 ExtractOffset = OpStart - SrcStart;
6242 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
6243 }
6244
6245 Register SegReg = SrcRegs[i];
6246 if (ExtractOffset != 0 || SegSize != NarrowSize) {
6247 // A genuine extract is needed.
6248 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
6249 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
6250 }
6251
6252 DstRegs.push_back(SegReg);
6253 }
6254
6255 Register DstReg = MI.getOperand(0).getReg();
6256 if (MRI.getType(DstReg).isVector())
6257 MIRBuilder.buildBuildVector(DstReg, DstRegs);
6258 else if (DstRegs.size() > 1)
6259 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6260 else
6261 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
6262 MI.eraseFromParent();
6263 return Legalized;
6264}
6265
6266LegalizerHelper::LegalizeResult
6267LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
6268 LLT NarrowTy) {
6269 // FIXME: Don't know how to handle secondary types yet.
6270 if (TypeIdx != 0)
6271 return UnableToLegalize;
6272
6273 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
6275 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
6276 LLT LeftoverTy;
6277 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
6278 LeftoverRegs, MIRBuilder, MRI);
6279
6280 for (Register Reg : LeftoverRegs)
6281 SrcRegs.push_back(Reg);
6282
6283 uint64_t NarrowSize = NarrowTy.getSizeInBits();
6284 Register OpReg = MI.getOperand(2).getReg();
6285 uint64_t OpStart = MI.getOperand(3).getImm();
6286 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
6287 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
6288 unsigned DstStart = I * NarrowSize;
6289
6290 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6291 // The entire subregister is defined by this insert, forward the new
6292 // value.
6293 DstRegs.push_back(OpReg);
6294 continue;
6295 }
6296
6297 Register SrcReg = SrcRegs[I];
6298 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
6299 // The leftover reg is smaller than NarrowTy, so we need to extend it.
6300 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
6301 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
6302 }
6303
6304 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
6305 // No part of the insert affects this subregister, forward the original.
6306 DstRegs.push_back(SrcReg);
6307 continue;
6308 }
6309
6310 // OpSegStart is where this destination segment would start in OpReg if it
6311 // extended infinitely in both directions.
6312 int64_t ExtractOffset, InsertOffset;
6313 uint64_t SegSize;
6314 if (OpStart < DstStart) {
6315 InsertOffset = 0;
6316 ExtractOffset = DstStart - OpStart;
6317 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
6318 } else {
6319 InsertOffset = OpStart - DstStart;
6320 ExtractOffset = 0;
6321 SegSize =
6322 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
6323 }
6324
6325 Register SegReg = OpReg;
6326 if (ExtractOffset != 0 || SegSize != OpSize) {
6327 // A genuine extract is needed.
6328 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
6329 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
6330 }
6331
6332 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
6333 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
6334 DstRegs.push_back(DstReg);
6335 }
6336
6337 uint64_t WideSize = DstRegs.size() * NarrowSize;
6338 Register DstReg = MI.getOperand(0).getReg();
6339 if (WideSize > RegTy.getSizeInBits()) {
6340 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
6341 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
6342 MIRBuilder.buildTrunc(DstReg, MergeReg);
6343 } else
6344 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6345
6346 MI.eraseFromParent();
6347 return Legalized;
6348}
6349
6350LegalizerHelper::LegalizeResult
6351LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
6352 LLT NarrowTy) {
6353 Register DstReg = MI.getOperand(0).getReg();
6354 LLT DstTy = MRI.getType(DstReg);
6355
6356 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
6357
6358 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6359 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
6360 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6361 LLT LeftoverTy;
6362 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
6363 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
6364 return UnableToLegalize;
6365
6366 LLT Unused;
6367 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
6368 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6369 llvm_unreachable("inconsistent extractParts result");
6370
6371 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6372 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
6373 {Src0Regs[I], Src1Regs[I]});
6374 DstRegs.push_back(Inst.getReg(0));
6375 }
6376
6377 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6378 auto Inst = MIRBuilder.buildInstr(
6379 MI.getOpcode(),
6380 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
6381 DstLeftoverRegs.push_back(Inst.getReg(0));
6382 }
6383
6384 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6385 LeftoverTy, DstLeftoverRegs);
6386
6387 MI.eraseFromParent();
6388 return Legalized;
6389}
6390
6391LegalizerHelper::LegalizeResult
6392LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
6393 LLT NarrowTy) {
6394 if (TypeIdx != 0)
6395 return UnableToLegalize;
6396
6397 auto [DstReg, SrcReg] = MI.getFirst2Regs();
6398
6399 LLT DstTy = MRI.getType(DstReg);
6400 if (DstTy.isVector())
6401 return UnableToLegalize;
6402
6403 SmallVector<Register, 8> Parts;
6404 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
6405 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
6406 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
6407
6408 MI.eraseFromParent();
6409 return Legalized;
6410}
6411
6412LegalizerHelper::LegalizeResult
6413LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
6414 LLT NarrowTy) {
6415 if (TypeIdx != 0)
6416 return UnableToLegalize;
6417
6418 Register CondReg = MI.getOperand(1).getReg();
6419 LLT CondTy = MRI.getType(CondReg);
6420 if (CondTy.isVector()) // TODO: Handle vselect
6421 return UnableToLegalize;
6422
6423 Register DstReg = MI.getOperand(0).getReg();
6424 LLT DstTy = MRI.getType(DstReg);
6425
6426 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6427 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6428 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
6429 LLT LeftoverTy;
6430 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
6431 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6432 return UnableToLegalize;
6433
6434 LLT Unused;
6435 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
6436 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
6437 llvm_unreachable("inconsistent extractParts result");
6438
6439 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6440 auto Select = MIRBuilder.buildSelect(NarrowTy,
6441 CondReg, Src1Regs[I], Src2Regs[I]);
6442 DstRegs.push_back(Select.getReg(0));
6443 }
6444
6445 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6446 auto Select = MIRBuilder.buildSelect(
6447 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
6448 DstLeftoverRegs.push_back(Select.getReg(0));
6449 }
6450
6451 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6452 LeftoverTy, DstLeftoverRegs);
6453
6454 MI.eraseFromParent();
6455 return Legalized;
6456}
6457
6458LegalizerHelper::LegalizeResult
6459LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
6460 LLT NarrowTy) {
6461 if (TypeIdx != 1)
6462 return UnableToLegalize;
6463
6464 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6465 unsigned NarrowSize = NarrowTy.getSizeInBits();
6466
6467 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6468 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
6469
6471 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6472 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
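// Illustrative example: a 128-bit ctlz narrowed to 64 bits becomes
// Hi == 0 ? 64 + ctlz(Lo) : ctlz(Hi).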
6473 auto C_0 = B.buildConstant(NarrowTy, 0);
6474 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
6475 UnmergeSrc.getReg(1), C_0);
6476 auto LoCTLZ = IsUndef ?
6477 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
6478 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
6479 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
6480 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
6481 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
6482 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
6483
6484 MI.eraseFromParent();
6485 return Legalized;
6486 }
6487
6488 return UnableToLegalize;
6489}
6490
6491LegalizerHelper::LegalizeResult
6492LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
6493 LLT NarrowTy) {
6494 if (TypeIdx != 1)
6495 return UnableToLegalize;
6496
6497 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6498 unsigned NarrowSize = NarrowTy.getSizeInBits();
6499
6500 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6501 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
6502
6504 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6505 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
6506 auto C_0 = B.buildConstant(NarrowTy, 0);
6507 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
6508 UnmergeSrc.getReg(0), C_0);
6509 auto HiCTTZ = IsUndef ?
6510 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
6511 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
6512 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
6513 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
6514 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
6515 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
6516
6517 MI.eraseFromParent();
6518 return Legalized;
6519 }
6520
6521 return UnableToLegalize;
6522}
6523
6524LegalizerHelper::LegalizeResult
6525LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
6526 LLT NarrowTy) {
6527 if (TypeIdx != 1)
6528 return UnableToLegalize;
6529
6530 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6531 unsigned NarrowSize = NarrowTy.getSizeInBits();
6532
6533 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6534 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
6535
6536 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
6537 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
6538 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
6539
6540 MI.eraseFromParent();
6541 return Legalized;
6542 }
6543
6544 return UnableToLegalize;
6545}
6546
6547LegalizerHelper::LegalizeResult
6548LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
6549 LLT NarrowTy) {
6550 if (TypeIdx != 1)
6551 return UnableToLegalize;
6552
6553 MachineIRBuilder &B = MIRBuilder;
6554 Register ExpReg = MI.getOperand(2).getReg();
6555 LLT ExpTy = MRI.getType(ExpReg);
6556
6557 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
6558
6559 // Clamp the exponent to the range of the target type.
6560 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
6561 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
6562 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
6563 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
6564
6565 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
6566 Observer.changingInstr(MI);
6567 MI.getOperand(2).setReg(Trunc.getReg(0));
6568 Observer.changedInstr(MI);
6569 return Legalized;
6570}
6571
6572LegalizerHelper::LegalizeResult
6573LegalizerHelper::lowerBitCount(MachineInstr &MI) {
6574 unsigned Opc = MI.getOpcode();
6575 const auto &TII = MIRBuilder.getTII();
6576 auto isSupported = [this](const LegalityQuery &Q) {
6577 auto QAction = LI.getAction(Q).Action;
6578 return QAction == Legal || QAction == Libcall || QAction == Custom;
6579 };
6580 switch (Opc) {
6581 default:
6582 return UnableToLegalize;
6583 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
6584 // This trivially expands to CTLZ.
6585 Observer.changingInstr(MI);
6586 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
6587 Observer.changedInstr(MI);
6588 return Legalized;
6589 }
6590 case TargetOpcode::G_CTLZ: {
6591 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6592 unsigned Len = SrcTy.getSizeInBits();
6593
6594 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6595 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
6596 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
6597 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
6598 auto ICmp = MIRBuilder.buildICmp(
6599 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
6600 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
6601 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
6602 MI.eraseFromParent();
6603 return Legalized;
6604 }
6605 // for now, we do this:
6606 // NewLen = NextPowerOf2(Len);
6607 // x = x | (x >> 1);
6608 // x = x | (x >> 2);
6609 // ...
6610 // x = x | (x >>16);
6611 // x = x | (x >>32); // for 64-bit input
6612 // Up to NewLen/2
6613 // return Len - popcount(x);
6614 //
6615 // Ref: "Hacker's Delight" by Henry Warren
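// Worked example (illustrative, 8-bit): x = 0b00010110 has 3 leading zeros.
// After the or-shift smearing below, x = 0b00011111, popcount(x) = 5, and
// Len - 5 = 8 - 5 = 3.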
6616 Register Op = SrcReg;
6617 unsigned NewLen = PowerOf2Ceil(Len);
6618 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
6619 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
6620 auto MIBOp = MIRBuilder.buildOr(
6621 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
6622 Op = MIBOp.getReg(0);
6623 }
6624 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
6625 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
6626 MIBPop);
6627 MI.eraseFromParent();
6628 return Legalized;
6629 }
6630 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
6631 // This trivially expands to CTTZ.
6632 Observer.changingInstr(MI);
6633 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
6634 Observer.changedInstr(MI);
6635 return Legalized;
6636 }
6637 case TargetOpcode::G_CTTZ: {
6638 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6639
6640 unsigned Len = SrcTy.getSizeInBits();
6641 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6642 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
6643 // zero.
6644 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
6645 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
6646 auto ICmp = MIRBuilder.buildICmp(
6647 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
6648 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
6649 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
6650 MI.eraseFromParent();
6651 return Legalized;
6652 }
6653 // for now, we use: { return popcount(~x & (x - 1)); }
6654 // unless the target has ctlz but not ctpop, in which case we use:
6655 // { return 32 - nlz(~x & (x-1)); }
6656 // Ref: "Hacker's Delight" by Henry Warren
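// Worked example (illustrative, 8-bit): x = 0b00010100 (cttz = 2).
// ~x & (x - 1) = 0b11101011 & 0b00010011 = 0b00000011, whose popcount is 2.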
6657 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
6658 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
6659 auto MIBTmp = MIRBuilder.buildAnd(
6660 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
6661 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
6662 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
6663 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
6664 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
6665 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
6666 MI.eraseFromParent();
6667 return Legalized;
6668 }
6669 Observer.changingInstr(MI);
6670 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
6671 MI.getOperand(1).setReg(MIBTmp.getReg(0));
6672 Observer.changedInstr(MI);
6673 return Legalized;
6674 }
6675 case TargetOpcode::G_CTPOP: {
6676 Register SrcReg = MI.getOperand(1).getReg();
6677 LLT Ty = MRI.getType(SrcReg);
6678 unsigned Size = Ty.getSizeInBits();
6679 MachineIRBuilder &B = MIRBuilder;
6680
6681 // Count set bits in blocks of 2 bits. Default approach would be
6682 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
6683 // We use following formula instead:
6684 // B2Count = val - { (val >> 1) & 0x55555555 }
6685 // since it gives same result in blocks of 2 with one instruction less.
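// Worked example (illustrative, one 2-bit block): for the block 0b11,
// val - ((val >> 1) & 0b01) = 3 - 1 = 2 set bits; for 0b10 it gives
// 2 - 1 = 1, matching the masked-add formulation.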
6686 auto C_1 = B.buildConstant(Ty, 1);
6687 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
6688 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
6689 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
6690 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
6691 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
6692
6693 // In order to get count in blocks of 4 add values from adjacent block of 2.
6694 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
6695 auto C_2 = B.buildConstant(Ty, 2);
6696 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
6697 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
6698 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
6699 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
6700 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
6701 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
6702
6703 // For count in blocks of 8 bits we don't have to mask high 4 bits before
6704 // addition since count value sits in range {0,...,8} and 4 bits are enough
6705 // to hold such binary values. After addition high 4 bits still hold count
6706 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
6707 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
6708 auto C_4 = B.buildConstant(Ty, 4);
6709 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
6710 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
6711 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
6712 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
6713 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
6714
6715 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
6716 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
6717 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
6718 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
6719
6720 // Shift count result from 8 high bits to low bits.
6721 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
6722
6723 auto IsMulSupported = [this](const LLT Ty) {
6724 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
6725 return Action == Legal || Action == WidenScalar || Action == Custom;
6726 };
6727 if (IsMulSupported(Ty)) {
6728 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
6729 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
6730 } else {
6731 auto ResTmp = B8Count;
6732 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
6733 auto ShiftC = B.buildConstant(Ty, Shift);
6734 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
6735 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
6736 }
6737 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
6738 }
6739 MI.eraseFromParent();
6740 return Legalized;
6741 }
6742 }
6743}
6744
6745// Check that (every element of) Reg is undef or not an exact multiple of BW.
6746static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
6747 Register Reg, unsigned BW) {
6748 return matchUnaryPredicate(
6749 MRI, Reg,
6750 [=](const Constant *C) {
6751 // Null constant here means an undef.
6752 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
6753 return !CI || CI->getValue().urem(BW) != 0;
6754 },
6755 /*AllowUndefs*/ true);
6756}
6757
6758LegalizerHelper::LegalizeResult
6759LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
6760 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
6761 LLT Ty = MRI.getType(Dst);
6762 LLT ShTy = MRI.getType(Z);
6763
6764 unsigned BW = Ty.getScalarSizeInBits();
6765
6766 if (!isPowerOf2_32(BW))
6767 return UnableToLegalize;
6768
6769 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6770 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6771
6772 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
6773 // fshl X, Y, Z -> fshr X, Y, -Z
6774 // fshr X, Y, Z -> fshl X, Y, -Z
6775 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
6776 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
6777 } else {
6778 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
6779 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
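// Illustrative note: pre-shifting by one makes the inverted amount safe.
// With C = Z % BW, fshl(X, Y, C) selects bits [2*BW-1-C : BW-C] of X:Y; the
// replacement fshr(X >> 1, fshr(X, Y, 1), ~Z) shifts the concatenation right
// by one first and then by ~Z & (BW-1) = BW-1-C, selecting the same bits
// while keeping every shift amount strictly below BW.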
6780 auto One = MIRBuilder.buildConstant(ShTy, 1);
6781 if (IsFSHL) {
6782 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
6783 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
6784 } else {
6785 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
6786 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
6787 }
6788
6789 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
6790 }
6791
6792 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
6793 MI.eraseFromParent();
6794 return Legalized;
6795}
6796
6797LegalizerHelper::LegalizeResult
6798LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
6799 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
6800 LLT Ty = MRI.getType(Dst);
6801 LLT ShTy = MRI.getType(Z);
6802
6803 const unsigned BW = Ty.getScalarSizeInBits();
6804 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6805
6806 Register ShX, ShY;
6807 Register ShAmt, InvShAmt;
6808
6809 // FIXME: Emit optimized urem by constant instead of letting it expand later.
6810 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
6811 // fshl: X << C | Y >> (BW - C)
6812 // fshr: X << (BW - C) | Y >> C
6813 // where C = Z % BW is not zero
6814 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
6815 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
6816 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
6817 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
6818 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
6819 } else {
6820 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
6821 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
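// Editorial note (illustrative, not from the upstream source): splitting the
// second shift as Y >> 1 >> (BW - 1 - (Z % BW)) keeps every shift amount
// strictly below BW even when Z % BW == 0. For example, with BW = 8 and
// Z = 0, fshl gives (X << 0) | (Y >> 1 >> 7) = X | 0 = X, which matches the
// definition, whereas a single Y >> (BW - 0) would be an undefined shift.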
6822 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
6823 if (isPowerOf2_32(BW)) {
6824 // Z % BW -> Z & (BW - 1)
6825 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
6826 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
6827 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
6828 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
6829 } else {
6830 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
6831 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
6832 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
6833 }
6834
6835 auto One = MIRBuilder.buildConstant(ShTy, 1);
6836 if (IsFSHL) {
6837 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
6838 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
6839 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
6840 } else {
6841 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
6842 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
6843 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
6844 }
6845 }
6846
6847 MIRBuilder.buildOr(Dst, ShX, ShY);
6848 MI.eraseFromParent();
6849 return Legalized;
6850}
6851
6852 LegalizerHelper::LegalizeResult
6853 LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
6854 // These operations approximately do the following (while avoiding undefined
6855 // shifts by BW):
6856 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
6857 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
6858 Register Dst = MI.getOperand(0).getReg();
6859 LLT Ty = MRI.getType(Dst);
6860 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
6861
6862 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6863 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6864
6865 // TODO: Use smarter heuristic that accounts for vector legalization.
6866 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
6867 return lowerFunnelShiftAsShifts(MI);
6868
6869 // This only works for powers of 2; fall back to shifts if it fails.
6870 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
6871 if (Result == UnableToLegalize)
6872 return lowerFunnelShiftAsShifts(MI);
6873 return Result;
6874}
6875
6876 LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
6877 auto [Dst, Src] = MI.getFirst2Regs();
6878 LLT DstTy = MRI.getType(Dst);
6879 LLT SrcTy = MRI.getType(Src);
6880
6881 uint32_t DstTySize = DstTy.getSizeInBits();
6882 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
6883 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
6884
6885 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
6886 !isPowerOf2_32(SrcTyScalarSize))
6887 return UnableToLegalize;
6888
6889 // The step between the extends is too large; split it by creating an
6890 // intermediate extend instruction.
6891 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
6892 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
6893 // If the destination type is illegal, split it into multiple statements
6894 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
6895 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
6896 // Unmerge the vector
6897 LLT EltTy = MidTy.changeElementCount(
6898 MidTy.getElementCount().divideCoefficientBy(2));
6899 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
6900
6901 // ZExt the vectors
6902 LLT ZExtResTy = DstTy.changeElementCount(
6903 DstTy.getElementCount().divideCoefficientBy(2));
6904 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
6905 {UnmergeSrc.getReg(0)});
6906 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
6907 {UnmergeSrc.getReg(1)});
6908
6909 // Merge the ending vectors
6910 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
6911
6912 MI.eraseFromParent();
6913 return Legalized;
6914 }
6915 return UnableToLegalize;
6916}
6917
6918 LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
6919 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
6921 // Similar to how operand splitting is done in SelectionDAG, we can handle
6922 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
6923 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
6924 // %lo16(<4 x s16>) = G_TRUNC %inlo
6925 // %hi16(<4 x s16>) = G_TRUNC %inhi
6926 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
6927 // %res(<8 x s8>) = G_TRUNC %in16
6928
6929 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
6930
6931 Register DstReg = MI.getOperand(0).getReg();
6932 Register SrcReg = MI.getOperand(1).getReg();
6933 LLT DstTy = MRI.getType(DstReg);
6934 LLT SrcTy = MRI.getType(SrcReg);
6935
6936 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
6938 isPowerOf2_32(SrcTy.getNumElements()) &&
6940 // Split input type.
6941 LLT SplitSrcTy = SrcTy.changeElementCount(
6942 SrcTy.getElementCount().divideCoefficientBy(2));
6943
6944 // First, split the source into two smaller vectors.
6945 SmallVector<Register, 2> SplitSrcs;
6946 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
6947
6948 // Truncate the splits into intermediate narrower elements.
6949 LLT InterTy;
6950 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
6951 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
6952 else
6953 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
6954 for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
6955 SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
6956 }
6957
6958 // Combine the new truncates into one vector
6959 auto Merge = MIRBuilder.buildMergeLikeInstr(
6960 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
6961
6962 // Truncate the new vector to the final result type
6963 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
6964 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
6965 else
6966 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
6967
6968 MI.eraseFromParent();
6969
6970 return Legalized;
6971 }
6972 return UnableToLegalize;
6973}
6974
6975 LegalizerHelper::LegalizeResult
6976 LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
6977 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
6978 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
6979 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
6980 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6981 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
6982 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
6983 MI.eraseFromParent();
6984 return Legalized;
6985}
6986
6987 LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
6988 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
6989
6990 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
6991 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
6992
6994
6995 // If a rotate in the other direction is supported, use it.
6996 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6997 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
6998 isPowerOf2_32(EltSizeInBits))
6999 return lowerRotateWithReverseRotate(MI);
7000
7001 // If a funnel shift is supported, use it.
7002 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
7003 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
7004 bool IsFShLegal = false;
7005 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
7006 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
7007 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
7008 Register R3) {
7009 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
7010 MI.eraseFromParent();
7011 return Legalized;
7012 };
7013 // If a funnel shift in the other direction is supported, use it.
7014 if (IsFShLegal) {
7015 return buildFunnelShift(FShOpc, Dst, Src, Amt);
7016 } else if (isPowerOf2_32(EltSizeInBits)) {
7017 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
7018 return buildFunnelShift(RevFsh, Dst, Src, Amt);
7019 }
7020 }
7021
7022 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
7023 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
7024 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
7025 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
7026 Register ShVal;
7027 Register RevShiftVal;
7028 if (isPowerOf2_32(EltSizeInBits)) {
7029 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
7030 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
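// Editorial worked example (illustrative, not from the upstream source): with
// w = 8 and c = 3, rotl x, 3 = (x << (3 & 7)) | (x >> (-3 & 7))
// = (x << 3) | (x >> 5). For c = 0 both masked amounts are 0 and the OR of
// x << 0 and x >> 0 is simply x, so no out-of-range shift is ever produced.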
7031 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
7032 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
7033 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
7034 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
7035 RevShiftVal =
7036 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
7037 } else {
7038 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
7039 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
7040 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
7041 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
7042 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
7043 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
7044 auto One = MIRBuilder.buildConstant(AmtTy, 1);
7045 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
7046 RevShiftVal =
7047 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
7048 }
7049 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
7050 MI.eraseFromParent();
7051 return Legalized;
7052}
7053
7054// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
7055// representation.
7056 LegalizerHelper::LegalizeResult
7057 LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
7058 auto [Dst, Src] = MI.getFirst2Regs();
7059 const LLT S64 = LLT::scalar(64);
7060 const LLT S32 = LLT::scalar(32);
7061 const LLT S1 = LLT::scalar(1);
7062
7063 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
7064
7065 // unsigned cul2f(ulong u) {
7066 // uint lz = clz(u);
7067 // uint e = (u != 0) ? 127U + 63U - lz : 0;
7068 // u = (u << lz) & 0x7fffffffffffffffUL;
7069 // ulong t = u & 0xffffffffffUL;
7070 // uint v = (e << 23) | (uint)(u >> 40);
7071 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
7072 // return as_float(v + r);
7073 // }
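// Editorial worked example (illustrative, not from the upstream source): for
// u = 1, lz = 63 and e = 127 + 63 - 63 = 127; u << 63 masked to 63 bits is 0,
// so t = 0 and v = 127 << 23 = 0x3f800000 with r = 0. The resulting bit
// pattern 0x3f800000 is 1.0f, as expected.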
7074
7075 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
7076 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
7077
7078 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
7079
7080 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
7081 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
7082
7083 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
7084 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
7085
7086 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
7087 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
7088
7089 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
7090
7091 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
7092 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
7093
7094 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
7095 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
7096 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
7097
7098 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
7099 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
7100 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
7101 auto One = MIRBuilder.buildConstant(S32, 1);
7102
7103 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
7104 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
7105 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
7106 MIRBuilder.buildAdd(Dst, V, R);
7107
7108 MI.eraseFromParent();
7109 return Legalized;
7110}
7111
7112 LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
7113 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7114
7115 if (SrcTy == LLT::scalar(1)) {
7116 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
7117 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
7118 MIRBuilder.buildSelect(Dst, Src, True, False);
7119 MI.eraseFromParent();
7120 return Legalized;
7121 }
7122
7123 if (SrcTy != LLT::scalar(64))
7124 return UnableToLegalize;
7125
7126 if (DstTy == LLT::scalar(32)) {
7127 // TODO: SelectionDAG has several alternative expansions to port which may
7128 // be more reasonable depending on the available instructions. If a target
7129 // has sitofp, does not have CTLZ, or can efficiently use f64 as an
7130 // intermediate type, this is probably worse.
7131 return lowerU64ToF32BitOps(MI);
7132 }
7133
7134 return UnableToLegalize;
7135}
7136
7137 LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
7138 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7139
7140 const LLT S64 = LLT::scalar(64);
7141 const LLT S32 = LLT::scalar(32);
7142 const LLT S1 = LLT::scalar(1);
7143
7144 if (SrcTy == S1) {
7145 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
7146 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
7147 MIRBuilder.buildSelect(Dst, Src, True, False);
7148 MI.eraseFromParent();
7149 return Legalized;
7150 }
7151
7152 if (SrcTy != S64)
7153 return UnableToLegalize;
7154
7155 if (DstTy == S32) {
7156 // signed cl2f(long l) {
7157 // long s = l >> 63;
7158 // float r = cul2f((l + s) ^ s);
7159 // return s ? -r : r;
7160 // }
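// Editorial worked example (illustrative, not from the upstream source): for
// l = -5, s = -1 and (l + s) ^ s = (-6) ^ (-1) = 5, so r = cul2f(5) = 5.0f
// and the select on s != 0 returns -r = -5.0f.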
7161 Register L = Src;
7162 auto SignBit = MIRBuilder.buildConstant(S64, 63);
7163 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
7164
7165 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
7166 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
7167 auto R = MIRBuilder.buildUITOFP(S32, Xor);
7168
7169 auto RNeg = MIRBuilder.buildFNeg(S32, R);
7170 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
7171 MIRBuilder.buildConstant(S64, 0));
7172 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
7173 MI.eraseFromParent();
7174 return Legalized;
7175 }
7176
7177 return UnableToLegalize;
7178}
7179
7180 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
7181 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7182 const LLT S64 = LLT::scalar(64);
7183 const LLT S32 = LLT::scalar(32);
7184
7185 if (SrcTy != S64 && SrcTy != S32)
7186 return UnableToLegalize;
7187 if (DstTy != S32 && DstTy != S64)
7188 return UnableToLegalize;
7189
7190 // FPTOSI gives same result as FPTOUI for positive signed integers.
7191 // FPTOUI needs to deal with fp values that convert to unsigned integers
7192 // greater than or equal to 2^31 for float or 2^63 for double. For brevity, 2^Exp.
7193
7194 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
7195 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
7196 : APFloat::IEEEdouble(),
7197 APInt::getZero(SrcTy.getSizeInBits()));
7198 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
7199
7200 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
7201
7202 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
7203 // For fp values greater than or equal to Threshold (2^Exp), we use FPTOSI on
7204 // (Value - 2^Exp) and add 2^Exp back by setting the highest bit of the result to 1.
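// Editorial worked example (illustrative, not from the upstream source),
// assuming an f32 source and an i32 result: for Src = 3.5e9f (>= 2^31), the
// lowering computes FPTOSI(3.5e9 - 2^31) = 1352516352 and XORs in 0x80000000,
// giving 3500000000; for Src < 2^31 the plain FPTOSI result is selected.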
7205 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
7206 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
7207 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
7208 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
7209
7210 const LLT S1 = LLT::scalar(1);
7211
7212 MachineInstrBuilder FCMP =
7213 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
7214 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
7215
7216 MI.eraseFromParent();
7217 return Legalized;
7218}
7219
7220 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
7221 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7222 const LLT S64 = LLT::scalar(64);
7223 const LLT S32 = LLT::scalar(32);
7224
7225 // FIXME: Only f32 to i64 conversions are supported.
7226 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
7227 return UnableToLegalize;
7228
7229 // Expand f32 -> i64 conversion
7230 // This algorithm comes from compiler-rt's implementation of fixsfdi:
7231 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
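// Editorial worked example (illustrative, not from the upstream source): for
// Src = 1.0f (bit pattern 0x3f800000) the exponent field is 127, so the
// unbiased Exponent below is 0; the significand with the implicit bit is
// 0x800000, and since Exponent is not greater than ExponentLoBit (23) the
// value is shifted right by 23, giving 1 with a zero sign correction.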
7232
7233 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
7234
7235 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
7236 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
7237
7238 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
7239 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
7240
7241 auto SignMask = MIRBuilder.buildConstant(SrcTy,
7242 APInt::getSignMask(SrcEltBits));
7243 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
7244 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
7245 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
7246 Sign = MIRBuilder.buildSExt(DstTy, Sign);
7247
7248 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
7249 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
7250 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
7251
7252 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
7253 R = MIRBuilder.buildZExt(DstTy, R);
7254
7255 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
7256 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
7257 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
7258 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
7259
7260 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
7261 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
7262
7263 const LLT S1 = LLT::scalar(1);
7264 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
7265 S1, Exponent, ExponentLoBit);
7266
7267 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
7268
7269 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
7270 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
7271
7272 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
7273
7274 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
7275 S1, Exponent, ZeroSrcTy);
7276
7277 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
7278 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
7279
7280 MI.eraseFromParent();
7281 return Legalized;
7282}
7283
7284// f64 -> f16 conversion using round-to-nearest-even rounding mode.
7285 LegalizerHelper::LegalizeResult
7286 LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
7287 const LLT S1 = LLT::scalar(1);
7288 const LLT S32 = LLT::scalar(32);
7289
7290 auto [Dst, Src] = MI.getFirst2Regs();
7291 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
7292 MRI.getType(Src).getScalarType() == LLT::scalar(64));
7293
7294 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
7295 return UnableToLegalize;
7296
7298 unsigned Flags = MI.getFlags();
7299 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
7300 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
7301 MI.eraseFromParent();
7302 return Legalized;
7303 }
7304
7305 const unsigned ExpMask = 0x7ff;
7306 const unsigned ExpBiasf64 = 1023;
7307 const unsigned ExpBiasf16 = 15;
7308
7309 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
7310 Register U = Unmerge.getReg(0);
7311 Register UH = Unmerge.getReg(1);
7312
7313 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
7314 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
7315
7316 // Subtract the fp64 exponent bias (1023) to get the real exponent and
7317 // add the f16 bias (15) to get the biased exponent for the f16 format.
7318 E = MIRBuilder.buildAdd(
7319 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
7320
7323
7324 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
7325 MIRBuilder.buildConstant(S32, 0x1ff));
7326 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
7327
7328 auto Zero = MIRBuilder.buildConstant(S32, 0);
7329 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
7330 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
7331 M = MIRBuilder.buildOr(S32, M, Lo40Set);
7332
7333 // (M != 0 ? 0x0200 : 0) | 0x7c00;
7334 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
7335 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
7336 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
7337
7338 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
7339 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
7340
7341 // N = M | (E << 12);
7342 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
7343 auto N = MIRBuilder.buildOr(S32, M, EShl12);
7344
7345 // B = clamp(1-E, 0, 13);
7346 auto One = MIRBuilder.buildConstant(S32, 1);
7347 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
7348 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
7349 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
7350
7351 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
7352 MIRBuilder.buildConstant(S32, 0x1000));
7353
7354 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
7355 auto D0 = MIRBuilder.buildShl(S32, D, B);
7356
7357 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
7358 D0, SigSetHigh);
7359 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
7360 D = MIRBuilder.buildOr(S32, D, D1);
7361
7362 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
7363 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
7364
7365 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
7367
7368 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
7369 MIRBuilder.buildConstant(S32, 3));
7370 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
7371
7372 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
7373 MIRBuilder.buildConstant(S32, 5));
7374 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
7375
7376 V1 = MIRBuilder.buildOr(S32, V0, V1);
7377 V = MIRBuilder.buildAdd(S32, V, V1);
7378
7379 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
7380 E, MIRBuilder.buildConstant(S32, 30));
7381 V = MIRBuilder.buildSelect(S32, CmpEGt30,
7382 MIRBuilder.buildConstant(S32, 0x7c00), V);
7383
7384 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
7385 E, MIRBuilder.buildConstant(S32, 1039));
7386 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
7387
7388 // Extract the sign bit.
7389 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
7390 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
7391
7392 // Insert the sign bit
7393 V = MIRBuilder.buildOr(S32, Sign, V);
7394
7395 MIRBuilder.buildTrunc(Dst, V);
7396 MI.eraseFromParent();
7397 return Legalized;
7398}
7399
7400 LegalizerHelper::LegalizeResult
7401 LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
7402 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
7403 const LLT S64 = LLT::scalar(64);
7404 const LLT S16 = LLT::scalar(16);
7405
7406 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
7407 return lowerFPTRUNC_F64_TO_F16(MI);
7408
7409 return UnableToLegalize;
7410}
7411
7412 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
7413 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7414 LLT Ty = MRI.getType(Dst);
7415
7416 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
7417 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
7418 MI.eraseFromParent();
7419 return Legalized;
7420}
7421
7422 static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
7423 switch (Opc) {
7424 case TargetOpcode::G_SMIN:
7425 return CmpInst::ICMP_SLT;
7426 case TargetOpcode::G_SMAX:
7427 return CmpInst::ICMP_SGT;
7428 case TargetOpcode::G_UMIN:
7429 return CmpInst::ICMP_ULT;
7430 case TargetOpcode::G_UMAX:
7431 return CmpInst::ICMP_UGT;
7432 default:
7433 llvm_unreachable("not in integer min/max");
7434 }
7435}
7436
7437 LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
7438 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7439
7440 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
7441 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
7442
7443 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
7444 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
7445
7446 MI.eraseFromParent();
7447 return Legalized;
7448}
7449
7450 LegalizerHelper::LegalizeResult
7451 LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
7452 GSUCmp *Cmp = cast<GSUCmp>(&MI);
7453
7454 Register Dst = Cmp->getReg(0);
7455 LLT DstTy = MRI.getType(Dst);
7456 LLT CmpTy = DstTy.changeElementSize(1);
7457
7458 CmpInst::Predicate LTPredicate = Cmp->isSigned()
7459 ? CmpInst::ICMP_SLT
7460 : CmpInst::ICMP_ULT;
7461 CmpInst::Predicate GTPredicate = Cmp->isSigned()
7462 ? CmpInst::ICMP_SGT
7463 : CmpInst::ICMP_UGT;
7464
7465 auto One = MIRBuilder.buildConstant(DstTy, 1);
7466 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
7467 auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
7468 Cmp->getRHSReg());
7469 auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
7470
7471 auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
7472 auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
7473 Cmp->getRHSReg());
7474 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
7475
7476 MI.eraseFromParent();
7477 return Legalized;
7478}
7479
7480 LegalizerHelper::LegalizeResult
7481 LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
7482 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
7483 const int Src0Size = Src0Ty.getScalarSizeInBits();
7484 const int Src1Size = Src1Ty.getScalarSizeInBits();
7485
7486 auto SignBitMask = MIRBuilder.buildConstant(
7487 Src0Ty, APInt::getSignMask(Src0Size));
7488
7489 auto NotSignBitMask = MIRBuilder.buildConstant(
7490 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
7491
7492 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
7493 Register And1;
7494 if (Src0Ty == Src1Ty) {
7495 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
7496 } else if (Src0Size > Src1Size) {
7497 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
7498 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
7499 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
7500 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
7501 } else {
7502 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
7503 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
7504 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
7505 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
7506 }
7507
7508 // Be careful about setting nsz/nnan/ninf on every instruction, since the
7509 // constants are a nan and -0.0, but the final result should preserve
7510 // everything.
7511 unsigned Flags = MI.getFlags();
7512
7513 // We masked the sign bit and the not-sign bit, so these are disjoint.
7514 Flags |= MachineInstr::Disjoint;
7515
7516 MIRBuilder.buildOr(Dst, And0, And1, Flags);
7517
7518 MI.eraseFromParent();
7519 return Legalized;
7520}
7521
7522 LegalizerHelper::LegalizeResult
7523 LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
7524 unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
7525 TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
7526
7527 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7528 LLT Ty = MRI.getType(Dst);
7529
7530 if (!MI.getFlag(MachineInstr::FmNoNans)) {
7531 // Insert canonicalizes if it's possible we need to quiet to get correct
7532 // sNaN behavior.
7533
7534 // Note this must be done here, and not as an optimization combine in the
7535 // absence of a dedicated quiet-sNaN instruction, as we're using an
7536 // omni-purpose G_FCANONICALIZE.
7537 if (!isKnownNeverSNaN(Src0, MRI))
7538 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
7539
7540 if (!isKnownNeverSNaN(Src1, MRI))
7541 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
7542 }
7543
7544 // If there are no nans, it's safe to simply replace this with the non-IEEE
7545 // version.
7546 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
7547 MI.eraseFromParent();
7548 return Legalized;
7549}
7550
7551 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
7552 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
7553 Register DstReg = MI.getOperand(0).getReg();
7554 LLT Ty = MRI.getType(DstReg);
7555 unsigned Flags = MI.getFlags();
7556
7557 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
7558 Flags);
7559 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
7560 MI.eraseFromParent();
7561 return Legalized;
7562}
7563
7564 LegalizerHelper::LegalizeResult
7565 LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
7566 auto [DstReg, X] = MI.getFirst2Regs();
7567 const unsigned Flags = MI.getFlags();
7568 const LLT Ty = MRI.getType(DstReg);
7569 const LLT CondTy = Ty.changeElementSize(1);
7570
7571 // round(x) =>
7572 // t = trunc(x);
7573 // d = fabs(x - t);
7574 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
7575 // return t + o;
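// Editorial worked example (illustrative, not from the upstream source): for
// x = -2.5, t = trunc(x) = -2.0 and d = fabs(-2.5 - (-2.0)) = 0.5, so
// o = copysign(1.0, -2.5) = -1.0 and the result is -3.0, i.e. ties round
// away from zero.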
7576
7577 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
7578
7579 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
7580 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
7581
7582 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
7583 auto Cmp =
7584 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
7585
7586 // Could emit G_UITOFP instead
7587 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
7588 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
7589 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
7590 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
7591
7592 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
7593
7594 MI.eraseFromParent();
7595 return Legalized;
7596}
7597
7598 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
7599 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7600 unsigned Flags = MI.getFlags();
7601 LLT Ty = MRI.getType(DstReg);
7602 const LLT CondTy = Ty.changeElementSize(1);
7603
7604 // result = trunc(src);
7605 // if (src < 0.0 && src != result)
7606 // result += -1.0.
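// Editorial worked example (illustrative, not from the upstream source): for
// src = -2.5, trunc gives -2.0; since src < 0.0 and src != -2.0, the SITOFP
// of the i1 AND contributes -1.0 and the result is -3.0. For src = 2.5 the
// condition is false, 0.0 is added, and the result stays 2.0.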
7607
7608 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
7609 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
7610
7611 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
7612 SrcReg, Zero, Flags);
7613 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
7614 SrcReg, Trunc, Flags);
7615 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
7616 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
7617
7618 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
7619 MI.eraseFromParent();
7620 return Legalized;
7621}
7622
7623 LegalizerHelper::LegalizeResult
7624 LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
7625 const unsigned NumOps = MI.getNumOperands();
7626 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
7627 unsigned PartSize = Src0Ty.getSizeInBits();
7628
7629 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
7630 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
7631
7632 for (unsigned I = 2; I != NumOps; ++I) {
7633 const unsigned Offset = (I - 1) * PartSize;
7634
7635 Register SrcReg = MI.getOperand(I).getReg();
7636 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
7637
7638 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
7639 MRI.createGenericVirtualRegister(WideTy);
7640
7641 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
7642 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
7643 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
7644 ResultReg = NextResult;
7645 }
7646
7647 if (DstTy.isPointer()) {
7648 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
7649 DstTy.getAddressSpace())) {
7650 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
7651 return UnableToLegalize;
7652 }
7653
7654 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
7655 }
7656
7657 MI.eraseFromParent();
7658 return Legalized;
7659}
7660
7661 LegalizerHelper::LegalizeResult
7662 LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
7663 const unsigned NumDst = MI.getNumOperands() - 1;
7664 Register SrcReg = MI.getOperand(NumDst).getReg();
7665 Register Dst0Reg = MI.getOperand(0).getReg();
7666 LLT DstTy = MRI.getType(Dst0Reg);
7667 if (DstTy.isPointer())
7668 return UnableToLegalize; // TODO
7669
7670 SrcReg = coerceToScalar(SrcReg);
7671 if (!SrcReg)
7672 return UnableToLegalize;
7673
7674 // Expand scalarizing unmerge as bitcast to integer and shift.
7675 LLT IntTy = MRI.getType(SrcReg);
7676
7677 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
7678
7679 const unsigned DstSize = DstTy.getSizeInBits();
7680 unsigned Offset = DstSize;
7681 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
7682 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
7683 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
7684 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
7685 }
7686
7687 MI.eraseFromParent();
7688 return Legalized;
7689}
7690
7691/// Lower a vector extract or insert by writing the vector to a stack temporary
7692/// and reloading the element or vector.
7693///
7694/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
7695/// =>
7696/// %stack_temp = G_FRAME_INDEX
7697/// G_STORE %vec, %stack_temp
7698/// %idx = clamp(%idx, %vec.getNumElements())
7699/// %element_ptr = G_PTR_ADD %stack_temp, %idx
7700/// %dst = G_LOAD %element_ptr
7703 Register DstReg = MI.getOperand(0).getReg();
7704 Register SrcVec = MI.getOperand(1).getReg();
7705 Register InsertVal;
7706 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
7707 InsertVal = MI.getOperand(2).getReg();
7708
7709 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
7710
7711 LLT VecTy = MRI.getType(SrcVec);
7712 LLT EltTy = VecTy.getElementType();
7713 unsigned NumElts = VecTy.getNumElements();
7714
7715 int64_t IdxVal;
7716 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
7717 SmallVector<Register, 8> SrcRegs;
7718 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
7719
7720 if (InsertVal) {
7721 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
7722 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
7723 } else {
7724 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
7725 }
7726
7727 MI.eraseFromParent();
7728 return Legalized;
7729 }
7730
7731 if (!EltTy.isByteSized()) { // Not implemented.
7732 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
7733 return UnableToLegalize;
7734 }
7735
7736 unsigned EltBytes = EltTy.getSizeInBytes();
7737 Align VecAlign = getStackTemporaryAlignment(VecTy);
7738 Align EltAlign;
7739
7740 MachinePointerInfo PtrInfo;
7741 auto StackTemp = createStackTemporary(
7742 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
7743 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
7744
7745 // Get the pointer to the element, and be sure not to hit undefined behavior
7746 // if the index is out of bounds.
7747 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
7748
7749 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
7750 int64_t Offset = IdxVal * EltBytes;
7751 PtrInfo = PtrInfo.getWithOffset(Offset);
7752 EltAlign = commonAlignment(VecAlign, Offset);
7753 } else {
7754 // We lose information with a variable offset.
7755 EltAlign = getStackTemporaryAlignment(EltTy);
7756 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
7757 }
7758
7759 if (InsertVal) {
7760 // Write the inserted element
7761 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
7762
7763 // Reload the whole vector.
7764 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
7765 } else {
7766 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
7767 }
7768
7769 MI.eraseFromParent();
7770 return Legalized;
7771}
7772
7773 LegalizerHelper::LegalizeResult
7774 LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
7775 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
7776 MI.getFirst3RegLLTs();
7777 LLT IdxTy = LLT::scalar(32);
7778
7779 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
7780 Register Undef;
7781 SmallVector<Register, 32> BuildVec;
7782 LLT EltTy = DstTy.getScalarType();
7783
7784 for (int Idx : Mask) {
7785 if (Idx < 0) {
7786 if (!Undef.isValid())
7787 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
7788 BuildVec.push_back(Undef);
7789 continue;
7790 }
7791
7792 if (Src0Ty.isScalar()) {
7793 BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
7794 } else {
7795 int NumElts = Src0Ty.getNumElements();
7796 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
7797 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
7798 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
7799 auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
7800 BuildVec.push_back(Extract.getReg(0));
7801 }
7802 }
7803
7804 if (DstTy.isScalar())
7805 MIRBuilder.buildCopy(DstReg, BuildVec[0]);
7806 else
7807 MIRBuilder.buildBuildVector(DstReg, BuildVec);
7808 MI.eraseFromParent();
7809 return Legalized;
7810}
7811
7812 LegalizerHelper::LegalizeResult
7813 LegalizerHelper::lowerVECTOR_COMPRESS(MachineInstr &MI) {
7814 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
7815 MI.getFirst4RegLLTs();
7816
7817 if (VecTy.isScalableVector())
7818 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
7819
7820 Align VecAlign = getStackTemporaryAlignment(VecTy);
7821 MachinePointerInfo PtrInfo;
7822 Register StackPtr =
7823 createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
7824 PtrInfo)
7825 .getReg(0);
7826 MachinePointerInfo ValPtrInfo =
7827 MachinePointerInfo::getUnknownStack(*MI.getMF());
7828
7829 LLT IdxTy = LLT::scalar(32);
7830 LLT ValTy = VecTy.getElementType();
7831 Align ValAlign = getStackTemporaryAlignment(ValTy);
7832
7833 auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
7834
7835 bool HasPassthru =
7836 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
7837
7838 if (HasPassthru)
7839 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
7840
7841 Register LastWriteVal;
7842 std::optional<APInt> PassthruSplatVal =
7843 isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
7844
7845 if (PassthruSplatVal.has_value()) {
7846 LastWriteVal =
7847 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
7848 } else if (HasPassthru) {
7849 auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
7850 Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
7851 {LLT::scalar(32)}, {Popcount});
7852
7853 Register LastElmtPtr =
7854 getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
7855 LastWriteVal =
7856 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
7857 .getReg(0);
7858 }
7859
7860 unsigned NumElmts = VecTy.getNumElements();
7861 for (unsigned I = 0; I < NumElmts; ++I) {
7862 auto Idx = MIRBuilder.buildConstant(IdxTy, I);
7863 auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
7864 Register ElmtPtr =
7865 getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
7866 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
7867
7868 LLT MaskITy = MaskTy.getElementType();
7869 auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
7870 if (MaskITy.getSizeInBits() > 1)
7871 MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
7872
7873 MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
7874 OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
7875
7876 if (HasPassthru && I == NumElmts - 1) {
7877 auto EndOfVector =
7878 MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
7879 auto AllLanesSelected = MIRBuilder.buildICmp(
7880 CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
7881 OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
7882 {OutPos, EndOfVector});
7883 ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
7884
7885 LastWriteVal =
7886 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
7887 .getReg(0);
7888 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
7889 }
7890 }
7891
7892 // TODO: Use StackPtr's FrameIndex alignment.
7893 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
7894
7895 MI.eraseFromParent();
7896 return Legalized;
7897}
7898
7899 Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
7900 Register AllocSize,
7901 Align Alignment,
7902 LLT PtrTy) {
7903 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
7904
7905 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
7906 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
7907
7908 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
7909 // have to generate an extra instruction to negate the alloc and then use
7910 // G_PTR_ADD to add the negative offset.
7911 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
7912 if (Alignment > Align(1)) {
7913 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
7914 AlignMask.negate();
7915 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
7916 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
7917 }
7918
7919 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
7920}
7921
7922 LegalizerHelper::LegalizeResult
7923 LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
7924 const auto &MF = *MI.getMF();
7925 const auto &TFI = *MF.getSubtarget().getFrameLowering();
7926 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
7927 return UnableToLegalize;
7928
7929 Register Dst = MI.getOperand(0).getReg();
7930 Register AllocSize = MI.getOperand(1).getReg();
7931 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
7932
7933 LLT PtrTy = MRI.getType(Dst);
7934 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
7935 Register SPTmp =
7936 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
7937
7938 MIRBuilder.buildCopy(SPReg, SPTmp);
7939 MIRBuilder.buildCopy(Dst, SPTmp);
7940
7941 MI.eraseFromParent();
7942 return Legalized;
7943}
7944
7945 LegalizerHelper::LegalizeResult
7946 LegalizerHelper::lowerStackSave(MachineInstr &MI) {
7947 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
7948 if (!StackPtr)
7949 return UnableToLegalize;
7950
7951 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
7952 MI.eraseFromParent();
7953 return Legalized;
7954}
7955
7956 LegalizerHelper::LegalizeResult
7957 LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
7958 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
7959 if (!StackPtr)
7960 return UnableToLegalize;
7961
7962 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
7963 MI.eraseFromParent();
7964 return Legalized;
7965}
7966
7967 LegalizerHelper::LegalizeResult
7968 LegalizerHelper::lowerExtract(MachineInstr &MI) {
7969 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7970 unsigned Offset = MI.getOperand(2).getImm();
7971
7972 // Extract sub-vector or one element
7973 if (SrcTy.isVector()) {
7974 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
7975 unsigned DstSize = DstTy.getSizeInBits();
7976
7977 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
7978 (Offset + DstSize <= SrcTy.getSizeInBits())) {
7979 // Unmerge and allow access to each Src element for the artifact combiner.
7980 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
7981
7982 // Take element(s) we need to extract and copy it (merge them).
7983 SmallVector<Register, 8> SubVectorElts;
7984 for (unsigned Idx = Offset / SrcEltSize;
7985 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
7986 SubVectorElts.push_back(Unmerge.getReg(Idx));
7987 }
7988 if (SubVectorElts.size() == 1)
7989 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
7990 else
7991 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
7992
7993 MI.eraseFromParent();
7994 return Legalized;
7995 }
7996 }
7997
7998 if (DstTy.isScalar() &&
7999 (SrcTy.isScalar() ||
8000 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
8001 LLT SrcIntTy = SrcTy;
8002 if (!SrcTy.isScalar()) {
8003 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
8004 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
8005 }
8006
8007 if (Offset == 0)
8008 MIRBuilder.buildTrunc(DstReg, SrcReg);
8009 else {
8010 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
8011 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
8012 MIRBuilder.buildTrunc(DstReg, Shr);
8013 }
8014
8015 MI.eraseFromParent();
8016 return Legalized;
8017 }
8018
8019 return UnableToLegalize;
8020}
8021
8022 LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
8023 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
8024 uint64_t Offset = MI.getOperand(3).getImm();
8025
8026 LLT DstTy = MRI.getType(Src);
8027 LLT InsertTy = MRI.getType(InsertSrc);
8028
8029 // Insert sub-vector or one element
8030 if (DstTy.isVector() && !InsertTy.isPointer()) {
8031 LLT EltTy = DstTy.getElementType();
8032 unsigned EltSize = EltTy.getSizeInBits();
8033 unsigned InsertSize = InsertTy.getSizeInBits();
8034
8035 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
8036 (Offset + InsertSize <= DstTy.getSizeInBits())) {
8037 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
8038 SmallVector<Register, 8> DstElts;
8039 unsigned Idx = 0;
8040 // Elements from Src before insert start Offset
8041 for (; Idx < Offset / EltSize; ++Idx) {
8042 DstElts.push_back(UnmergeSrc.getReg(Idx));
8043 }
8044
8045 // Replace elements in Src with elements from InsertSrc
8046 if (InsertTy.getSizeInBits() > EltSize) {
8047 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
8048 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
8049 ++Idx, ++i) {
8050 DstElts.push_back(UnmergeInsertSrc.getReg(i));
8051 }
8052 } else {
8053 DstElts.push_back(InsertSrc);
8054 ++Idx;
8055 }
8056
8057 // Remaining elements from Src after insert
8058 for (; Idx < DstTy.getNumElements(); ++Idx) {
8059 DstElts.push_back(UnmergeSrc.getReg(Idx));
8060 }
8061
8062 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
8063 MI.eraseFromParent();
8064 return Legalized;
8065 }
8066 }
8067
8068 if (InsertTy.isVector() ||
8069 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
8070 return UnableToLegalize;
8071 const DataLayout &DL = MIRBuilder.getDataLayout();
8071
8073 if ((DstTy.isPointer() &&
8074 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
8075 (InsertTy.isPointer() &&
8076 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
8077 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
8078 return UnableToLegalize;
8079 }
8080
8081 LLT IntDstTy = DstTy;
8082
8083 if (!DstTy.isScalar()) {
8084 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
8085 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
8086 }
8087
8088 if (!InsertTy.isScalar()) {
8089 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
8090 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
8091 }
8092
8093 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
8094 if (Offset != 0) {
8095 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
8096 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
8097 }
8098
8099 APInt MaskVal = APInt::getBitsSetWithWrap(
8100 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
8101
8102 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
8103 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
8104 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
8105
8106 MIRBuilder.buildCast(Dst, Or);
8107 MI.eraseFromParent();
8108 return Legalized;
8109}
8110
8111 LegalizerHelper::LegalizeResult
8112 LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
8113 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
8114 MI.getFirst4RegLLTs();
8115 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
8116
8117 LLT Ty = Dst0Ty;
8118 LLT BoolTy = Dst1Ty;
8119
8120 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
8121
8122 if (IsAdd)
8123 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
8124 else
8125 MIRBuilder.buildSub(NewDst0, LHS, RHS);
8126
8127 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
8128
8129 auto Zero = MIRBuilder.buildConstant(Ty, 0);
8130
8131 // For an addition, the result should be less than one of the operands (LHS)
8132 // if and only if the other operand (RHS) is negative, otherwise there will
8133 // be overflow.
8134 // For a subtraction, the result should be less than one of the operands
8135 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
8136 // otherwise there will be overflow.
8137 auto ResultLowerThanLHS =
8138 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
8139 auto ConditionRHS = MIRBuilder.buildICmp(
8140 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
8141
8142 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
8143
8144 MIRBuilder.buildCopy(Dst0, NewDst0);
8145 MI.eraseFromParent();
8146
8147 return Legalized;
8148}
8149
8150 LegalizerHelper::LegalizeResult
8151 LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
8152 auto [Res, LHS, RHS] = MI.getFirst3Regs();
8153 LLT Ty = MRI.getType(Res);
8154 bool IsSigned;
8155 bool IsAdd;
8156 unsigned BaseOp;
8157 switch (MI.getOpcode()) {
8158 default:
8159 llvm_unreachable("unexpected addsat/subsat opcode");
8160 case TargetOpcode::G_UADDSAT:
8161 IsSigned = false;
8162 IsAdd = true;
8163 BaseOp = TargetOpcode::G_ADD;
8164 break;
8165 case TargetOpcode::G_SADDSAT:
8166 IsSigned = true;
8167 IsAdd = true;
8168 BaseOp = TargetOpcode::G_ADD;
8169 break;
8170 case TargetOpcode::G_USUBSAT:
8171 IsSigned = false;
8172 IsAdd = false;
8173 BaseOp = TargetOpcode::G_SUB;
8174 break;
8175 case TargetOpcode::G_SSUBSAT:
8176 IsSigned = true;
8177 IsAdd = false;
8178 BaseOp = TargetOpcode::G_SUB;
8179 break;
8180 }
8181
8182 if (IsSigned) {
8183 // sadd.sat(a, b) ->
8184 // hi = 0x7fffffff - smax(a, 0)
8185 // lo = 0x80000000 - smin(a, 0)
8186 // a + smin(smax(lo, b), hi)
8187 // ssub.sat(a, b) ->
8188 // lo = smax(a, -1) - 0x7fffffff
8189 // hi = smin(a, -1) - 0x80000000
8190 // a - smin(smax(lo, b), hi)
8191 // TODO: AMDGPU can use a "median of 3" instruction here:
8192 // a +/- med3(lo, b, hi)
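// Editorial worked example (illustrative, not from the upstream source),
// assuming i8: for a = 100 and b = 50, hi = 127 - smax(100, 0) = 27 and
// lo = -128 - smin(100, 0) = -128, so the clamped addend is
// smin(smax(-128, 50), 27) = 27 and a + 27 = 127, the saturated result.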
8193 uint64_t NumBits = Ty.getScalarSizeInBits();
8194 auto MaxVal =
8195 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
8196 auto MinVal =
8197 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
8198 MachineInstrBuilder Hi, Lo;
8199 if (IsAdd) {
8200 auto Zero = MIRBuilder.buildConstant(Ty, 0);
8201 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
8202 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
8203 } else {
8204 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
8205 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
8206 MaxVal);
8207 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
8208 MinVal);
8209 }
8210 auto RHSClamped =
8211 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
8212 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
8213 } else {
8214 // uadd.sat(a, b) -> a + umin(~a, b)
8215 // usub.sat(a, b) -> a - umin(a, b)
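// Editorial worked example (illustrative, not from the upstream source),
// assuming i8: uadd.sat(200, 100) computes umin(~200, 100) = umin(55, 100)
// = 55 and returns 200 + 55 = 255, the saturated result; usub.sat(50, 100)
// computes umin(50, 100) = 50 and returns 50 - 50 = 0.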
8216 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
8217 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
8218 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
8219 }
8220
8221 MI.eraseFromParent();
8222 return Legalized;
8223}
8224
8225 LegalizerHelper::LegalizeResult
8226 LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
8227 auto [Res, LHS, RHS] = MI.getFirst3Regs();
8228 LLT Ty = MRI.getType(Res);
8229 LLT BoolTy = Ty.changeElementSize(1);
8230 bool IsSigned;
8231 bool IsAdd;
8232 unsigned OverflowOp;
8233 switch (MI.getOpcode()) {
8234 default:
8235 llvm_unreachable("unexpected addsat/subsat opcode");
8236 case TargetOpcode::G_UADDSAT:
8237 IsSigned = false;
8238 IsAdd = true;
8239 OverflowOp = TargetOpcode::G_UADDO;
8240 break;
8241 case TargetOpcode::G_SADDSAT:
8242 IsSigned = true;
8243 IsAdd = true;
8244 OverflowOp = TargetOpcode::G_SADDO;
8245 break;
8246 case TargetOpcode::G_USUBSAT:
8247 IsSigned = false;
8248 IsAdd = false;
8249 OverflowOp = TargetOpcode::G_USUBO;
8250 break;
8251 case TargetOpcode::G_SSUBSAT:
8252 IsSigned = true;
8253 IsAdd = false;
8254 OverflowOp = TargetOpcode::G_SSUBO;
8255 break;
8256 }
8257
8258 auto OverflowRes =
8259 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
8260 Register Tmp = OverflowRes.getReg(0);
8261 Register Ov = OverflowRes.getReg(1);
8262 MachineInstrBuilder Clamp;
8263 if (IsSigned) {
8264 // sadd.sat(a, b) ->
8265 // {tmp, ov} = saddo(a, b)
8266 // ov ? (tmp >>s 31) + 0x80000000 : r
8267 // ssub.sat(a, b) ->
8268 // {tmp, ov} = ssubo(a, b)
8269 // ov ? (tmp >>s 31) + 0x80000000 : r
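// Editorial worked example (illustrative, not from the upstream source),
// assuming i32: saddo(0x7fffffff, 1) wraps to tmp = 0x80000000 with ov = 1;
// tmp >>s 31 is 0xffffffff, and adding 0x80000000 yields 0x7fffffff, the
// correct positive saturation value selected when ov is set.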
8270 uint64_t NumBits = Ty.getScalarSizeInBits();
8271 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
8272 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
8273 auto MinVal =
8274 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
8275 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
8276 } else {
8277 // uadd.sat(a, b) ->
8278 // {tmp, ov} = uaddo(a, b)
8279 // ov ? 0xffffffff : tmp
8280 // usub.sat(a, b) ->
8281 // {tmp, ov} = usubo(a, b)
8282 // ov ? 0 : tmp
8283 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
8284 }
8285 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
8286
8287 MI.eraseFromParent();
8288 return Legalized;
8289}
8290
8291 LegalizerHelper::LegalizeResult
8292 LegalizerHelper::lowerShlSat(MachineInstr &MI) {
8293 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
8294 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
8295 "Expected shlsat opcode!");
8296 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
8297 auto [Res, LHS, RHS] = MI.getFirst3Regs();
8298 LLT Ty = MRI.getType(Res);
8299 LLT BoolTy = Ty.changeElementSize(1);
8300
8301 unsigned BW = Ty.getScalarSizeInBits();
8302 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
8303 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
8304 : MIRBuilder.buildLShr(Ty, Result, RHS);
8305
8306 MachineInstrBuilder SatVal;
8307 if (IsSigned) {
8308 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
8309 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
8310 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
8311 MIRBuilder.buildConstant(Ty, 0));
8312 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
8313 } else {
8314 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
8315 }
8316 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
8317 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
8318
8319 MI.eraseFromParent();
8320 return Legalized;
8321}
8322
8323 LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
8324 auto [Dst, Src] = MI.getFirst2Regs();
8325 const LLT Ty = MRI.getType(Src);
8326 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
8327 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
8328
8329 // Swap most and least significant byte, set remaining bytes in Res to zero.
8330 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
8331 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
8332 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
8333 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
8334
8335 // Set i-th high/low byte in Res to i-th low/high byte from Src.
8336 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
8337 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
8338 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
8339 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
8340 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
8341 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
8342 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
8343 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
8344 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
8345 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
8346 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
8347 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
8348 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
8349 }
8350 Res.getInstr()->getOperand(0).setReg(Dst);
8351
8352 MI.eraseFromParent();
8353 return Legalized;
8354}
8355
8356//{ (Src & Mask) >> N } | { (Src << N) & Mask }
8358 MachineInstrBuilder Src, const APInt &Mask) {
8359 const LLT Ty = Dst.getLLTTy(*B.getMRI());
8360 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
8361 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
8362 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
8363 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
8364 return B.buildOr(Dst, LHS, RHS);
8365}
8366
8367 LegalizerHelper::LegalizeResult
8368 LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
8369 auto [Dst, Src] = MI.getFirst2Regs();
8370 const LLT Ty = MRI.getType(Src);
8371 unsigned Size = Ty.getScalarSizeInBits();
8372
8373 if (Size >= 8) {
8374 MachineInstrBuilder BSWAP =
8375 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
8376
8377 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
8378 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
8379 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
8380 MachineInstrBuilder Swap4 =
8381 SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
8382
8383 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
8384 // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
8385 // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
8386 MachineInstrBuilder Swap2 =
8387 SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
8388
8389 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
8390 // 6|7
8391 // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
8392 // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
8393 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
8394 } else {
8395 // Expand bitreverse for types smaller than 8 bits.
8396 MachineInstrBuilder Tmp;
8397 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
8398 MachineInstrBuilder Tmp2;
8399 if (I < J) {
8400 auto ShAmt = MIRBuilder.buildConstant(Ty, J - I);
8401 Tmp2 = MIRBuilder.buildShl(Ty, Src, ShAmt);
8402 } else {
8403 auto ShAmt = MIRBuilder.buildConstant(Ty, I - J);
8404 Tmp2 = MIRBuilder.buildLShr(Ty, Src, ShAmt);
8405 }
8406
8407 auto Mask = MIRBuilder.buildConstant(Ty, 1ULL << J);
8408 Tmp2 = MIRBuilder.buildAnd(Ty, Tmp2, Mask);
8409 if (I == 0)
8410 Tmp = Tmp2;
8411 else
8412 Tmp = MIRBuilder.buildOr(Ty, Tmp, Tmp2);
8413 }
8414 MIRBuilder.buildCopy(Dst, Tmp);
8415 }
8416
8417 MI.eraseFromParent();
8418 return Legalized;
8419}
8420
8421 LegalizerHelper::LegalizeResult
8422 LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
8423 MachineFunction &MF = MIRBuilder.getMF();
8424
8425 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
8426 int NameOpIdx = IsRead ? 1 : 0;
8427 int ValRegIndex = IsRead ? 0 : 1;
8428
8429 Register ValReg = MI.getOperand(ValRegIndex).getReg();
8430 const LLT Ty = MRI.getType(ValReg);
8431 const MDString *RegStr = cast<MDString>(
8432 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
8433
8434 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
8435 if (!PhysReg.isValid())
8436 return UnableToLegalize;
8437
8438 if (IsRead)
8439 MIRBuilder.buildCopy(ValReg, PhysReg);
8440 else
8441 MIRBuilder.buildCopy(PhysReg, ValReg);
8442
8443 MI.eraseFromParent();
8444 return Legalized;
8445}
8446
8447 LegalizerHelper::LegalizeResult
8448 LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
8449 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
8450 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
8451 Register Result = MI.getOperand(0).getReg();
8452 LLT OrigTy = MRI.getType(Result);
8453 auto SizeInBits = OrigTy.getScalarSizeInBits();
8454 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
8455
8456 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
8457 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
8458 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
8459 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
8460
8461 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
8462 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
8463 MIRBuilder.buildTrunc(Result, Shifted);
8464
8465 MI.eraseFromParent();
8466 return Legalized;
8467}
8468
8469 LegalizerHelper::LegalizeResult
8470 LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
8471 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
8472 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
8473
8474 if (Mask == fcNone) {
8475 MIRBuilder.buildConstant(DstReg, 0);
8476 MI.eraseFromParent();
8477 return Legalized;
8478 }
8479 if (Mask == fcAllFlags) {
8480 MIRBuilder.buildConstant(DstReg, 1);
8481 MI.eraseFromParent();
8482 return Legalized;
8483 }
8484
8485 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
8486 // version
8487
8488 unsigned BitSize = SrcTy.getScalarSizeInBits();
8489 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8490
8491 LLT IntTy = LLT::scalar(BitSize);
8492 if (SrcTy.isVector())
8493 IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
8494 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
8495
8496 // Various masks.
8497 APInt SignBit = APInt::getSignMask(BitSize);
8498 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
8499 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
8500 APInt ExpMask = Inf;
8501 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
8502 APInt QNaNBitMask =
8503 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
8504 APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
8505
8506 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
8507 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
8508 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
8509 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
8510 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
8511
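// Abs clears the sign bit; Sign is true exactly when clearing it changed the
// bit pattern, i.e. when the sign bit was set.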
8512 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
8513 auto Sign =
8514 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
8515
8516 auto Res = MIRBuilder.buildConstant(DstTy, 0);
8517 // Clang doesn't support capture of structured bindings:
8518 LLT DstTyCopy = DstTy;
8519 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
8520 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
8521 };
8522
8523 // Tests that involve more than one class should be processed first.
8524 if ((Mask & fcFinite) == fcFinite) {
8525 // finite(V) ==> abs(V) u< exp_mask
8526 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
8527 ExpMaskC));
8528 Mask &= ~fcFinite;
8529 } else if ((Mask & fcFinite) == fcPosFinite) {
8530 // finite(V) && V > 0 ==> V u< exp_mask
8531 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
8532 ExpMaskC));
8533 Mask &= ~fcPosFinite;
8534 } else if ((Mask & fcFinite) == fcNegFinite) {
8535 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
8536 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
8537 ExpMaskC);
8538 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
8539 appendToRes(And);
8540 Mask &= ~fcNegFinite;
8541 }
8542
8543 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
8544 // fcZero | fcSubnormal => test all exponent bits are 0
8545 // TODO: Handle sign bit specific cases
8546 // TODO: Handle inverted case
8547 if (PartialCheck == (fcZero | fcSubnormal)) {
8548 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
8549 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8550 ExpBits, ZeroC));
8551 Mask &= ~PartialCheck;
8552 }
8553 }
8554
8555 // Check for individual classes.
8556 if (FPClassTest PartialCheck = Mask & fcZero) {
8557 if (PartialCheck == fcPosZero)
8558 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8559 AsInt, ZeroC));
8560 else if (PartialCheck == fcZero)
8561 appendToRes(
8562 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
8563 else // fcNegZero
8564 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8565 AsInt, SignBitC));
8566 }
8567
8568 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
8569 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
8570 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
8571 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
8572 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
8573 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
8574 auto SubnormalRes =
8575 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
8576 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
8577 if (PartialCheck == fcNegSubnormal)
8578 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
8579 appendToRes(SubnormalRes);
8580 }
8581
8582 if (FPClassTest PartialCheck = Mask & fcInf) {
8583 if (PartialCheck == fcPosInf)
8584 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8585 AsInt, InfC));
8586 else if (PartialCheck == fcInf)
8587 appendToRes(
8588 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
8589 else { // fcNegInf
8590 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
8591 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
8592 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8593 AsInt, NegInfC));
8594 }
8595 }
8596
8597 if (FPClassTest PartialCheck = Mask & fcNan) {
8598 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
8599 if (PartialCheck == fcNan) {
8600 // isnan(V) ==> abs(V) u> int(inf)
8601 appendToRes(
8602 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
8603 } else if (PartialCheck == fcQNan) {
8604 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
8605 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
8606 InfWithQnanBitC));
8607 } else { // fcSNan
8608 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
8609 // abs(V) u< (unsigned(Inf) | quiet_bit)
8610 auto IsNan =
8611 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
8612 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
8613 Abs, InfWithQnanBitC);
8614 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
8615 }
8616 }
8617
8618 if (FPClassTest PartialCheck = Mask & fcNormal) {
8619 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
8620 // (max_exp-1))
8621 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
8622 auto ExpMinusOne = MIRBuilder.buildSub(
8623 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
8624 APInt MaxExpMinusOne = ExpMask - ExpLSB;
8625 auto NormalRes =
8626 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
8627 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
8628 if (PartialCheck == fcNegNormal)
8629 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
8630 else if (PartialCheck == fcPosNormal) {
8631 auto PosSign = MIRBuilder.buildXor(
8632 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InvertionMask));
8633 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
8634 }
8635 appendToRes(NormalRes);
8636 }
8637
8638 MIRBuilder.buildCopy(DstReg, Res);
8639 MI.eraseFromParent();
8640 return Legalized;
8641}
8642
8643 LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
8644 // Implement G_SELECT in terms of XOR, AND, OR.
8645 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
8646 MI.getFirst4RegLLTs();
8647
8648 bool IsEltPtr = DstTy.isPointerOrPointerVector();
8649 if (IsEltPtr) {
8650 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
8651 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
8652 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
8653 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
8654 DstTy = NewTy;
8655 }
8656
8657 if (MaskTy.isScalar()) {
8658 // Turn the scalar condition into a vector condition mask if needed.
8659
8660 Register MaskElt = MaskReg;
8661
8662 // The condition was potentially zero extended before, but we want a sign
8663 // extended boolean.
8664 if (MaskTy != LLT::scalar(1))
8665 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
8666
8667 // Continue the sign extension (or truncate) to match the data type.
8668 MaskElt =
8669 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
8670
8671 if (DstTy.isVector()) {
8672 // Generate a vector splat idiom.
8673 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
8674 MaskReg = ShufSplat.getReg(0);
8675 } else {
8676 MaskReg = MaskElt;
8677 }
8678 MaskTy = DstTy;
8679 } else if (!DstTy.isVector()) {
8680 // Cannot handle the case that mask is a vector and dst is a scalar.
8681 return UnableToLegalize;
8682 }
8683
8684 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
8685 return UnableToLegalize;
8686 }
8687
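// Bitwise select: Dst = (Op1 & Mask) | (Op2 & ~Mask).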
8688 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
8689 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
8690 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
8691 if (IsEltPtr) {
8692 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
8693 MIRBuilder.buildIntToPtr(DstReg, Or);
8694 } else {
8695 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
8696 }
8697 MI.eraseFromParent();
8698 return Legalized;
8699}
8700
8701 LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
8702 // Split DIVREM into individual instructions.
8703 unsigned Opcode = MI.getOpcode();
8704
8705 MIRBuilder.buildInstr(
8706 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
8707 : TargetOpcode::G_UDIV,
8708 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
8709 MIRBuilder.buildInstr(
8710 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
8711 : TargetOpcode::G_UREM,
8712 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
8713 MI.eraseFromParent();
8714 return Legalized;
8715}
8716
8717 LegalizerHelper::LegalizeResult
8718 LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
8719 // Expand %res = G_ABS %a into:
8720 // %v1 = G_ASHR %a, scalar_size-1
8721 // %v2 = G_ADD %a, %v1
8722 // %res = G_XOR %v2, %v1
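// %v1 is 0 for non-negative inputs and -1 for negative ones, so the add/xor
// pair is either a no-op or a two's-complement negation.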
8723 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
8724 Register OpReg = MI.getOperand(1).getReg();
8725 auto ShiftAmt =
8726 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
8727 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
8728 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
8729 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
8730 MI.eraseFromParent();
8731 return Legalized;
8732}
8733
8734 LegalizerHelper::LegalizeResult
8735 LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
8736 // Expand %res = G_ABS %a into:
8737 // %v1 = G_CONSTANT 0
8738 // %v2 = G_SUB %v1, %a
8739 // %res = G_SMAX %a, %v2
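// smax(%a, 0 - %a) selects whichever of the value and its negation is
// non-negative, i.e. the absolute value.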
8740 Register SrcReg = MI.getOperand(1).getReg();
8741 LLT Ty = MRI.getType(SrcReg);
8742 auto Zero = MIRBuilder.buildConstant(Ty, 0);
8743 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
8744 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
8745 MI.eraseFromParent();
8746 return Legalized;
8747}
8748
8749 LegalizerHelper::LegalizeResult
8750 LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
8751 Register SrcReg = MI.getOperand(1).getReg();
8752 Register DestReg = MI.getOperand(0).getReg();
8753 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
8754 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
8755 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
8756 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
8757 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
8758 MI.eraseFromParent();
8759 return Legalized;
8760}
8761
8762 LegalizerHelper::LegalizeResult
8763 LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
8764 Register SrcReg = MI.getOperand(1).getReg();
8765 LLT SrcTy = MRI.getType(SrcReg);
8766 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
8767
8768 // The source could be a scalar if the IR type was <1 x sN>.
8769 if (SrcTy.isScalar()) {
8770 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
8771 return UnableToLegalize; // FIXME: handle extension.
8772 // This can be just a plain copy.
8773 Observer.changingInstr(MI);
8774 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
8775 Observer.changedInstr(MI);
8776 return Legalized;
8777 }
8778 return UnableToLegalize;
8779}
8780
8781 LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
8782 MachineFunction &MF = *MI.getMF();
8783 const DataLayout &DL = MIRBuilder.getDataLayout();
8784 LLVMContext &Ctx = MF.getFunction().getContext();
8785 Register ListPtr = MI.getOperand(1).getReg();
8786 LLT PtrTy = MRI.getType(ListPtr);
8787
8788 // ListPtr is a pointer to the head of the list. Get the address
8789 // of the head of the list.
8790 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
8791 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
8792 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
8793 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
8794
8795 const Align A(MI.getOperand(2).getImm());
8796 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
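// If the requested alignment exceeds the minimum stack argument alignment,
// round the list pointer up before loading the argument from it.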
8797 if (A > TLI.getMinStackArgumentAlignment()) {
8798 Register AlignAmt =
8799 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
8800 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
8801 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
8802 VAList = AndDst.getReg(0);
8803 }
8804
8805 // Increment the pointer, VAList, to the next vaarg.
8806 // The list should be bumped by the size of the element in the current head
8807 // of the list.
8808 Register Dst = MI.getOperand(0).getReg();
8809 LLT LLTTy = MRI.getType(Dst);
8810 Type *Ty = getTypeForLLT(LLTTy, Ctx);
8811 auto IncAmt =
8812 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
8813 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
8814
8815 // Store the incremented VAList to the legalized pointer
8816 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
8817 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
8818 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
8819 // Load the actual argument out of the pointer VAList
8820 Align EltAlignment = DL.getABITypeAlign(Ty);
8821 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
8822 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
8823 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
8824
8825 MI.eraseFromParent();
8826 return Legalized;
8827}
8828
8829 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
8830 // On Darwin, -Os means optimize for size without hurting performance, so
8831 // only really optimize for size when -Oz (MinSize) is used.
8832 if (MF.getTarget().getTargetTriple().isOSDarwin())
8833 return MF.getFunction().hasMinSize();
8834 return MF.getFunction().hasOptSize();
8835}
8836
8837// Returns a list of types to use for memory op lowering in MemOps. A partial
8838// port of findOptimalMemOpLowering in TargetLowering.
8839static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
8840 unsigned Limit, const MemOp &Op,
8841 unsigned DstAS, unsigned SrcAS,
8842 const AttributeList &FuncAttributes,
8843 const TargetLowering &TLI) {
8844 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
8845 return false;
8846
8847 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
8848
8849 if (Ty == LLT()) {
8850 // Use the largest scalar type whose alignment constraints are satisfied.
8851 // We only need to check DstAlign here as SrcAlign is always greater or
8852 // equal to DstAlign (or zero).
8853 Ty = LLT::scalar(64);
8854 if (Op.isFixedDstAlign())
8855 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
8856 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
8857 Ty = LLT::scalar(Ty.getSizeInBytes());
8858 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
8859 // FIXME: check for the largest legal type we can load/store to.
8860 }
8861
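// Greedily cover the remaining length with the current type, shrinking it
// (or allowing one final overlapping access) once it no longer fits.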
8862 unsigned NumMemOps = 0;
8863 uint64_t Size = Op.size();
8864 while (Size) {
8865 unsigned TySize = Ty.getSizeInBytes();
8866 while (TySize > Size) {
8867 // For now, only use non-vector loads / stores for the left-over pieces.
8868 LLT NewTy = Ty;
8869 // FIXME: check for mem op safety and legality of the types. Not all of
8870 // SDAGisms map cleanly to GISel concepts.
8871 if (NewTy.isVector())
8872 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
8873 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
8874 unsigned NewTySize = NewTy.getSizeInBytes();
8875 assert(NewTySize > 0 && "Could not find appropriate type");
8876
8877 // If the new LLT cannot cover all of the remaining bits, then consider
8878 // issuing an unaligned and overlapping load / store (or a pair of them).
8879 unsigned Fast;
8880 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
8881 MVT VT = getMVTForLLT(Ty);
8882 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
8883 TLI.allowsMisalignedMemoryAccesses(
8884 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
8885 MachineMemOperand::MONone, &Fast) &&
8886 Fast)
8887 TySize = Size;
8888 else {
8889 Ty = NewTy;
8890 TySize = NewTySize;
8891 }
8892 }
8893
8894 if (++NumMemOps > Limit)
8895 return false;
8896
8897 MemOps.push_back(Ty);
8898 Size -= TySize;
8899 }
8900
8901 return true;
8902}
8903
8904// Get a vectorized representation of the memset value operand, GISel edition.
8905 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
8906 MachineRegisterInfo &MRI = *MIB.getMRI();
8907 unsigned NumBits = Ty.getScalarSizeInBits();
8908 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
8909 if (!Ty.isVector() && ValVRegAndVal) {
8910 APInt Scalar = ValVRegAndVal->Value.trunc(8);
8911 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
8912 return MIB.buildConstant(Ty, SplatVal).getReg(0);
8913 }
8914
8915 // Extend the byte value to the larger type, and then multiply by a magic
8916 // value 0x010101... in order to replicate it across every byte.
8917 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
8918 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
8919 return MIB.buildConstant(Ty, 0).getReg(0);
8920 }
8921
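// For example, an s32 memset value b becomes zext(b) * 0x01010101, which
// replicates the byte into every byte position.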
8922 LLT ExtType = Ty.getScalarType();
8923 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
8924 if (NumBits > 8) {
8925 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
8926 auto MagicMI = MIB.buildConstant(ExtType, Magic);
8927 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
8928 }
8929
8930 // For vector types create a G_BUILD_VECTOR.
8931 if (Ty.isVector())
8932 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
8933
8934 return Val;
8935}
8936
8937 LegalizerHelper::LegalizeResult
8938LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
8939 uint64_t KnownLen, Align Alignment,
8940 bool IsVolatile) {
8941 auto &MF = *MI.getParent()->getParent();
8942 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8943 auto &DL = MF.getDataLayout();
8944 LLVMContext &C = MF.getFunction().getContext();
8945
8946 assert(KnownLen != 0 && "Have a zero length memset length!");
8947
8948 bool DstAlignCanChange = false;
8949 MachineFrameInfo &MFI = MF.getFrameInfo();
8950 bool OptSize = shouldLowerMemFuncForSize(MF);
8951
8952 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8953 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8954 DstAlignCanChange = true;
8955
8956 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
8957 std::vector<LLT> MemOps;
8958
8959 const auto &DstMMO = **MI.memoperands_begin();
8960 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8961
8962 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
8963 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
8964
8965 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
8966 MemOp::Set(KnownLen, DstAlignCanChange,
8967 Alignment,
8968 /*IsZeroMemset=*/IsZeroVal,
8969 /*IsVolatile=*/IsVolatile),
8970 DstPtrInfo.getAddrSpace(), ~0u,
8971 MF.getFunction().getAttributes(), TLI))
8972 return UnableToLegalize;
8973
8974 if (DstAlignCanChange) {
8975 // Get an estimate of the type from the LLT.
8976 Type *IRTy = getTypeForLLT(MemOps[0], C);
8977 Align NewAlign = DL.getABITypeAlign(IRTy);
8978 if (NewAlign > Alignment) {
8979 Alignment = NewAlign;
8980 unsigned FI = FIDef->getOperand(1).getIndex();
8981 // Give the stack frame object a larger alignment if needed.
8982 if (MFI.getObjectAlign(FI) < Alignment)
8983 MFI.setObjectAlignment(FI, Alignment);
8984 }
8985 }
8986
8987 MachineIRBuilder MIB(MI);
8988 // Find the largest store and generate the bit pattern for it.
8989 LLT LargestTy = MemOps[0];
8990 for (unsigned i = 1; i < MemOps.size(); i++)
8991 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
8992 LargestTy = MemOps[i];
8993
8994 // The memset stored value is always defined as an s8, so in order to make it
8995 // work with larger store types we need to repeat the bit pattern across the
8996 // wider type.
8997 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
8998
8999 if (!MemSetValue)
9000 return UnableToLegalize;
9001
9002 // Generate the stores. For each store type in the list, we generate the
9003 // matching store of that type to the destination address.
9004 LLT PtrTy = MRI.getType(Dst);
9005 unsigned DstOff = 0;
9006 unsigned Size = KnownLen;
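// Emit one store per type chosen above at increasing offsets; the last store
// may overlap the previous one when the remaining size is smaller than its type.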
9007 for (unsigned I = 0; I < MemOps.size(); I++) {
9008 LLT Ty = MemOps[I];
9009 unsigned TySize = Ty.getSizeInBytes();
9010 if (TySize > Size) {
9011 // Issuing an unaligned load / store pair that overlaps with the previous
9012 // pair. Adjust the offset accordingly.
9013 assert(I == MemOps.size() - 1 && I != 0);
9014 DstOff -= TySize - Size;
9015 }
9016
9017 // If this store is smaller than the largest store see whether we can get
9018 // the smaller value for free with a truncate.
9019 Register Value = MemSetValue;
9020 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
9021 MVT VT = getMVTForLLT(Ty);
9022 MVT LargestVT = getMVTForLLT(LargestTy);
9023 if (!LargestTy.isVector() && !Ty.isVector() &&
9024 TLI.isTruncateFree(LargestVT, VT))
9025 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
9026 else
9027 Value = getMemsetValue(Val, Ty, MIB);
9028 if (!Value)
9029 return UnableToLegalize;
9030 }
9031
9032 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
9033
9034 Register Ptr = Dst;
9035 if (DstOff != 0) {
9036 auto Offset =
9037 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
9038 Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
9039 }
9040
9041 MIB.buildStore(Value, Ptr, *StoreMMO);
9042 DstOff += Ty.getSizeInBytes();
9043 Size -= TySize;
9044 }
9045
9046 MI.eraseFromParent();
9047 return Legalized;
9048}
9049
9050 LegalizerHelper::LegalizeResult
9051LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
9052 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
9053
9054 auto [Dst, Src, Len] = MI.getFirst3Regs();
9055
9056 const auto *MMOIt = MI.memoperands_begin();
9057 const MachineMemOperand *MemOp = *MMOIt;
9058 bool IsVolatile = MemOp->isVolatile();
9059
9060 // See if this is a constant length copy
9061 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
9062 // FIXME: support dynamically sized G_MEMCPY_INLINE
9063 assert(LenVRegAndVal &&
9064 "inline memcpy with dynamic size is not yet supported");
9065 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
9066 if (KnownLen == 0) {
9067 MI.eraseFromParent();
9068 return Legalized;
9069 }
9070
9071 const auto &DstMMO = **MI.memoperands_begin();
9072 const auto &SrcMMO = **std::next(MI.memoperands_begin());
9073 Align DstAlign = DstMMO.getBaseAlign();
9074 Align SrcAlign = SrcMMO.getBaseAlign();
9075
9076 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
9077 IsVolatile);
9078}
9079
9080 LegalizerHelper::LegalizeResult
9081LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
9082 uint64_t KnownLen, Align DstAlign,
9083 Align SrcAlign, bool IsVolatile) {
9084 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
9085 return lowerMemcpy(MI, Dst, Src, KnownLen,
9086 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
9087 IsVolatile);
9088}
9089
9090 LegalizerHelper::LegalizeResult
9091LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
9092 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
9093 Align SrcAlign, bool IsVolatile) {
9094 auto &MF = *MI.getParent()->getParent();
9095 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9096 auto &DL = MF.getDataLayout();
9097 LLVMContext &C = MF.getFunction().getContext();
9098
9099 assert(KnownLen != 0 && "Have a zero length memcpy length!");
9100
9101 bool DstAlignCanChange = false;
9102 MachineFrameInfo &MFI = MF.getFrameInfo();
9103 Align Alignment = std::min(DstAlign, SrcAlign);
9104
9105 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
9106 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
9107 DstAlignCanChange = true;
9108
9109 // FIXME: infer better src pointer alignment like SelectionDAG does here.
9110 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
9111 // if the memcpy is in a tail call position.
9112
9113 std::vector<LLT> MemOps;
9114
9115 const auto &DstMMO = **MI.memoperands_begin();
9116 const auto &SrcMMO = **std::next(MI.memoperands_begin());
9117 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
9118 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
9119
9120 if (!findGISelOptimalMemOpLowering(
9121 MemOps, Limit,
9122 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
9123 IsVolatile),
9124 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
9125 MF.getFunction().getAttributes(), TLI))
9126 return UnableToLegalize;
9127
9128 if (DstAlignCanChange) {
9129 // Get an estimate of the type from the LLT.
9130 Type *IRTy = getTypeForLLT(MemOps[0], C);
9131 Align NewAlign = DL.getABITypeAlign(IRTy);
9132
9133 // Don't promote to an alignment that would require dynamic stack
9134 // realignment.
9135 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
9136 if (!TRI->hasStackRealignment(MF))
9137 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
9138 NewAlign = NewAlign.previous();
9139
9140 if (NewAlign > Alignment) {
9141 Alignment = NewAlign;
9142 unsigned FI = FIDef->getOperand(1).getIndex();
9143 // Give the stack frame object a larger alignment if needed.
9144 if (MFI.getObjectAlign(FI) < Alignment)
9145 MFI.setObjectAlignment(FI, Alignment);
9146 }
9147 }
9148
9149 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
9150
9151 MachineIRBuilder MIB(MI);
9152 // Now we need to emit a load/store pair for each of the types we've
9153 // collected. I.e. for each type, generate a load from the source pointer of
9154 // that type width, and then generate a corresponding store to the dest buffer
9155 // of that value loaded. This can result in a sequence of loads and stores of
9156 // mixed types, depending on what the target specifies as good types to use.
9157 unsigned CurrOffset = 0;
9158 unsigned Size = KnownLen;
9159 for (auto CopyTy : MemOps) {
9160 // Issuing an unaligned load / store pair that overlaps with the previous
9161 // pair. Adjust the offset accordingly.
9162 if (CopyTy.getSizeInBytes() > Size)
9163 CurrOffset -= CopyTy.getSizeInBytes() - Size;
9164
9165 // Construct MMOs for the accesses.
9166 auto *LoadMMO =
9167 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
9168 auto *StoreMMO =
9169 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
9170
9171 // Create the load.
9172 Register LoadPtr = Src;
9173 Register Offset;
9174 if (CurrOffset != 0) {
9175 LLT SrcTy = MRI.getType(Src);
9176 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
9177 .getReg(0);
9178 LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
9179 }
9180 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
9181
9182 // Create the store.
9183 Register StorePtr = Dst;
9184 if (CurrOffset != 0) {
9185 LLT DstTy = MRI.getType(Dst);
9186 StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
9187 }
9188 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
9189 CurrOffset += CopyTy.getSizeInBytes();
9190 Size -= CopyTy.getSizeInBytes();
9191 }
9192
9193 MI.eraseFromParent();
9194 return Legalized;
9195}
9196
9197 LegalizerHelper::LegalizeResult
9198LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
9199 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
9200 bool IsVolatile) {
9201 auto &MF = *MI.getParent()->getParent();
9202 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9203 auto &DL = MF.getDataLayout();
9204 LLVMContext &C = MF.getFunction().getContext();
9205
9206 assert(KnownLen != 0 && "Have a zero length memmove length!");
9207
9208 bool DstAlignCanChange = false;
9209 MachineFrameInfo &MFI = MF.getFrameInfo();
9210 bool OptSize = shouldLowerMemFuncForSize(MF);
9211 Align Alignment = std::min(DstAlign, SrcAlign);
9212
9213 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
9214 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
9215 DstAlignCanChange = true;
9216
9217 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
9218 std::vector<LLT> MemOps;
9219
9220 const auto &DstMMO = **MI.memoperands_begin();
9221 const auto &SrcMMO = **std::next(MI.memoperands_begin());
9222 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
9223 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
9224
9225 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
9226 // to a bug in its findOptimalMemOpLowering implementation. For now do the
9227 // same thing here.
9228 if (!findGISelOptimalMemOpLowering(
9229 MemOps, Limit,
9230 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
9231 /*IsVolatile*/ true),
9232 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
9233 MF.getFunction().getAttributes(), TLI))
9234 return UnableToLegalize;
9235
9236 if (DstAlignCanChange) {
9237 // Get an estimate of the type from the LLT.
9238 Type *IRTy = getTypeForLLT(MemOps[0], C);
9239 Align NewAlign = DL.getABITypeAlign(IRTy);
9240
9241 // Don't promote to an alignment that would require dynamic stack
9242 // realignment.
9243 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
9244 if (!TRI->hasStackRealignment(MF))
9245 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
9246 NewAlign = NewAlign.previous();
9247
9248 if (NewAlign > Alignment) {
9249 Alignment = NewAlign;
9250 unsigned FI = FIDef->getOperand(1).getIndex();
9251 // Give the stack frame object a larger alignment if needed.
9252 if (MFI.getObjectAlign(FI) < Alignment)
9253 MFI.setObjectAlignment(FI, Alignment);
9254 }
9255 }
9256
9257 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
9258
9259 MachineIRBuilder MIB(MI);
9260 // Memmove requires that we perform the loads first before issuing the stores.
9261 // Apart from that, this loop is pretty much doing the same thing as the
9262 // memcpy codegen function.
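// Issuing every load before any store is what keeps this expansion correct
// when the source and destination ranges overlap.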
9263 unsigned CurrOffset = 0;
9264 SmallVector<Register, 16> LoadVals;
9265 for (auto CopyTy : MemOps) {
9266 // Construct MMO for the load.
9267 auto *LoadMMO =
9268 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
9269
9270 // Create the load.
9271 Register LoadPtr = Src;
9272 if (CurrOffset != 0) {
9273 LLT SrcTy = MRI.getType(Src);
9274 auto Offset =
9275 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
9276 LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
9277 }
9278 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
9279 CurrOffset += CopyTy.getSizeInBytes();
9280 }
9281
9282 CurrOffset = 0;
9283 for (unsigned I = 0; I < MemOps.size(); ++I) {
9284 LLT CopyTy = MemOps[I];
9285 // Now store the values loaded.
9286 auto *StoreMMO =
9287 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
9288
9289 Register StorePtr = Dst;
9290 if (CurrOffset != 0) {
9291 LLT DstTy = MRI.getType(Dst);
9292 auto Offset =
9293 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
9294 StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
9295 }
9296 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
9297 CurrOffset += CopyTy.getSizeInBytes();
9298 }
9299 MI.eraseFromParent();
9300 return Legalized;
9301}
9302
9303 LegalizerHelper::LegalizeResult
9304 LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
9305 const unsigned Opc = MI.getOpcode();
9306 // This combine is fairly complex so it's not written with a separate
9307 // matcher function.
9308 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
9309 Opc == TargetOpcode::G_MEMSET) &&
9310 "Expected memcpy like instruction");
9311
9312 auto MMOIt = MI.memoperands_begin();
9313 const MachineMemOperand *MemOp = *MMOIt;
9314
9315 Align DstAlign = MemOp->getBaseAlign();
9316 Align SrcAlign;
9317 auto [Dst, Src, Len] = MI.getFirst3Regs();
9318
9319 if (Opc != TargetOpcode::G_MEMSET) {
9320 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
9321 MemOp = *(++MMOIt);
9322 SrcAlign = MemOp->getBaseAlign();
9323 }
9324
9325 // See if this is a constant length copy
9326 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
9327 if (!LenVRegAndVal)
9328 return UnableToLegalize;
9329 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
9330
9331 if (KnownLen == 0) {
9332 MI.eraseFromParent();
9333 return Legalized;
9334 }
9335
9336 bool IsVolatile = MemOp->isVolatile();
9337 if (Opc == TargetOpcode::G_MEMCPY_INLINE)
9338 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
9339 IsVolatile);
9340
9341 // Don't try to optimize volatile.
9342 if (IsVolatile)
9343 return UnableToLegalize;
9344
9345 if (MaxLen && KnownLen > MaxLen)
9346 return UnableToLegalize;
9347
9348 if (Opc == TargetOpcode::G_MEMCPY) {
9349 auto &MF = *MI.getParent()->getParent();
9350 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9351 bool OptSize = shouldLowerMemFuncForSize(MF);
9352 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
9353 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
9354 IsVolatile);
9355 }
9356 if (Opc == TargetOpcode::G_MEMMOVE)
9357 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
9358 if (Opc == TargetOpcode::G_MEMSET)
9359 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
9360 return UnableToLegalize;
9361}
unsigned const MachineRegisterInfo * MRI
#define Success
static const LLT S1
static const LLT S64
static const LLT S32
static const LLT S16
amdgpu AMDGPU Register Bank Select
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition: Utils.h:74
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
std::string Name
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver)
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
#define R2(n)
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t High
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1249
APInt bitcastToAPInt() const
Definition: APFloat.h:1266
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition: APFloat.h:1050
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition: APFloat.h:1010
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:207
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1470
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:184
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:187
void negate()
Negate this APInt in place.
Definition: APInt.h:1428
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:197
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:851
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition: APInt.h:248
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
bool hasAttributes() const
Return true if the builder has IR-level attributes.
Definition: Attributes.h:1092
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
bool hasRetAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the return value.
Definition: Attributes.h:820
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:760
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:786
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:787
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:763
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:772
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:761
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:762
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:781
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:780
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:784
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:771
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:765
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:768
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:782
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:769
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:764
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:766
@ ICMP_EQ
equal
Definition: InstrTypes.h:778
@ ICMP_NE
not equal
Definition: InstrTypes.h:779
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:785
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:773
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:770
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:767
bool isSigned() const
Definition: InstrTypes.h:1007
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:871
const APFloat & getValueAPF() const
Definition: Constants.h:312
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:146
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
Definition: DataLayout.h:350
bool isBigEndian() const
Definition: DataLayout.h:196
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition: TypeSize.h:317
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:705
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:702
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:380
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:219
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Represent a G_FCMP.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Represents a threeway compare.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isTailCall(const MachineInstr &MI) const override
bool isEquality() const
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:266
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
Definition: LowLevelType.h:214
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:64
constexpr bool isPointerVector() const
Definition: LowLevelType.h:152
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelType.h:57
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:170
constexpr bool isByteSized() const
Definition: LowLevelType.h:263
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:184
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelType.h:221
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:100
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
Definition: LowLevelType.h:230
constexpr LLT getScalarType() const
Definition: LowLevelType.h:208
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:203
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
Definition: LowLevelType.h:124
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
LegalizeResult lowerShlSat(MachineInstr &MI)
LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LegalizeResult lowerSITOFP(MachineInstr &MI)
LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LegalizeResult lowerBitCount(MachineInstr &MI)
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LegalizeResult lowerLoad(GAnyLoad &MI)
LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizeResult lowerFConstant(MachineInstr &MI)
LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerBitreverse(MachineInstr &MI)
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult lowerEXT(MachineInstr &MI)
LegalizeResult lowerStore(GStore &MI)
LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LegalizeResult lowerFPTOUI(MachineInstr &MI)
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LegalizeResult lowerBitcast(MachineInstr &MI)
LegalizeResult lowerMinMax(MachineInstr &MI)
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LegalizeResult lowerInsert(MachineInstr &MI)
LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LegalizeResult lowerExtract(MachineInstr &MI)
LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LegalizeResult lowerFPOWI(MachineInstr &MI)
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVectorReduction(MachineInstr &MI)
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LegalizeResult lowerFCopySign(MachineInstr &MI)
LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LegalizeResult lowerFunnelShift(MachineInstr &MI)
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LegalizeResult lowerFMad(MachineInstr &MI)
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFFloor(MachineInstr &MI)
LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LegalizeResult lowerFPTOSI(MachineInstr &MI)
LegalizeResult lowerUITOFP(MachineInstr &MI)
LegalizeResult lowerShuffleVector(MachineInstr &MI)
LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerMergeValues(MachineInstr &MI)
LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LegalizeResult lowerRotate(MachineInstr &MI)
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LegalizeResult lowerDIVREM(MachineInstr &MI)
LegalizeResult lowerSelect(MachineInstr &MI)
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emiting a runtime library call instead.
LegalizeResult lowerStackRestore(MachineInstr &MI)
LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerStackSave(MachineInstr &MI)
LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeResult lowerTRUNC(MachineInstr &MI)
LegalizeResult lowerBswap(MachineInstr &MI)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LegalizeResult lowerConstant(MachineInstr &MI)
void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
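A minimal sketch (not part of this file) of how a driver invokes the helper; it assumes the caller already owns the MachineFunction, change observer, IR builder, and debug-location observer, as the Legalizer pass does, and the wrapper name tryLegalizeOne is hypothetical:
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;
// Sketch only; MF, Observer, MIRBuilder and LocObserver are the caller's.
static bool tryLegalizeOne(MachineFunction &MF, GISelChangeObserver &Observer,
                           MachineIRBuilder &MIRBuilder, MachineInstr &MI,
                           LostDebugLocObserver &LocObserver) {
  LegalizerHelper Helper(MF, Observer, MIRBuilder);
  // Replaces MI with an equivalent legal sequence, or reports failure.
  return Helper.legalizeInstrStep(MI, LocObserver) !=
         LegalizerHelper::UnableToLegalize;
}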
virtual unsigned getExtOpcodeForWideningConstant(LLT SmallTy) const
Return the opcode (SEXT/ZEXT/ANYEXT) that should be performed while widening a constant of type Small...
bool isLegalOrCustom(const LegalityQuery &Query) const
virtual bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Called for instructions with the Custom LegalizationAction.
bool isLegal(const LegalityQuery &Query) const
virtual bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition: MCInstrInfo.h:70
A single uniqued string.
Definition: Metadata.h:720
StringRef getString() const
Definition: Metadata.cpp:616
Machine Value Type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:231
iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
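The frame-info and memory-operand entries above combine roughly as in this hypothetical sketch of storing a value into a freshly created stack slot; storeToStackTemp and the 8-byte slot alignment are assumptions, and the buildFrameIndex/buildStore calls it uses are listed further below:
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
using namespace llvm;
// Hypothetical helper, not taken from LegalizerHelper.cpp.
static void storeToStackTemp(MachineIRBuilder &MIRBuilder, Register Val,
                             LLT ValTy, LLT PtrTy) {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  // Statically sized slot; Align(8) is an illustrative choice.
  int FrameIdx = MFI.CreateStackObject(ValTy.getSizeInBytes(), Align(8),
                                       /*isSpillSlot=*/false);
  auto Addr = MIRBuilder.buildFrameIndex(PtrTy, FrameIdx);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FrameIdx),
      MachineMemOperand::MOStore, ValTy, MFI.getObjectAlign(FrameIdx));
  MIRBuilder.buildStore(Val, Addr, *MMO);
}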
Helper class to build MachineInstr.
MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FSUB Op0, Op1.
MachineInstrBuilder buildFPTOSI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOSI Src0.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
std::optional< MachineInstrBuilder > materializePtrAdd(Register &Res, Register Op0, const LLT ValueTy, uint64_t Value)
Materialize and insert Res = G_PTR_ADD Op0, (G_CONSTANT Value)
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not: NegOne = G_CONSTANT -1; Res = G_XOR Op0, NegOne.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FABS Op0.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildZExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_AND Op, LowBitsSet(ImmOp). Since there is no G_ZEXT_INREG like G_SEXT_INREG,...
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
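As a small illustration of combining the compare and select builders, here is a sketch (not from this file) of the usual G_SMAX expansion into G_ICMP + G_SELECT; the 32-bit operand type and the helper name are assumptions:
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;
// Illustrative only; mirrors the generic min/max lowering pattern.
static Register buildSMaxViaSelect(MachineIRBuilder &MIRBuilder, Register A,
                                   Register B) {
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);
  auto IsGreater = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, A, B);
  return MIRBuilder.buildSelect(S32, IsGreater, A, B).getReg(0);
}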
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FPOW Src0, Src1.
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
MachineInstrBuilder buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_INTRINSIC_TRUNC Src0.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildSExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src)
Build and insert a vector splat of a scalar Src using a G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idio...
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ_ZERO_UNDEF Src0.
MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts)
Build and insert Res = G_VSCALE MinElts.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
unsigned getBoolExtOp(bool IsVec, bool IsFP) const
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation: Zero = G_CONSTANT 0; Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Src0.
MachineInstrBuilder buildSMax(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMAX Op0, Op1.
MachineInstrBuilder buildAssertZExt(const DstOp &Res, const SrcOp &Op, unsigned Size)
Build and insert Res = G_ASSERT_ZEXT Op, Size.
MachineInstrBuilder buildStrictFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_STRICT_FADD Op0, Op1.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildCTTZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ_ZERO_UNDEF Src0.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineInstrBuilder buildPadVectorWithUndefElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a, b, .....
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildSMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMIN Op0, Op1.
MachineInstrBuilder buildInsert(const DstOp &Res, const SrcOp &Src, const SrcOp &Op, unsigned Index)
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_FCOPYSIGN Op0, Op1.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Elt, const SrcOp &Idx)
Build and insert Res = G_INSERT_VECTOR_ELT Val, Elt, Idx.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineInstrBuilder buildDeleteTrailingVectorElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x, y, z = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a,...
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildAtomicCmpXchg(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &CmpVal, const SrcOp &NewVal, MachineMemOperand &MMO)
Build and insert OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, MMO.
MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, ArrayRef< int > Mask)
Build and insert Res = G_SHUFFLE_VECTOR Src1, Src2, Mask.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildUMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_UMIN Op0, Op1.
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_FCMP Pred, Op0, Op1.
MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FADD Op0, Op1.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
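Chaining the arithmetic builders above typically looks like this sketch (not from this file) expanding a 32-bit integer absolute value; the width and the helper name buildAbs32 are assumptions:
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;
// Sketch: |X| = (X + (X >>s 31)) ^ (X >>s 31), assuming a 32-bit scalar X.
static Register buildAbs32(MachineIRBuilder &MIRBuilder, Register X) {
  const LLT S32 = LLT::scalar(32);
  auto ShAmt = MIRBuilder.buildConstant(S32, 31);
  auto Sign = MIRBuilder.buildAShr(S32, X, ShAmt);
  auto Sum = MIRBuilder.buildAdd(S32, X, Sign);
  return MIRBuilder.buildXor(S32, Sum, Sign).getReg(0);
}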
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
bool isReturn(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:940
bool isCopy() const
bool isDebugInstr() const
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:572
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:800
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:361
TargetInstrInfo - Interface to description of machine instruction set.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
unsigned getMaxStoresPerMemcpy(bool OptSize) const
Get maximum # of store operations permitted for llvm.memcpy.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getMaxStoresPerMemmove(bool OptSize) const
Get maximum # of store operations permitted for llvm.memmove.
Align getMinStackArgumentAlignment() const
Return the minimum stack alignment of an argument.
unsigned getMaxStoresPerMemset(bool OptSize) const
Get maximum # of store operations permitted for llvm.memset.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual Register getRegisterByName(const char *RegName, LLT Ty, const MachineFunction &MF) const
Return the register ID of the name passed in.
const Triple & getTargetTriple() const
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:558
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
static Type * getX86_FP80Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static Type * getVoidTy(LLVMContext &C)
static Type * getFP128Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
Definition: LegalizerInfo.h:65
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
Definition: LegalizerInfo.h:83
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
Definition: LegalizerInfo.h:57
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegalizerInfo.h:74
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
Definition: LegalizerInfo.h:52
@ Custom
The target wants to do something special with this combination of operand and type.
Definition: LegalizerInfo.h:87
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
Definition: LegalizerInfo.h:71
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
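For example, a double-to-float G_FPTRUNC maps onto its runtime-library entry roughly like this sketch; the function name is hypothetical, and the headers declaring RTLIB and MVT (which vary between LLVM releases) are assumed to be the ones this file already includes:
#include <cassert>
// Assumes the RTLIB and MVT declarations already visible in this file.
static llvm::RTLIB::Libcall fptruncF64ToF32Libcall() {
  llvm::RTLIB::Libcall LC =
      llvm::RTLIB::getFPROUND(llvm::MVT::f64, llvm::MVT::f32);
  assert(LC != llvm::RTLIB::UNKNOWN_LIBCALL && "unexpected FP truncation");
  return LC; // Usually resolves to a call such as __truncdfsf2.
}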
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition: STLExtras.h:853
int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition: MathExtras.h:244
Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition: Utils.cpp:1981
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:646
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition: Utils.cpp:1523
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition: Utils.cpp:1568
LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:394
LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition: Utils.cpp:1172
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition: MathExtras.h:366
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
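These size and alignment helpers appear throughout the memory legalizations; a tiny illustrative sketch with made-up values and a hypothetical function name:
#include "llvm/Support/Alignment.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
using namespace llvm;
// Round a non-zero byte size up to a power of two, then to 8-byte alignment.
static uint64_t roundedTempSize(uint32_t SizeInBytes) {
  uint64_t Rounded = PowerOf2Ceil(SizeInBytes); // e.g. 12 -> 16
  assert(isPowerOf2_32(static_cast<uint32_t>(Rounded)) &&
         "expected a power of two");
  return alignTo(Rounded, Align(8)); // Log2(Align(8)) == 3
}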
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition: Utils.cpp:500
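A hedged sketch of how extractParts is typically called when narrowing, assuming a 64-bit value split into two 32-bit halves (the wrapper name is hypothetical):
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
using namespace llvm;
// Illustrative only.
static void splitIntoHalves(MachineIRBuilder &MIRBuilder,
                            MachineRegisterInfo &MRI, Register Src64) {
  SmallVector<Register, 2> Parts;
  extractParts(Src64, LLT::scalar(32), /*NumParts=*/2, Parts, MIRBuilder, MRI);
  // Parts[0] holds the low 32 bits, Parts[1] the high 32 bits
  // (produced with G_UNMERGE_VALUES).
}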
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition: MathExtras.h:235
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:433
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition: Utils.h:338
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1886
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition: Alignment.h:111
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition: Utils.cpp:1260
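The GCD/LCM type helpers are what the vector re-tiling paths use to pick intermediate types; a purely illustrative sketch with assumed types and a hypothetical function name:
#include "llvm/CodeGen/GlobalISel/Utils.h"
using namespace llvm;
// Illustrative types only.
static void typeBreakdownExample() {
  const LLT V3S32 = LLT::fixed_vector(3, 32);
  const LLT V4S32 = LLT::fixed_vector(4, 32);
  LLT Common = getGCDType(V3S32, V4S32); // largest piece both can be split into
  LLT Cover = getLCMType(V3S32, V4S32);  // smallest type covering both
  (void)Common;
  (void)Cover;
}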
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition: Utils.cpp:604
#define N
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:281
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:254
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:282
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Align previous() const
Definition: Alignment.h:88
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
LegalizeAction Action
The action to take or the final answer.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)