1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
36#include "llvm/Support/Debug.h"
40#include <numeric>
41#include <optional>
42
43#define DEBUG_TYPE "legalizer"
44
45using namespace llvm;
46using namespace LegalizeActions;
47using namespace MIPatternMatch;
48
49/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
50///
51/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
52/// with any leftover piece as type \p LeftoverTy
53///
54/// Returns -1 in the first element of the pair if the breakdown is not
55/// satisfiable.
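///
/// Illustrative examples (derived from the logic below, not part of the
/// original comment): breaking OrigTy = s88 into NarrowTy = s32 pieces yields
/// {2, 1} with LeftoverTy set to s24; breaking <4 x s16> into <2 x s16> yields
/// {2, 0} and LeftoverTy is left invalid.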
56static std::pair<int, int>
57getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
58 assert(!LeftoverTy.isValid() && "this is an out argument");
59
60 unsigned Size = OrigTy.getSizeInBits();
61 unsigned NarrowSize = NarrowTy.getSizeInBits();
62 unsigned NumParts = Size / NarrowSize;
63 unsigned LeftoverSize = Size - NumParts * NarrowSize;
64 assert(Size > NarrowSize);
65
66 if (LeftoverSize == 0)
67 return {NumParts, 0};
68
69 if (NarrowTy.isVector()) {
70 unsigned EltSize = OrigTy.getScalarSizeInBits();
71 if (LeftoverSize % EltSize != 0)
72 return {-1, -1};
73 LeftoverTy =
74 LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
75 OrigTy.getElementType());
76 } else {
77 LeftoverTy = LLT::scalar(LeftoverSize);
78 }
79
80 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
81 return std::make_pair(NumParts, NumLeftover);
82}
83
84static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
85
86 if (!Ty.isScalar())
87 return nullptr;
88
89 switch (Ty.getSizeInBits()) {
90 case 16:
91 return Type::getHalfTy(Ctx);
92 case 32:
93 return Type::getFloatTy(Ctx);
94 case 64:
95 return Type::getDoubleTy(Ctx);
96 case 80:
97 return Type::getX86_FP80Ty(Ctx);
98 case 128:
99 return Type::getFP128Ty(Ctx);
100 default:
101 return nullptr;
102 }
103}
104
105LegalizerHelper::LegalizerHelper(MachineFunction &MF,
106 GISelChangeObserver &Observer,
107 MachineIRBuilder &Builder)
108 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
109 LI(*MF.getSubtarget().getLegalizerInfo()),
110 TLI(*MF.getSubtarget().getTargetLowering()), VT(nullptr) {}
111
112LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
113 GISelChangeObserver &Observer,
114 MachineIRBuilder &B, GISelValueTracking *VT)
115 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
116 TLI(*MF.getSubtarget().getTargetLowering()), VT(VT) {}
117
118LegalizerHelper::LegalizeResult
119LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
120 LostDebugLocObserver &LocObserver) {
121 LLVM_DEBUG(dbgs() << "\nLegalizing: " << MI);
122
123 MIRBuilder.setInstrAndDebugLoc(MI);
124
125 if (isa<GIntrinsic>(MI))
126 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
127 auto Step = LI.getAction(MI, MRI);
128 switch (Step.Action) {
129 case Legal:
130 LLVM_DEBUG(dbgs() << ".. Already legal\n");
131 return AlreadyLegal;
132 case Libcall:
133 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
134 return libcall(MI, LocObserver);
135 case NarrowScalar:
136 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
137 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
138 case WidenScalar:
139 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
140 return widenScalar(MI, Step.TypeIdx, Step.NewType);
141 case Bitcast:
142 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
143 return bitcast(MI, Step.TypeIdx, Step.NewType);
144 case Lower:
145 LLVM_DEBUG(dbgs() << ".. Lower\n");
146 return lower(MI, Step.TypeIdx, Step.NewType);
147 case FewerElements:
148 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
149 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
150 case MoreElements:
151 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
152 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
153 case Custom:
154 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
155 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
156 : UnableToLegalize;
157 default:
158 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
159 return UnableToLegalize;
160 }
161}
162
163void LegalizerHelper::insertParts(Register DstReg,
164 LLT ResultTy, LLT PartTy,
165 ArrayRef<Register> PartRegs,
166 LLT LeftoverTy,
167 ArrayRef<Register> LeftoverRegs) {
168 if (!LeftoverTy.isValid()) {
169 assert(LeftoverRegs.empty());
170
171 if (!ResultTy.isVector()) {
172 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
173 return;
174 }
175
176 if (PartTy.isVector())
177 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
178 else
179 MIRBuilder.buildBuildVector(DstReg, PartRegs);
180 return;
181 }
182
183 // Merge sub-vectors with different numbers of elements and insert into DstReg.
184 if (ResultTy.isVector()) {
185 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
186 SmallVector<Register, 8> AllRegs(PartRegs);
187 AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
188 return mergeMixedSubvectors(DstReg, AllRegs);
189 }
190
191 SmallVector<Register> GCDRegs;
192 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
193 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
194 extractGCDType(GCDRegs, GCDTy, PartReg);
195 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
196 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
197}
198
199void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
200 Register Reg) {
201 LLT Ty = MRI.getType(Reg);
202 SmallVector<Register, 8> RegElts;
203 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
204 MIRBuilder, MRI);
205 Elts.append(RegElts);
206}
207
208/// Merge \p PartRegs with different types into \p DstReg.
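///
/// Illustrative example (not from the original comment): with a <5 x s16>
/// DstReg and PartRegs = { <2 x s16>, <2 x s16>, s16 }, the two vector parts
/// are decomposed into scalar elements, the trailing s16 leftover is appended
/// directly, and a single merge-like instruction rebuilds DstReg.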
209void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
210 ArrayRef<Register> PartRegs) {
211 SmallVector<Register, 32> AllElts;
212 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
213 appendVectorElts(AllElts, PartRegs[i]);
214
215 Register Leftover = PartRegs[PartRegs.size() - 1];
216 if (!MRI.getType(Leftover).isVector())
217 AllElts.push_back(Leftover);
218 else
219 appendVectorElts(AllElts, Leftover);
220
221 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
222}
223
224/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
225static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
226 const MachineInstr &MI) {
227 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
228
229 const int StartIdx = Regs.size();
230 const int NumResults = MI.getNumOperands() - 1;
231 Regs.resize(Regs.size() + NumResults);
232 for (int I = 0; I != NumResults; ++I)
233 Regs[StartIdx + I] = MI.getOperand(I).getReg();
234}
235
236void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
237 LLT GCDTy, Register SrcReg) {
238 LLT SrcTy = MRI.getType(SrcReg);
239 if (SrcTy == GCDTy) {
240 // If the source already evenly divides the result type, we don't need to do
241 // anything.
242 Parts.push_back(SrcReg);
243 } else {
244 // Need to split into common type sized pieces.
245 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
246 getUnmergeResults(Parts, *Unmerge);
247 }
248}
249
250LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
251 LLT NarrowTy, Register SrcReg) {
252 LLT SrcTy = MRI.getType(SrcReg);
253 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
254 extractGCDType(Parts, GCDTy, SrcReg);
255 return GCDTy;
256}
257
258LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
259 SmallVectorImpl<Register> &VRegs,
260 unsigned PadStrategy) {
261 LLT LCMTy = getLCMType(DstTy, NarrowTy);
262
263 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
264 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
265 int NumOrigSrc = VRegs.size();
266
267 Register PadReg;
268
269 // Get a value we can use to pad the source value if the sources won't evenly
270 // cover the result type.
271 if (NumOrigSrc < NumParts * NumSubParts) {
272 if (PadStrategy == TargetOpcode::G_ZEXT)
273 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
274 else if (PadStrategy == TargetOpcode::G_ANYEXT)
275 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
276 else {
277 assert(PadStrategy == TargetOpcode::G_SEXT);
278
279 // Shift the sign bit of the low register through the high register.
280 auto ShiftAmt =
281 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
282 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
283 }
284 }
285
286 // Registers for the final merge to be produced.
287 SmallVector<Register, 4> Remerge(NumParts);
288
289 // Registers needed for intermediate merges, which will be merged into a
290 // source for Remerge.
291 SmallVector<Register, 4> SubMerge(NumSubParts);
292
293 // Once we've fully read off the end of the original source bits, we can reuse
294 // the same high bits for remaining padding elements.
295 Register AllPadReg;
296
297 // Build merges to the LCM type to cover the original result type.
298 for (int I = 0; I != NumParts; ++I) {
299 bool AllMergePartsArePadding = true;
300
301 // Build the requested merges to the requested type.
302 for (int J = 0; J != NumSubParts; ++J) {
303 int Idx = I * NumSubParts + J;
304 if (Idx >= NumOrigSrc) {
305 SubMerge[J] = PadReg;
306 continue;
307 }
308
309 SubMerge[J] = VRegs[Idx];
310
311 // There are meaningful bits here we can't reuse later.
312 AllMergePartsArePadding = false;
313 }
314
315 // If we've filled up a complete piece with padding bits, we can directly
316 // emit the natural sized constant if applicable, rather than a merge of
317 // smaller constants.
318 if (AllMergePartsArePadding && !AllPadReg) {
319 if (PadStrategy == TargetOpcode::G_ANYEXT)
320 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
321 else if (PadStrategy == TargetOpcode::G_ZEXT)
322 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
323
324 // If this is a sign extension, we can't materialize a trivial constant
325 // with the right type and have to produce a merge.
326 }
327
328 if (AllPadReg) {
329 // Avoid creating additional instructions if we're just adding additional
330 // copies of padding bits.
331 Remerge[I] = AllPadReg;
332 continue;
333 }
334
335 if (NumSubParts == 1)
336 Remerge[I] = SubMerge[0];
337 else
338 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
339
340 // In the sign extend padding case, re-use the first all-signbit merge.
341 if (AllMergePartsArePadding && !AllPadReg)
342 AllPadReg = Remerge[I];
343 }
344
345 VRegs = std::move(Remerge);
346 return LCMTy;
347}
348
349void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
350 ArrayRef<Register> RemergeRegs) {
351 LLT DstTy = MRI.getType(DstReg);
352
353 // Create the merge to the widened source, and extract the relevant bits into
354 // the result.
355
356 if (DstTy == LCMTy) {
357 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
358 return;
359 }
360
361 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
362 if (DstTy.isScalar() && LCMTy.isScalar()) {
363 MIRBuilder.buildTrunc(DstReg, Remerge);
364 return;
365 }
366
367 if (LCMTy.isVector()) {
368 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
369 SmallVector<Register, 8> UnmergeDefs(NumDefs);
370 UnmergeDefs[0] = DstReg;
371 for (unsigned I = 1; I != NumDefs; ++I)
372 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
373
374 MIRBuilder.buildUnmerge(UnmergeDefs,
375 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
376 return;
377 }
378
379 llvm_unreachable("unhandled case");
380}
381
382static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
383#define RTLIBCASE_INT(LibcallPrefix) \
384 do { \
385 switch (Size) { \
386 case 32: \
387 return RTLIB::LibcallPrefix##32; \
388 case 64: \
389 return RTLIB::LibcallPrefix##64; \
390 case 128: \
391 return RTLIB::LibcallPrefix##128; \
392 default: \
393 llvm_unreachable("unexpected size"); \
394 } \
395 } while (0)
396
397#define RTLIBCASE(LibcallPrefix) \
398 do { \
399 switch (Size) { \
400 case 32: \
401 return RTLIB::LibcallPrefix##32; \
402 case 64: \
403 return RTLIB::LibcallPrefix##64; \
404 case 80: \
405 return RTLIB::LibcallPrefix##80; \
406 case 128: \
407 return RTLIB::LibcallPrefix##128; \
408 default: \
409 llvm_unreachable("unexpected size"); \
410 } \
411 } while (0)
412
413 switch (Opcode) {
414 case TargetOpcode::G_LROUND:
415 RTLIBCASE(LROUND_F);
416 case TargetOpcode::G_LLROUND:
417 RTLIBCASE(LLROUND_F);
418 case TargetOpcode::G_MUL:
419 RTLIBCASE_INT(MUL_I);
420 case TargetOpcode::G_SDIV:
421 RTLIBCASE_INT(SDIV_I);
422 case TargetOpcode::G_UDIV:
423 RTLIBCASE_INT(UDIV_I);
424 case TargetOpcode::G_SREM:
425 RTLIBCASE_INT(SREM_I);
426 case TargetOpcode::G_UREM:
427 RTLIBCASE_INT(UREM_I);
428 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
429 RTLIBCASE_INT(CTLZ_I);
430 case TargetOpcode::G_FADD:
431 RTLIBCASE(ADD_F);
432 case TargetOpcode::G_FSUB:
433 RTLIBCASE(SUB_F);
434 case TargetOpcode::G_FMUL:
435 RTLIBCASE(MUL_F);
436 case TargetOpcode::G_FDIV:
437 RTLIBCASE(DIV_F);
438 case TargetOpcode::G_FEXP:
439 RTLIBCASE(EXP_F);
440 case TargetOpcode::G_FEXP2:
441 RTLIBCASE(EXP2_F);
442 case TargetOpcode::G_FEXP10:
443 RTLIBCASE(EXP10_F);
444 case TargetOpcode::G_FREM:
445 RTLIBCASE(REM_F);
446 case TargetOpcode::G_FPOW:
447 RTLIBCASE(POW_F);
448 case TargetOpcode::G_FPOWI:
449 RTLIBCASE(POWI_F);
450 case TargetOpcode::G_FMA:
451 RTLIBCASE(FMA_F);
452 case TargetOpcode::G_FSIN:
453 RTLIBCASE(SIN_F);
454 case TargetOpcode::G_FCOS:
455 RTLIBCASE(COS_F);
456 case TargetOpcode::G_FTAN:
457 RTLIBCASE(TAN_F);
458 case TargetOpcode::G_FASIN:
459 RTLIBCASE(ASIN_F);
460 case TargetOpcode::G_FACOS:
461 RTLIBCASE(ACOS_F);
462 case TargetOpcode::G_FATAN:
463 RTLIBCASE(ATAN_F);
464 case TargetOpcode::G_FATAN2:
465 RTLIBCASE(ATAN2_F);
466 case TargetOpcode::G_FSINH:
467 RTLIBCASE(SINH_F);
468 case TargetOpcode::G_FCOSH:
469 RTLIBCASE(COSH_F);
470 case TargetOpcode::G_FTANH:
471 RTLIBCASE(TANH_F);
472 case TargetOpcode::G_FSINCOS:
473 RTLIBCASE(SINCOS_F);
474 case TargetOpcode::G_FLOG10:
475 RTLIBCASE(LOG10_F);
476 case TargetOpcode::G_FLOG:
477 RTLIBCASE(LOG_F);
478 case TargetOpcode::G_FLOG2:
479 RTLIBCASE(LOG2_F);
480 case TargetOpcode::G_FLDEXP:
481 RTLIBCASE(LDEXP_F);
482 case TargetOpcode::G_FCEIL:
483 RTLIBCASE(CEIL_F);
484 case TargetOpcode::G_FFLOOR:
485 RTLIBCASE(FLOOR_F);
486 case TargetOpcode::G_FMINNUM:
487 RTLIBCASE(FMIN_F);
488 case TargetOpcode::G_FMAXNUM:
489 RTLIBCASE(FMAX_F);
490 case TargetOpcode::G_FMINIMUMNUM:
491 RTLIBCASE(FMINIMUM_NUM_F);
492 case TargetOpcode::G_FMAXIMUMNUM:
493 RTLIBCASE(FMAXIMUM_NUM_F);
494 case TargetOpcode::G_FSQRT:
495 RTLIBCASE(SQRT_F);
496 case TargetOpcode::G_FRINT:
497 RTLIBCASE(RINT_F);
498 case TargetOpcode::G_FNEARBYINT:
499 RTLIBCASE(NEARBYINT_F);
500 case TargetOpcode::G_INTRINSIC_TRUNC:
501 RTLIBCASE(TRUNC_F);
502 case TargetOpcode::G_INTRINSIC_ROUND:
503 RTLIBCASE(ROUND_F);
504 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
505 RTLIBCASE(ROUNDEVEN_F);
506 case TargetOpcode::G_INTRINSIC_LRINT:
507 RTLIBCASE(LRINT_F);
508 case TargetOpcode::G_INTRINSIC_LLRINT:
509 RTLIBCASE(LLRINT_F);
510 }
511 llvm_unreachable("Unknown libcall function");
512#undef RTLIBCASE_INT
513#undef RTLIBCASE
514}
515
516/// True if an instruction is in tail position in its caller. Intended for
517/// legalizing libcalls as tail calls when possible.
518static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
519 MachineInstr &MI,
520 const TargetInstrInfo &TII,
521 MachineRegisterInfo &MRI) {
522 MachineBasicBlock &MBB = *MI.getParent();
523 const Function &F = MBB.getParent()->getFunction();
524
525 // Conservatively require the attributes of the call to match those of
526 // the return. Ignore NoAlias and NonNull because they don't affect the
527 // call sequence.
528 AttributeList CallerAttrs = F.getAttributes();
529 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
530 .removeAttribute(Attribute::NoAlias)
531 .removeAttribute(Attribute::NonNull)
532 .hasAttributes())
533 return false;
534
535 // It's not safe to eliminate the sign / zero extension of the return value.
536 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
537 CallerAttrs.hasRetAttr(Attribute::SExt))
538 return false;
539
540 // Only tail call if the following instruction is a standard return or if we
541 // have a `thisreturn` callee, and a sequence like:
542 //
543 // G_MEMCPY %0, %1, %2
544 // $x0 = COPY %0
545 // RET_ReallyLR implicit $x0
546 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
547 if (Next != MBB.instr_end() && Next->isCopy()) {
548 if (MI.getOpcode() == TargetOpcode::G_BZERO)
549 return false;
550
551 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
552 // memcpy/etc routines return the same parameter. For others it will be the
553 // returned value.
554 Register VReg = MI.getOperand(0).getReg();
555 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
556 return false;
557
558 Register PReg = Next->getOperand(0).getReg();
559 if (!PReg.isPhysical())
560 return false;
561
562 auto Ret = next_nodbg(Next, MBB.instr_end());
563 if (Ret == MBB.instr_end() || !Ret->isReturn())
564 return false;
565
566 if (Ret->getNumImplicitOperands() != 1)
567 return false;
568
569 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
570 return false;
571
572 // Skip over the COPY that we just validated.
573 Next = Ret;
574 }
575
576 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
577 return false;
578
579 return true;
580}
581
582LegalizerHelper::LegalizeResult
583llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
584 const CallLowering::ArgInfo &Result,
585 ArrayRef<CallLowering::ArgInfo> Args,
586 const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
587 MachineInstr *MI) {
588 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
589
591 Info.CallConv = CC;
592 Info.Callee = MachineOperand::CreateES(Name);
593 Info.OrigRet = Result;
594 if (MI)
595 Info.IsTailCall =
596 (Result.Ty->isVoidTy() ||
597 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
598 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
599 *MIRBuilder.getMRI());
600
601 llvm::append_range(Info.OrigArgs, Args);
602 if (!CLI.lowerCall(MIRBuilder, Info))
603 return LegalizerHelper::UnableToLegalize;
604
605 if (MI && Info.LoweredTailCall) {
606 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
607
608 // Check debug locations before removing the return.
609 LocObserver.checkpoint(true);
610
611 // We must have a return following the call (or debug insts) to get past
612 // isLibCallInTailPosition.
613 do {
614 MachineInstr *Next = MI->getNextNode();
615 assert(Next &&
616 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
617 "Expected instr following MI to be return or debug inst?");
618 // We lowered a tail call, so the call is now the return from the block.
619 // Delete the old return.
620 Next->eraseFromParent();
621 } while (MI->getNextNode());
622
623 // We expect to lose the debug location from the return.
624 LocObserver.checkpoint(false);
625 }
626 return LegalizerHelper::Legalized;
627}
628
629LegalizerHelper::LegalizeResult
630llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
631 const CallLowering::ArgInfo &Result,
632 ArrayRef<CallLowering::ArgInfo> Args,
633 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
634 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
635 const char *Name = TLI.getLibcallName(Libcall);
636 if (!Name)
637 return LegalizerHelper::UnableToLegalize;
638 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
639 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
640}
641
642// Useful for libcalls where all operands have the same type.
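// For example (illustrative sketch, assuming a typical libcall configuration):
// a G_FREM on s64 becomes a call to the fmod routine, with both source
// operands and the result treated as double.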
643static LegalizerHelper::LegalizeResult
644simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
645 Type *OpType, LostDebugLocObserver &LocObserver) {
646 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
647
648 // FIXME: What does the original arg index mean here?
649 SmallVector<CallLowering::ArgInfo, 3> Args;
650 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
651 Args.push_back({MO.getReg(), OpType, 0});
652 return createLibcall(MIRBuilder, Libcall,
653 {MI.getOperand(0).getReg(), OpType, 0}, Args,
654 LocObserver, &MI);
655}
656
657LegalizerHelper::LegalizeResult LegalizerHelper::emitSincosLibcall(
658 MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType,
659 LostDebugLocObserver &LocObserver) {
660 MachineFunction &MF = *MI.getMF();
661 MachineRegisterInfo &MRI = MF.getRegInfo();
662
663 Register DstSin = MI.getOperand(0).getReg();
664 Register DstCos = MI.getOperand(1).getReg();
665 Register Src = MI.getOperand(2).getReg();
666 LLT DstTy = MRI.getType(DstSin);
667
668 int MemSize = DstTy.getSizeInBytes();
669 Align Alignment = getStackTemporaryAlignment(DstTy);
670 const DataLayout &DL = MIRBuilder.getDataLayout();
671 unsigned AddrSpace = DL.getAllocaAddrSpace();
672 MachinePointerInfo PtrInfo;
673
674 Register StackPtrSin =
675 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
676 .getReg(0);
677 Register StackPtrCos =
678 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
679 .getReg(0);
680
681 auto &Ctx = MF.getFunction().getContext();
682 auto LibcallResult =
683 createLibcall(MIRBuilder, getRTLibDesc(MI.getOpcode(), Size),
684 {{0}, Type::getVoidTy(Ctx), 0},
685 {{Src, OpType, 0},
686 {StackPtrSin, PointerType::get(Ctx, AddrSpace), 1},
687 {StackPtrCos, PointerType::get(Ctx, AddrSpace), 2}},
688 LocObserver, &MI);
689
690 if (LibcallResult != LegalizeResult::Legalized)
691 return LegalizeResult::UnableToLegalize;
692
693 MachineMemOperand *LoadMMOSin = MF.getMachineMemOperand(
694 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
695 MachineMemOperand *LoadMMOCos = MF.getMachineMemOperand(
696 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
697
698 MIRBuilder.buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
699 MIRBuilder.buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
700 MI.eraseFromParent();
701
702 return LegalizeResult::Legalized;
703}
704
705LegalizerHelper::LegalizeResult
706llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
707 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
708 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
709
710 SmallVector<CallLowering::ArgInfo, 3> Args;
711 // Add all the args, except for the last which is an imm denoting 'tail'.
712 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
713 Register Reg = MI.getOperand(i).getReg();
714
715 // Need to derive an IR type for call lowering.
716 LLT OpLLT = MRI.getType(Reg);
717 Type *OpTy = nullptr;
718 if (OpLLT.isPointer())
719 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
720 else
721 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
722 Args.push_back({Reg, OpTy, 0});
723 }
724
725 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
726 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
727 RTLIB::Libcall RTLibcall;
728 unsigned Opc = MI.getOpcode();
729 const char *Name;
730 switch (Opc) {
731 case TargetOpcode::G_BZERO:
732 RTLibcall = RTLIB::BZERO;
733 Name = TLI.getLibcallName(RTLibcall);
734 break;
735 case TargetOpcode::G_MEMCPY:
736 RTLibcall = RTLIB::MEMCPY;
737 Name = TLI.getMemcpyName();
738 Args[0].Flags[0].setReturned();
739 break;
740 case TargetOpcode::G_MEMMOVE:
741 RTLibcall = RTLIB::MEMMOVE;
742 Name = TLI.getLibcallName(RTLibcall);
743 Args[0].Flags[0].setReturned();
744 break;
745 case TargetOpcode::G_MEMSET:
746 RTLibcall = RTLIB::MEMSET;
747 Name = TLI.getLibcallName(RTLibcall);
748 Args[0].Flags[0].setReturned();
749 break;
750 default:
751 llvm_unreachable("unsupported opcode");
752 }
753
754 // Unsupported libcall on the target.
755 if (!Name) {
756 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
757 << MIRBuilder.getTII().getName(Opc) << "\n");
758 return LegalizerHelper::UnableToLegalize;
759 }
760
761 CallLowering::CallLoweringInfo Info;
762 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
763 Info.Callee = MachineOperand::CreateES(Name);
764 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
765 Info.IsTailCall =
766 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
767 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
768
769 llvm::append_range(Info.OrigArgs, Args);
770 if (!CLI.lowerCall(MIRBuilder, Info))
771 return LegalizerHelper::UnableToLegalize;
772
773 if (Info.LoweredTailCall) {
774 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
775
776 // Check debug locations before removing the return.
777 LocObserver.checkpoint(true);
778
779 // We must have a return following the call (or debug insts) to get past
780 // isLibCallInTailPosition.
781 do {
782 MachineInstr *Next = MI.getNextNode();
783 assert(Next &&
784 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
785 "Expected instr following MI to be return or debug inst?");
786 // We lowered a tail call, so the call is now the return from the block.
787 // Delete the old return.
788 Next->eraseFromParent();
789 } while (MI.getNextNode());
790
791 // We expect to lose the debug location from the return.
792 LocObserver.checkpoint(false);
793 }
794
795 return LegalizerHelper::Legalized;
796}
797
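// Maps an atomic G_* instruction to an outline-atomic helper keyed on memory
// ordering and access size. Illustrative example (not from the original
// source, assuming monotonic maps to the relaxed variant): a 4-byte monotonic
// G_ATOMICRMW_XCHG selects RTLIB::OUTLINE_ATOMIC_SWP4_RELAX via the
// LCALLS/LCALL5 tables below.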
798static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
799 unsigned Opc = MI.getOpcode();
800 auto &AtomicMI = cast<GMemOperation>(MI);
801 auto &MMO = AtomicMI.getMMO();
802 auto Ordering = MMO.getMergedOrdering();
803 LLT MemType = MMO.getMemoryType();
804 uint64_t MemSize = MemType.getSizeInBytes();
805 if (MemType.isVector())
806 return RTLIB::UNKNOWN_LIBCALL;
807
808#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
809#define LCALL5(A) \
810 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
811 switch (Opc) {
812 case TargetOpcode::G_ATOMIC_CMPXCHG:
813 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
814 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
815 return getOutlineAtomicHelper(LC, Ordering, MemSize);
816 }
817 case TargetOpcode::G_ATOMICRMW_XCHG: {
818 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
819 return getOutlineAtomicHelper(LC, Ordering, MemSize);
820 }
821 case TargetOpcode::G_ATOMICRMW_ADD:
822 case TargetOpcode::G_ATOMICRMW_SUB: {
823 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
824 return getOutlineAtomicHelper(LC, Ordering, MemSize);
825 }
826 case TargetOpcode::G_ATOMICRMW_AND: {
827 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
828 return getOutlineAtomicHelper(LC, Ordering, MemSize);
829 }
830 case TargetOpcode::G_ATOMICRMW_OR: {
831 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
832 return getOutlineAtomicHelper(LC, Ordering, MemSize);
833 }
834 case TargetOpcode::G_ATOMICRMW_XOR: {
835 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
836 return getOutlineAtomicHelper(LC, Ordering, MemSize);
837 }
838 default:
839 return RTLIB::UNKNOWN_LIBCALL;
840 }
841#undef LCALLS
842#undef LCALL5
843}
844
845static LegalizerHelper::LegalizeResult
846createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
847 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
848
849 Type *RetTy;
850 SmallVector<Register> RetRegs;
851 SmallVector<CallLowering::ArgInfo, 3> Args;
852 unsigned Opc = MI.getOpcode();
853 switch (Opc) {
854 case TargetOpcode::G_ATOMIC_CMPXCHG:
855 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
856 Register Success;
857 LLT SuccessLLT;
858 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
859 MI.getFirst4RegLLTs();
860 RetRegs.push_back(Ret);
861 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
862 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
863 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
864 NewLLT) = MI.getFirst5RegLLTs();
865 RetRegs.push_back(Success);
866 RetTy = StructType::get(
867 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
868 }
869 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
870 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
871 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
872 break;
873 }
874 case TargetOpcode::G_ATOMICRMW_XCHG:
875 case TargetOpcode::G_ATOMICRMW_ADD:
876 case TargetOpcode::G_ATOMICRMW_SUB:
877 case TargetOpcode::G_ATOMICRMW_AND:
878 case TargetOpcode::G_ATOMICRMW_OR:
879 case TargetOpcode::G_ATOMICRMW_XOR: {
880 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
881 RetRegs.push_back(Ret);
882 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
883 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
884 Val =
885 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
886 .getReg(0);
887 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
888 Val =
889 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
890 .getReg(0);
891 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
892 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
893 break;
894 }
895 default:
896 llvm_unreachable("unsupported opcode");
897 }
898
899 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
900 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
901 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
902 const char *Name = TLI.getLibcallName(RTLibcall);
903
904 // Unsupported libcall on the target.
905 if (!Name) {
906 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
907 << MIRBuilder.getTII().getName(Opc) << "\n");
908 return LegalizerHelper::UnableToLegalize;
909 }
910
911 CallLowering::CallLoweringInfo Info;
912 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
913 Info.Callee = MachineOperand::CreateES(Name);
914 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
915
916 llvm::append_range(Info.OrigArgs, Args);
917 if (!CLI.lowerCall(MIRBuilder, Info))
918 return LegalizerHelper::UnableToLegalize;
919
920 return LegalizerHelper::Legalized;
921}
922
923static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
924 Type *FromType) {
925 auto ToMVT = MVT::getVT(ToType);
926 auto FromMVT = MVT::getVT(FromType);
927
928 switch (Opcode) {
929 case TargetOpcode::G_FPEXT:
930 return RTLIB::getFPEXT(FromMVT, ToMVT);
931 case TargetOpcode::G_FPTRUNC:
932 return RTLIB::getFPROUND(FromMVT, ToMVT);
933 case TargetOpcode::G_FPTOSI:
934 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
935 case TargetOpcode::G_FPTOUI:
936 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
937 case TargetOpcode::G_SITOFP:
938 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
939 case TargetOpcode::G_UITOFP:
940 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
941 }
942 llvm_unreachable("Unsupported libcall function");
943}
944
945static LegalizerHelper::LegalizeResult
946conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
947 Type *FromType, LostDebugLocObserver &LocObserver,
948 const TargetLowering &TLI, bool IsSigned = false) {
949 CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
950 if (FromType->isIntegerTy()) {
951 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
952 Arg.Flags[0].setSExt();
953 else
954 Arg.Flags[0].setZExt();
955 }
956
957 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
958 return createLibcall(MIRBuilder, Libcall,
959 {MI.getOperand(0).getReg(), ToType, 0}, Arg, LocObserver,
960 &MI);
961}
962
963static RTLIB::Libcall
964getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
965 RTLIB::Libcall RTLibcall;
966 switch (MI.getOpcode()) {
967 case TargetOpcode::G_GET_FPENV:
968 RTLibcall = RTLIB::FEGETENV;
969 break;
970 case TargetOpcode::G_SET_FPENV:
971 case TargetOpcode::G_RESET_FPENV:
972 RTLibcall = RTLIB::FESETENV;
973 break;
974 case TargetOpcode::G_GET_FPMODE:
975 RTLibcall = RTLIB::FEGETMODE;
976 break;
977 case TargetOpcode::G_SET_FPMODE:
978 case TargetOpcode::G_RESET_FPMODE:
979 RTLibcall = RTLIB::FESETMODE;
980 break;
981 default:
982 llvm_unreachable("Unexpected opcode");
983 }
984 return RTLibcall;
985}
986
987// Some library functions that read FP state (fegetmode, fegetenv) write the
988// state into a region in memory. IR intrinsics that do the same operations
989// (get_fpmode, get_fpenv) return the state as an integer value. To implement
990// these intrinsics via the library functions, we need to use a temporary
991// variable, for example:
992//
993// %0:_(s32) = G_GET_FPMODE
994//
995// is transformed to:
996//
997// %1:_(p0) = G_FRAME_INDEX %stack.0
998// BL &fegetmode
999// %0:_(s32) = G_LOAD %1
1000//
1001LegalizerHelper::LegalizeResult
1002LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
1003 MachineInstr &MI,
1004 LostDebugLocObserver &LocObserver) {
1005 const DataLayout &DL = MIRBuilder.getDataLayout();
1006 auto &MF = MIRBuilder.getMF();
1007 auto &MRI = *MIRBuilder.getMRI();
1008 auto &Ctx = MF.getFunction().getContext();
1009
1010 // Create a temporary where the library function will put the read state.
1011 Register Dst = MI.getOperand(0).getReg();
1012 LLT StateTy = MRI.getType(Dst);
1013 TypeSize StateSize = StateTy.getSizeInBytes();
1014 Align TempAlign = getStackTemporaryAlignment(StateTy);
1015 MachinePointerInfo TempPtrInfo;
1016 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1017
1018 // Create a call to library function, with the temporary as an argument.
1019 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1020 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1021 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1022 auto Res =
1023 createLibcall(MIRBuilder, RTLibcall,
1024 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1025 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1026 LocObserver, nullptr);
1027 if (Res != LegalizerHelper::Legalized)
1028 return Res;
1029
1030 // Create a load from the temporary.
1031 MachineMemOperand *MMO = MF.getMachineMemOperand(
1032 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
1033 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1034
1035 return LegalizerHelper::Legalized;
1036}
1037
1038// Similar to `createGetStateLibcall`, the function calls a library function
1039// using transient stack space. In this case the library function reads the
1040// content of the memory region.
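//
// For example (illustrative, mirroring the G_GET_FPMODE comment above):
//
// G_SET_FPMODE %0:_(s32)
//
// is transformed to:
//
// %1:_(p0) = G_FRAME_INDEX %stack.0
// G_STORE %0, %1
// BL &fesetmode
//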
1041LegalizerHelper::LegalizeResult
1042LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
1043 MachineInstr &MI,
1044 LostDebugLocObserver &LocObserver) {
1045 const DataLayout &DL = MIRBuilder.getDataLayout();
1046 auto &MF = MIRBuilder.getMF();
1047 auto &MRI = *MIRBuilder.getMRI();
1048 auto &Ctx = MF.getFunction().getContext();
1049
1050 // Create a temporary where the library function will get the new state.
1051 Register Src = MI.getOperand(0).getReg();
1052 LLT StateTy = MRI.getType(Src);
1053 TypeSize StateSize = StateTy.getSizeInBytes();
1054 Align TempAlign = getStackTemporaryAlignment(StateTy);
1055 MachinePointerInfo TempPtrInfo;
1056 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1057
1058 // Put the new state into the temporary.
1059 MachineMemOperand *MMO = MF.getMachineMemOperand(
1060 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
1061 MIRBuilder.buildStore(Src, Temp, *MMO);
1062
1063 // Create a call to library function, with the temporary as an argument.
1064 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1065 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1066 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1067 return createLibcall(MIRBuilder, RTLibcall,
1068 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1069 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1070 LocObserver, nullptr);
1071}
1072
1073/// Returns the corresponding libcall for the given Pred and
1074/// the ICMP predicate that should be generated to compare with #0
1075/// after the libcall.
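///
/// Illustrative example (not part of the original comment): for FCMP_OEQ at
/// 32 bits this is expected to yield {RTLIB::OEQ_F32, CmpInst::ICMP_EQ}, i.e.
/// call the soft-float equality routine and compare its i32 result for
/// equality with zero.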
1076static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1077getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {
1078#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1079 do { \
1080 switch (Size) { \
1081 case 32: \
1082 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1083 case 64: \
1084 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1085 case 128: \
1086 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1087 default: \
1088 llvm_unreachable("unexpected size"); \
1089 } \
1090 } while (0)
1091
1092 switch (Pred) {
1093 case CmpInst::FCMP_OEQ:
1094 RTLIBCASE_CMP(OEQ_F, CmpInst::ICMP_EQ);
1095 case CmpInst::FCMP_UNE:
1096 RTLIBCASE_CMP(UNE_F, CmpInst::ICMP_NE);
1097 case CmpInst::FCMP_OGE:
1098 RTLIBCASE_CMP(OGE_F, CmpInst::ICMP_SGE);
1099 case CmpInst::FCMP_OLT:
1100 RTLIBCASE_CMP(OLT_F, CmpInst::ICMP_SLT);
1101 case CmpInst::FCMP_OLE:
1102 RTLIBCASE_CMP(OLE_F, CmpInst::ICMP_SLE);
1103 case CmpInst::FCMP_OGT:
1104 RTLIBCASE_CMP(OGT_F, CmpInst::ICMP_SGT);
1105 case CmpInst::FCMP_UNO:
1106 RTLIBCASE_CMP(UO_F, CmpInst::ICMP_NE);
1107 default:
1108 return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
1109 }
1110}
1111
1113LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
1115 LostDebugLocObserver &LocObserver) {
1116 auto &MF = MIRBuilder.getMF();
1117 auto &Ctx = MF.getFunction().getContext();
1118 const GFCmp *Cmp = cast<GFCmp>(&MI);
1119
1120 LLT OpLLT = MRI.getType(Cmp->getLHSReg());
1121 unsigned Size = OpLLT.getSizeInBits();
1122 if ((Size != 32 && Size != 64 && Size != 128) ||
1123 OpLLT != MRI.getType(Cmp->getRHSReg()))
1124 return UnableToLegalize;
1125
1126 Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
1127
1128 // DstReg type is s32
1129 const Register DstReg = Cmp->getReg(0);
1130 LLT DstTy = MRI.getType(DstReg);
1131 const auto Cond = Cmp->getCond();
1132
1133 // Reference:
1134 // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
1135 // Generates a libcall followed by ICMP.
1136 const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
1137 const CmpInst::Predicate ICmpPred,
1138 const DstOp &Res) -> Register {
1139 // FCMP libcall always returns an i32, and needs an ICMP with #0.
1140 constexpr LLT TempLLT = LLT::scalar(32);
1141 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1142 // Generate libcall, holding result in Temp
1143 const auto Status = createLibcall(
1144 MIRBuilder, Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
1145 {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
1146 LocObserver, &MI);
1147 if (!Status)
1148 return {};
1149
1150 // Compare temp with #0 to get the final result.
1151 return MIRBuilder
1152 .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
1153 .getReg(0);
1154 };
1155
1156 // Simple case if we have a direct mapping from predicate to libcall
1157 if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
1158 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1159 ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
1160 if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
1161 return Legalized;
1162 }
1163 return UnableToLegalize;
1164 }
1165
1166 // No direct mapping found, should be generated as combination of libcalls.
1167
1168 switch (Cond) {
1169 case CmpInst::FCMP_UEQ: {
1170 // FCMP_UEQ: unordered or equal
1171 // Convert into (FCMP_OEQ || FCMP_UNO).
1172
1173 const auto [OeqLibcall, OeqPred] =
1174 getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1175 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1176
1177 const auto [UnoLibcall, UnoPred] =
1178 getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1179 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1180 if (Oeq && Uno)
1181 MIRBuilder.buildOr(DstReg, Oeq, Uno);
1182 else
1183 return UnableToLegalize;
1184
1185 break;
1186 }
1187 case CmpInst::FCMP_ONE: {
1188 // FCMP_ONE: ordered and operands are unequal
1189 // Convert into (!FCMP_OEQ && !FCMP_UNO).
1190
1191 // We invert the predicate instead of generating a NOT
1192 // to save one instruction.
1193 // On AArch64, isel can even select two cmps into a single ccmp.
1194 const auto [OeqLibcall, OeqPred] =
1195 getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1196 const auto NotOeq =
1197 BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
1198
1199 const auto [UnoLibcall, UnoPred] =
1200 getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1201 const auto NotUno =
1202 BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
1203
1204 if (NotOeq && NotUno)
1205 MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
1206 else
1207 return UnableToLegalize;
1208
1209 break;
1210 }
1211 case CmpInst::FCMP_ULT:
1212 case CmpInst::FCMP_UGE:
1213 case CmpInst::FCMP_UGT:
1214 case CmpInst::FCMP_ULE:
1215 case CmpInst::FCMP_ORD: {
1216 // Convert into: !(inverse(Pred))
1217 // E.g. FCMP_ULT becomes !FCMP_OGE
1218 // This is equivalent to the following, but saves some instructions.
1219 // MIRBuilder.buildNot(
1220 // PredTy,
1221 // MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
1222 // Op1, Op2));
1223 const auto [InversedLibcall, InversedPred] =
1224 getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond), Size);
1225 if (!BuildLibcall(InversedLibcall,
1226 CmpInst::getInversePredicate(InversedPred), DstReg))
1227 return UnableToLegalize;
1228 break;
1229 }
1230 default:
1231 return UnableToLegalize;
1232 }
1233
1234 return Legalized;
1235}
1236
1237// The function is used to legalize operations that set default environment
1238// state. In the C library a call like `fesetmode(FE_DFL_MODE)` is used for that.
1239// On most targets supported in glibc, FE_DFL_MODE is defined as
1240// `((const femode_t *) -1)`. That assumption is used here. If for some target
1241// it is not true, the target must provide custom lowering.
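//
// Illustrative sketch (not from the original source): on a 64-bit target,
// G_RESET_FPMODE is lowered to roughly
//
// %c:_(s64) = G_CONSTANT i64 -1
// %p:_(p0) = G_INTTOPTR %c
// BL &fesetmode
//
// where the pointer-sized -1 stands in for FE_DFL_MODE.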
1242LegalizerHelper::LegalizeResult
1243LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
1244 MachineInstr &MI,
1245 LostDebugLocObserver &LocObserver) {
1246 const DataLayout &DL = MIRBuilder.getDataLayout();
1247 auto &MF = MIRBuilder.getMF();
1248 auto &Ctx = MF.getFunction().getContext();
1249
1250 // Create an argument for the library function.
1251 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1252 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1253 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1254 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1255 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1256 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1257 MIRBuilder.buildIntToPtr(Dest, DefValue);
1258
1259 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1260 return createLibcall(MIRBuilder, RTLibcall,
1261 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1262 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1263 LocObserver, &MI);
1264}
1265
1266LegalizerHelper::LegalizeResult
1267LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1268 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1269
1270 switch (MI.getOpcode()) {
1271 default:
1272 return UnableToLegalize;
1273 case TargetOpcode::G_MUL:
1274 case TargetOpcode::G_SDIV:
1275 case TargetOpcode::G_UDIV:
1276 case TargetOpcode::G_SREM:
1277 case TargetOpcode::G_UREM:
1278 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1279 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1280 unsigned Size = LLTy.getSizeInBits();
1281 Type *HLTy = IntegerType::get(Ctx, Size);
1282 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1283 if (Status != Legalized)
1284 return Status;
1285 break;
1286 }
1287 case TargetOpcode::G_FADD:
1288 case TargetOpcode::G_FSUB:
1289 case TargetOpcode::G_FMUL:
1290 case TargetOpcode::G_FDIV:
1291 case TargetOpcode::G_FMA:
1292 case TargetOpcode::G_FPOW:
1293 case TargetOpcode::G_FREM:
1294 case TargetOpcode::G_FCOS:
1295 case TargetOpcode::G_FSIN:
1296 case TargetOpcode::G_FTAN:
1297 case TargetOpcode::G_FACOS:
1298 case TargetOpcode::G_FASIN:
1299 case TargetOpcode::G_FATAN:
1300 case TargetOpcode::G_FATAN2:
1301 case TargetOpcode::G_FCOSH:
1302 case TargetOpcode::G_FSINH:
1303 case TargetOpcode::G_FTANH:
1304 case TargetOpcode::G_FLOG10:
1305 case TargetOpcode::G_FLOG:
1306 case TargetOpcode::G_FLOG2:
1307 case TargetOpcode::G_FEXP:
1308 case TargetOpcode::G_FEXP2:
1309 case TargetOpcode::G_FEXP10:
1310 case TargetOpcode::G_FCEIL:
1311 case TargetOpcode::G_FFLOOR:
1312 case TargetOpcode::G_FMINNUM:
1313 case TargetOpcode::G_FMAXNUM:
1314 case TargetOpcode::G_FMINIMUMNUM:
1315 case TargetOpcode::G_FMAXIMUMNUM:
1316 case TargetOpcode::G_FSQRT:
1317 case TargetOpcode::G_FRINT:
1318 case TargetOpcode::G_FNEARBYINT:
1319 case TargetOpcode::G_INTRINSIC_TRUNC:
1320 case TargetOpcode::G_INTRINSIC_ROUND:
1321 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1322 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1323 unsigned Size = LLTy.getSizeInBits();
1324 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1325 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1326 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1327 return UnableToLegalize;
1328 }
1329 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1330 if (Status != Legalized)
1331 return Status;
1332 break;
1333 }
1334 case TargetOpcode::G_FSINCOS: {
1335 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1336 unsigned Size = LLTy.getSizeInBits();
1337 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1338 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1339 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1340 return UnableToLegalize;
1341 }
1342 return emitSincosLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1343 }
1344 case TargetOpcode::G_LROUND:
1345 case TargetOpcode::G_LLROUND:
1346 case TargetOpcode::G_INTRINSIC_LRINT:
1347 case TargetOpcode::G_INTRINSIC_LLRINT: {
1348 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1349 unsigned Size = LLTy.getSizeInBits();
1350 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1351 Type *ITy = IntegerType::get(
1352 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1353 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1354 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1355 return UnableToLegalize;
1356 }
1357 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1358 LegalizeResult Status =
1359 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1360 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1361 if (Status != Legalized)
1362 return Status;
1363 MI.eraseFromParent();
1364 return Legalized;
1365 }
1366 case TargetOpcode::G_FPOWI:
1367 case TargetOpcode::G_FLDEXP: {
1368 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1369 unsigned Size = LLTy.getSizeInBits();
1370 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1371 Type *ITy = IntegerType::get(
1372 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1373 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1374 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1375 return UnableToLegalize;
1376 }
1377 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1378 SmallVector<CallLowering::ArgInfo, 2> Args = {
1379 {MI.getOperand(1).getReg(), HLTy, 0},
1380 {MI.getOperand(2).getReg(), ITy, 1}};
1381 Args[1].Flags[0].setSExt();
1382 LegalizeResult Status =
1383 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1384 Args, LocObserver, &MI);
1385 if (Status != Legalized)
1386 return Status;
1387 break;
1388 }
1389 case TargetOpcode::G_FPEXT:
1390 case TargetOpcode::G_FPTRUNC: {
1391 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1392 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1393 if (!FromTy || !ToTy)
1394 return UnableToLegalize;
1395 LegalizeResult Status =
1396 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver, TLI);
1397 if (Status != Legalized)
1398 return Status;
1399 break;
1400 }
1401 case TargetOpcode::G_FCMP: {
1402 LegalizeResult Status = createFCMPLibcall(MIRBuilder, MI, LocObserver);
1403 if (Status != Legalized)
1404 return Status;
1405 MI.eraseFromParent();
1406 return Status;
1407 }
1408 case TargetOpcode::G_FPTOSI:
1409 case TargetOpcode::G_FPTOUI: {
1410 // FIXME: Support other types
1411 Type *FromTy =
1412 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1413 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1414 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1415 return UnableToLegalize;
1416 LegalizeResult Status = conversionLibcall(
1417 MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver, TLI);
1418 if (Status != Legalized)
1419 return Status;
1420 break;
1421 }
1422 case TargetOpcode::G_SITOFP:
1423 case TargetOpcode::G_UITOFP: {
1424 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1425 Type *ToTy =
1426 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1427 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1428 return UnableToLegalize;
1429 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
1430 LegalizeResult Status =
1431 conversionLibcall(MI, MIRBuilder, ToTy, Type::getIntNTy(Ctx, FromSize),
1432 LocObserver, TLI, IsSigned);
1433 if (Status != Legalized)
1434 return Status;
1435 break;
1436 }
1437 case TargetOpcode::G_ATOMICRMW_XCHG:
1438 case TargetOpcode::G_ATOMICRMW_ADD:
1439 case TargetOpcode::G_ATOMICRMW_SUB:
1440 case TargetOpcode::G_ATOMICRMW_AND:
1441 case TargetOpcode::G_ATOMICRMW_OR:
1442 case TargetOpcode::G_ATOMICRMW_XOR:
1443 case TargetOpcode::G_ATOMIC_CMPXCHG:
1444 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1445 auto Status = createAtomicLibcall(MIRBuilder, MI);
1446 if (Status != Legalized)
1447 return Status;
1448 break;
1449 }
1450 case TargetOpcode::G_BZERO:
1451 case TargetOpcode::G_MEMCPY:
1452 case TargetOpcode::G_MEMMOVE:
1453 case TargetOpcode::G_MEMSET: {
1454 LegalizeResult Result =
1455 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1456 if (Result != Legalized)
1457 return Result;
1458 MI.eraseFromParent();
1459 return Result;
1460 }
1461 case TargetOpcode::G_GET_FPENV:
1462 case TargetOpcode::G_GET_FPMODE: {
1463 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1464 if (Result != Legalized)
1465 return Result;
1466 break;
1467 }
1468 case TargetOpcode::G_SET_FPENV:
1469 case TargetOpcode::G_SET_FPMODE: {
1470 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1471 if (Result != Legalized)
1472 return Result;
1473 break;
1474 }
1475 case TargetOpcode::G_RESET_FPENV:
1476 case TargetOpcode::G_RESET_FPMODE: {
1477 LegalizeResult Result =
1478 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1479 if (Result != Legalized)
1480 return Result;
1481 break;
1482 }
1483 }
1484
1485 MI.eraseFromParent();
1486 return Legalized;
1487}
1488
1489LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1490 unsigned TypeIdx,
1491 LLT NarrowTy) {
1492 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1493 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1494
1495 switch (MI.getOpcode()) {
1496 default:
1497 return UnableToLegalize;
1498 case TargetOpcode::G_IMPLICIT_DEF: {
1499 Register DstReg = MI.getOperand(0).getReg();
1500 LLT DstTy = MRI.getType(DstReg);
1501
1502 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1503 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1504 // FIXME: Although this would also be legal for the general case, it causes
1505 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1506 // combines not being hit). This seems to be a problem related to the
1507 // artifact combiner.
1508 if (SizeOp0 % NarrowSize != 0) {
1509 LLT ImplicitTy = NarrowTy;
1510 if (DstTy.isVector())
1511 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
1512
1513 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1514 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1515
1516 MI.eraseFromParent();
1517 return Legalized;
1518 }
1519
1520 int NumParts = SizeOp0 / NarrowSize;
1521
1522 SmallVector<Register, 2> DstRegs;
1523 for (int i = 0; i < NumParts; ++i)
1524 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1525
1526 if (DstTy.isVector())
1527 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1528 else
1529 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1530 MI.eraseFromParent();
1531 return Legalized;
1532 }
1533 case TargetOpcode::G_CONSTANT: {
1534 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1535 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1536 unsigned TotalSize = Ty.getSizeInBits();
1537 unsigned NarrowSize = NarrowTy.getSizeInBits();
1538 int NumParts = TotalSize / NarrowSize;
1539
1540 SmallVector<Register, 4> PartRegs;
1541 for (int I = 0; I != NumParts; ++I) {
1542 unsigned Offset = I * NarrowSize;
1543 auto K = MIRBuilder.buildConstant(NarrowTy,
1544 Val.lshr(Offset).trunc(NarrowSize));
1545 PartRegs.push_back(K.getReg(0));
1546 }
1547
1548 LLT LeftoverTy;
1549 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1550 SmallVector<Register, 1> LeftoverRegs;
1551 if (LeftoverBits != 0) {
1552 LeftoverTy = LLT::scalar(LeftoverBits);
1553 auto K = MIRBuilder.buildConstant(
1554 LeftoverTy,
1555 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1556 LeftoverRegs.push_back(K.getReg(0));
1557 }
1558
1559 insertParts(MI.getOperand(0).getReg(),
1560 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1561
1562 MI.eraseFromParent();
1563 return Legalized;
1564 }
1565 case TargetOpcode::G_SEXT:
1566 case TargetOpcode::G_ZEXT:
1567 case TargetOpcode::G_ANYEXT:
1568 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1569 case TargetOpcode::G_TRUNC: {
1570 if (TypeIdx != 1)
1571 return UnableToLegalize;
1572
1573 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1574 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1575 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1576 return UnableToLegalize;
1577 }
1578
1579 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1580 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1581 MI.eraseFromParent();
1582 return Legalized;
1583 }
1584 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1585 case TargetOpcode::G_FREEZE: {
1586 if (TypeIdx != 0)
1587 return UnableToLegalize;
1588
1589 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1590 // Should widen scalar first
1591 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1592 return UnableToLegalize;
1593
1594 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1595 SmallVector<Register, 8> Parts;
1596 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1597 Parts.push_back(
1598 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1599 .getReg(0));
1600 }
1601
1602 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1603 MI.eraseFromParent();
1604 return Legalized;
1605 }
1606 case TargetOpcode::G_ADD:
1607 case TargetOpcode::G_SUB:
1608 case TargetOpcode::G_SADDO:
1609 case TargetOpcode::G_SSUBO:
1610 case TargetOpcode::G_SADDE:
1611 case TargetOpcode::G_SSUBE:
1612 case TargetOpcode::G_UADDO:
1613 case TargetOpcode::G_USUBO:
1614 case TargetOpcode::G_UADDE:
1615 case TargetOpcode::G_USUBE:
1616 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1617 case TargetOpcode::G_MUL:
1618 case TargetOpcode::G_UMULH:
1619 return narrowScalarMul(MI, NarrowTy);
1620 case TargetOpcode::G_EXTRACT:
1621 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1622 case TargetOpcode::G_INSERT:
1623 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1624 case TargetOpcode::G_LOAD: {
1625 auto &LoadMI = cast<GLoad>(MI);
1626 Register DstReg = LoadMI.getDstReg();
1627 LLT DstTy = MRI.getType(DstReg);
1628 if (DstTy.isVector())
1629 return UnableToLegalize;
1630
1631 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1632 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1633 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1634 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1635 LoadMI.eraseFromParent();
1636 return Legalized;
1637 }
1638
1639 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1640 }
1641 case TargetOpcode::G_ZEXTLOAD:
1642 case TargetOpcode::G_SEXTLOAD: {
1643 auto &LoadMI = cast<GExtLoad>(MI);
1644 Register DstReg = LoadMI.getDstReg();
1645 Register PtrReg = LoadMI.getPointerReg();
1646
1647 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1648 auto &MMO = LoadMI.getMMO();
1649 unsigned MemSize = MMO.getSizeInBits().getValue();
1650
1651 if (MemSize == NarrowSize) {
1652 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1653 } else if (MemSize < NarrowSize) {
1654 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1655 } else if (MemSize > NarrowSize) {
1656 // FIXME: Need to split the load.
1657 return UnableToLegalize;
1658 }
1659
1660 if (isa<GZExtLoad>(LoadMI))
1661 MIRBuilder.buildZExt(DstReg, TmpReg);
1662 else
1663 MIRBuilder.buildSExt(DstReg, TmpReg);
1664
1665 LoadMI.eraseFromParent();
1666 return Legalized;
1667 }
1668 case TargetOpcode::G_STORE: {
1669 auto &StoreMI = cast<GStore>(MI);
1670
1671 Register SrcReg = StoreMI.getValueReg();
1672 LLT SrcTy = MRI.getType(SrcReg);
1673 if (SrcTy.isVector())
1674 return UnableToLegalize;
1675
1676 int NumParts = SizeOp0 / NarrowSize;
1677 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1678 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1679 if (SrcTy.isVector() && LeftoverBits != 0)
1680 return UnableToLegalize;
1681
1682 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1683 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1684 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1685 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1686 StoreMI.eraseFromParent();
1687 return Legalized;
1688 }
1689
1690 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1691 }
1692 case TargetOpcode::G_SELECT:
1693 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1694 case TargetOpcode::G_AND:
1695 case TargetOpcode::G_OR:
1696 case TargetOpcode::G_XOR: {
1697 // Legalize bitwise operation:
1698 // A = BinOp<Ty> B, C
1699 // into:
1700 // B1, ..., BN = G_UNMERGE_VALUES B
1701 // C1, ..., CN = G_UNMERGE_VALUES C
1702 // A1 = BinOp<Ty/N> B1, C1
1703 // ...
1704 // AN = BinOp<Ty/N> BN, CN
1705 // A = G_MERGE_VALUES A1, ..., AN
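//
// For example (illustrative): a G_XOR on s128 narrowed with NarrowTy = s32
// unmerges each operand into four s32 pieces, emits four s32 G_XORs, and
// merges the four results back into the s128 destination.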
1706 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1707 }
1708 case TargetOpcode::G_SHL:
1709 case TargetOpcode::G_LSHR:
1710 case TargetOpcode::G_ASHR:
1711 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1712 case TargetOpcode::G_CTLZ:
1713 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1714 case TargetOpcode::G_CTTZ:
1715 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1716 case TargetOpcode::G_CTPOP:
1717 if (TypeIdx == 1)
1718 switch (MI.getOpcode()) {
1719 case TargetOpcode::G_CTLZ:
1720 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1721 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1722 case TargetOpcode::G_CTTZ:
1723 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1724 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1725 case TargetOpcode::G_CTPOP:
1726 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1727 default:
1728 return UnableToLegalize;
1729 }
1730
1731 Observer.changingInstr(MI);
1732 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1733 Observer.changedInstr(MI);
1734 return Legalized;
1735 case TargetOpcode::G_INTTOPTR:
1736 if (TypeIdx != 1)
1737 return UnableToLegalize;
1738
1739 Observer.changingInstr(MI);
1740 narrowScalarSrc(MI, NarrowTy, 1);
1741 Observer.changedInstr(MI);
1742 return Legalized;
1743 case TargetOpcode::G_PTRTOINT:
1744 if (TypeIdx != 0)
1745 return UnableToLegalize;
1746
1747 Observer.changingInstr(MI);
1748 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1749 Observer.changedInstr(MI);
1750 return Legalized;
1751 case TargetOpcode::G_PHI: {
1752 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1753 // NarrowSize.
1754 if (SizeOp0 % NarrowSize != 0)
1755 return UnableToLegalize;
1756
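     // The approach below: split each incoming value into NarrowTy pieces in
     // its predecessor block, build one narrow G_PHI per piece, and merge the
     // pieces back together at the head of this block.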
1757 unsigned NumParts = SizeOp0 / NarrowSize;
1758 SmallVector<Register, 2> DstRegs(NumParts);
1759 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1760 Observer.changingInstr(MI);
1761 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1762 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1763 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1764 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1765 SrcRegs[i / 2], MIRBuilder, MRI);
1766 }
1767 MachineBasicBlock &MBB = *MI.getParent();
1768 MIRBuilder.setInsertPt(MBB, MI);
1769 for (unsigned i = 0; i < NumParts; ++i) {
1770 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1771       MachineInstrBuilder MIB =
1772           MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1773 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1774 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1775 }
1776 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1777 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1778 Observer.changedInstr(MI);
1779 MI.eraseFromParent();
1780 return Legalized;
1781 }
1782 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1783 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1784 if (TypeIdx != 2)
1785 return UnableToLegalize;
1786
1787 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1788 Observer.changingInstr(MI);
1789 narrowScalarSrc(MI, NarrowTy, OpIdx);
1790 Observer.changedInstr(MI);
1791 return Legalized;
1792 }
1793 case TargetOpcode::G_ICMP: {
1794 Register LHS = MI.getOperand(2).getReg();
1795 LLT SrcTy = MRI.getType(LHS);
1796 CmpInst::Predicate Pred =
1797 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1798
1799 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1800 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1801 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1802 LHSLeftoverRegs, MIRBuilder, MRI))
1803 return UnableToLegalize;
1804
1805 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1806 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1807 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1808 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1809 return UnableToLegalize;
1810
1811 // We now have the LHS and RHS of the compare split into narrow-type
1812 // registers, plus potentially some leftover type.
1813 Register Dst = MI.getOperand(0).getReg();
1814 LLT ResTy = MRI.getType(Dst);
1815 if (ICmpInst::isEquality(Pred)) {
1816 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1817 // them together. For each equal part, the result should be all 0s. For
1818 // each non-equal part, we'll get at least one 1.
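       // For example (a sketch; register names are illustrative), an s64
       // equality compare split into s32 parts becomes:
       //   %xlo:_(s32) = G_XOR %lhs_lo, %rhs_lo
       //   %xhi:_(s32) = G_XOR %lhs_hi, %rhs_hi
       //   %or:_(s32) = G_OR %xlo, %xhi
       //   %zero:_(s32) = G_CONSTANT i32 0
       //   %dst:_(s1) = G_ICMP intpred(eq), %or, %zero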
1819       auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1820       SmallVector<Register, 4> Xors;
1821 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1822 auto LHS = std::get<0>(LHSAndRHS);
1823 auto RHS = std::get<1>(LHSAndRHS);
1824 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1825 Xors.push_back(Xor);
1826 }
1827
1828 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1829 // to the desired narrow type so that we can OR them together later.
1830 SmallVector<Register, 4> WidenedXors;
1831 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1832 auto LHS = std::get<0>(LHSAndRHS);
1833 auto RHS = std::get<1>(LHSAndRHS);
1834 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1835 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1836 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1837 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1838 llvm::append_range(Xors, WidenedXors);
1839 }
1840
1841 // Now, for each part we broke up, we know if they are equal/not equal
1842 // based off the G_XOR. We can OR these all together and compare against
1843 // 0 to get the result.
1844 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1845 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1846 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1847 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1848 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1849 } else {
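       // For the remaining predicates, walk the parts from least to most
       // significant. Each step compares the current parts with the unsigned
       // form of the predicate and keeps the result accumulated from the
       // lower parts only when the current parts are equal; the final part
       // (when there is no leftover piece) uses the original predicate and
       // writes the destination directly.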
1850 Register CmpIn;
1851 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1852 Register CmpOut;
1853 CmpInst::Predicate PartPred;
1854
1855 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1856 PartPred = Pred;
1857 CmpOut = Dst;
1858 } else {
1859 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1860 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1861 }
1862
1863 if (!CmpIn) {
1864 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
1865 RHSPartRegs[I]);
1866 } else {
1867 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
1868 RHSPartRegs[I]);
1869 auto CmpEq = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1870 LHSPartRegs[I], RHSPartRegs[I]);
1871 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1872 }
1873
1874 CmpIn = CmpOut;
1875 }
1876
1877 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1878 Register CmpOut;
1879 CmpInst::Predicate PartPred;
1880
1881 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1882 PartPred = Pred;
1883 CmpOut = Dst;
1884 } else {
1885 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1886 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1887 }
1888
1889 if (!CmpIn) {
1890 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
1891 RHSLeftoverRegs[I]);
1892 } else {
1893 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
1894 RHSLeftoverRegs[I]);
1895 auto CmpEq =
1896 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1897 LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
1898 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1899 }
1900
1901 CmpIn = CmpOut;
1902 }
1903 }
1904 MI.eraseFromParent();
1905 return Legalized;
1906 }
1907 case TargetOpcode::G_FCMP:
1908 if (TypeIdx != 0)
1909 return UnableToLegalize;
1910
1911 Observer.changingInstr(MI);
1912 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1913 Observer.changedInstr(MI);
1914 return Legalized;
1915
1916 case TargetOpcode::G_SEXT_INREG: {
1917 if (TypeIdx != 0)
1918 return UnableToLegalize;
1919
1920 int64_t SizeInBits = MI.getOperand(2).getImm();
1921
1922 // So long as the new type has more bits than the bits we're extending we
1923 // don't need to break it apart.
1924 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1925 Observer.changingInstr(MI);
1926 // We don't lose any non-extension bits by truncating the src and
1927 // sign-extending the dst.
1928 MachineOperand &MO1 = MI.getOperand(1);
1929 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1930 MO1.setReg(TruncMIB.getReg(0));
1931
1932 MachineOperand &MO2 = MI.getOperand(0);
1933 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1934 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1935 MIRBuilder.buildSExt(MO2, DstExt);
1936 MO2.setReg(DstExt);
1937 Observer.changedInstr(MI);
1938 return Legalized;
1939 }
1940
1941 // Break it apart. Components below the extension point are unmodified. The
1942 // component containing the extension point becomes a narrower SEXT_INREG.
1943 // Components above it are ashr'd from the component containing the
1944 // extension point.
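     // As a sketch (register names are illustrative), narrowing
     // %out:_(s64) = G_SEXT_INREG %in:_(s64), 8 with NarrowTy = s32 gives:
     //   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %in
     //   %lo_ext:_(s32) = G_SEXT_INREG %lo, 8
     //   %c31:_(s32) = G_CONSTANT i32 31
     //   %hi_ext:_(s32) = G_ASHR %lo_ext, %c31
     //   %out:_(s64) = G_MERGE_VALUES %lo_ext, %hi_ext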
1945 if (SizeOp0 % NarrowSize != 0)
1946 return UnableToLegalize;
1947 int NumParts = SizeOp0 / NarrowSize;
1948
1949     // List the registers where the destination will be scattered.
1950     SmallVector<Register, 2> DstRegs;
1951     // List the registers where the source will be split.
1952     SmallVector<Register, 2> SrcRegs;
1953
1954 // Create all the temporary registers.
1955 for (int i = 0; i < NumParts; ++i) {
1956 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1957
1958 SrcRegs.push_back(SrcReg);
1959 }
1960
1961 // Explode the big arguments into smaller chunks.
1962 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1963
1964 Register AshrCstReg =
1965 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1966 .getReg(0);
1967 Register FullExtensionReg;
1968 Register PartialExtensionReg;
1969
1970 // Do the operation on each small part.
1971 for (int i = 0; i < NumParts; ++i) {
1972 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
1973 DstRegs.push_back(SrcRegs[i]);
1974 PartialExtensionReg = DstRegs.back();
1975 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1976 assert(PartialExtensionReg &&
1977 "Expected to visit partial extension before full");
1978 if (FullExtensionReg) {
1979 DstRegs.push_back(FullExtensionReg);
1980 continue;
1981 }
1982 DstRegs.push_back(
1983 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1984 .getReg(0));
1985 FullExtensionReg = DstRegs.back();
1986 } else {
1987         DstRegs.push_back(
1988             MIRBuilder
1989                 .buildInstr(
1990 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1991 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1992 .getReg(0));
1993 PartialExtensionReg = DstRegs.back();
1994 }
1995 }
1996
1997 // Gather the destination registers into the final destination.
1998 Register DstReg = MI.getOperand(0).getReg();
1999 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2000 MI.eraseFromParent();
2001 return Legalized;
2002 }
2003 case TargetOpcode::G_BSWAP:
2004 case TargetOpcode::G_BITREVERSE: {
2005 if (SizeOp0 % NarrowSize != 0)
2006 return UnableToLegalize;
2007
2008 Observer.changingInstr(MI);
2009 SmallVector<Register, 2> SrcRegs, DstRegs;
2010 unsigned NumParts = SizeOp0 / NarrowSize;
2011 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2012 MIRBuilder, MRI);
2013
2014 for (unsigned i = 0; i < NumParts; ++i) {
2015 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
2016 {SrcRegs[NumParts - 1 - i]});
2017 DstRegs.push_back(DstPart.getReg(0));
2018 }
2019
2020 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
2021
2022 Observer.changedInstr(MI);
2023 MI.eraseFromParent();
2024 return Legalized;
2025 }
2026 case TargetOpcode::G_PTR_ADD:
2027 case TargetOpcode::G_PTRMASK: {
2028 if (TypeIdx != 1)
2029 return UnableToLegalize;
2030 Observer.changingInstr(MI);
2031 narrowScalarSrc(MI, NarrowTy, 2);
2032 Observer.changedInstr(MI);
2033 return Legalized;
2034 }
2035 case TargetOpcode::G_FPTOUI:
2036 case TargetOpcode::G_FPTOSI:
2037 case TargetOpcode::G_FPTOUI_SAT:
2038 case TargetOpcode::G_FPTOSI_SAT:
2039 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
2040 case TargetOpcode::G_FPEXT:
2041 if (TypeIdx != 0)
2042 return UnableToLegalize;
2043 Observer.changingInstr(MI);
2044 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
2045 Observer.changedInstr(MI);
2046 return Legalized;
2047 case TargetOpcode::G_FLDEXP:
2048 case TargetOpcode::G_STRICT_FLDEXP:
2049 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
2050 case TargetOpcode::G_VSCALE: {
2051 Register Dst = MI.getOperand(0).getReg();
2052 LLT Ty = MRI.getType(Dst);
2053
2054 // Assume VSCALE(1) fits into a legal integer
2055 const APInt One(NarrowTy.getSizeInBits(), 1);
2056 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
2057 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
2058 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
2059 MIRBuilder.buildMul(Dst, ZExt, C);
2060
2061 MI.eraseFromParent();
2062 return Legalized;
2063 }
2064 }
2065}
2066
2067 Register LegalizerHelper::coerceToScalar(Register Val) {
2068   LLT Ty = MRI.getType(Val);
2069 if (Ty.isScalar())
2070 return Val;
2071
2072 const DataLayout &DL = MIRBuilder.getDataLayout();
2073 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
2074 if (Ty.isPointer()) {
2075 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2076 return Register();
2077 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2078 }
2079
2080 Register NewVal = Val;
2081
2082 assert(Ty.isVector());
2083 if (Ty.isPointerVector())
2084 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2085 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2086}
2087
2088 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
2089                                      unsigned OpIdx, unsigned ExtOpcode) {
2090 MachineOperand &MO = MI.getOperand(OpIdx);
2091 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2092 MO.setReg(ExtB.getReg(0));
2093}
2094
2095 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
2096                                       unsigned OpIdx) {
2097 MachineOperand &MO = MI.getOperand(OpIdx);
2098 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
2099 MO.setReg(ExtB.getReg(0));
2100}
2101
2102 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
2103                                      unsigned OpIdx, unsigned TruncOpcode) {
2104 MachineOperand &MO = MI.getOperand(OpIdx);
2105 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2106 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2107 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2108 MO.setReg(DstExt);
2109}
2110
2111 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
2112                                       unsigned OpIdx, unsigned ExtOpcode) {
2113 MachineOperand &MO = MI.getOperand(OpIdx);
2114 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2115 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2116 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2117 MO.setReg(DstTrunc);
2118}
2119
2120 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
2121                                             unsigned OpIdx) {
2122 MachineOperand &MO = MI.getOperand(OpIdx);
2123 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2124 Register Dst = MO.getReg();
2125 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2126 MO.setReg(DstExt);
2127 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2128}
2129
2130 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
2131                                             unsigned OpIdx) {
2132 MachineOperand &MO = MI.getOperand(OpIdx);
2133 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2134}
2135
2136 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2137   MachineOperand &Op = MI.getOperand(OpIdx);
2138 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
2139}
2140
2141 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2142   MachineOperand &MO = MI.getOperand(OpIdx);
2143 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2144 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2145 MIRBuilder.buildBitcast(MO, CastDst);
2146 MO.setReg(CastDst);
2147}
2148
2149 LegalizerHelper::LegalizeResult
2150 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
2151 LLT WideTy) {
2152 if (TypeIdx != 1)
2153 return UnableToLegalize;
2154
2155 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
2156 if (DstTy.isVector())
2157 return UnableToLegalize;
2158
2159 LLT SrcTy = MRI.getType(Src1Reg);
2160 const int DstSize = DstTy.getSizeInBits();
2161 const int SrcSize = SrcTy.getSizeInBits();
2162 const int WideSize = WideTy.getSizeInBits();
2163 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2164
2165 unsigned NumOps = MI.getNumOperands();
2166 unsigned NumSrc = MI.getNumOperands() - 1;
2167 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2168
2169 if (WideSize >= DstSize) {
2170 // Directly pack the bits in the target type.
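   // As a sketch (register names are illustrative), widening
   // %d:_(s16) = G_MERGE_VALUES %a:_(s8), %b:_(s8) with WideTy = s32 gives:
   //   %r0:_(s32) = G_ZEXT %a
   //   %b_ext:_(s32) = G_ZEXT %b
   //   %c8:_(s32) = G_CONSTANT i32 8
   //   %b_shl:_(s32) = G_SHL %b_ext, %c8
   //   %r1:_(s32) = G_OR %r0, %b_shl
   //   %d:_(s16) = G_TRUNC %r1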
2171 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
2172
2173 for (unsigned I = 2; I != NumOps; ++I) {
2174 const unsigned Offset = (I - 1) * PartSize;
2175
2176 Register SrcReg = MI.getOperand(I).getReg();
2177 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
2178
2179 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
2180
2181 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
2182 MRI.createGenericVirtualRegister(WideTy);
2183
2184 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
2185 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
2186 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
2187 ResultReg = NextResult;
2188 }
2189
2190 if (WideSize > DstSize)
2191 MIRBuilder.buildTrunc(DstReg, ResultReg);
2192 else if (DstTy.isPointer())
2193 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
2194
2195 MI.eraseFromParent();
2196 return Legalized;
2197 }
2198
2199 // Unmerge the original values to the GCD type, and recombine to the next
2200 // multiple greater than the original type.
2201 //
2202 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2203 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2204 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2205 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2206 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2207 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2208 // %12:_(s12) = G_MERGE_VALUES %10, %11
2209 //
2210 // Padding with undef if necessary:
2211 //
2212 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2213 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2214 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2215 // %7:_(s2) = G_IMPLICIT_DEF
2216 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2217 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2218 // %10:_(s12) = G_MERGE_VALUES %8, %9
2219
2220 const int GCD = std::gcd(SrcSize, WideSize);
2221 LLT GCDTy = LLT::scalar(GCD);
2222
2223 SmallVector<Register, 8> NewMergeRegs;
2224 SmallVector<Register, 8> Unmerges;
2225 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
2226
2227 // Decompose the original operands if they don't evenly divide.
2228 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
2229 Register SrcReg = MO.getReg();
2230 if (GCD == SrcSize) {
2231 Unmerges.push_back(SrcReg);
2232 } else {
2233 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2234 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2235 Unmerges.push_back(Unmerge.getReg(J));
2236 }
2237 }
2238
2239 // Pad with undef to the next size that is a multiple of the requested size.
2240 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
2241 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
2242 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
2243 Unmerges.push_back(UndefReg);
2244 }
2245
2246 const int PartsPerGCD = WideSize / GCD;
2247
2248 // Build merges of each piece.
2249 ArrayRef<Register> Slicer(Unmerges);
2250 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2251 auto Merge =
2252 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2253 NewMergeRegs.push_back(Merge.getReg(0));
2254 }
2255
2256 // A truncate may be necessary if the requested type doesn't evenly divide the
2257 // original result type.
2258 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
2259 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2260 } else {
2261 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2262 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2263 }
2264
2265 MI.eraseFromParent();
2266 return Legalized;
2267}
2268
2269 LegalizerHelper::LegalizeResult
2270 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
2271 LLT WideTy) {
2272 if (TypeIdx != 0)
2273 return UnableToLegalize;
2274
2275 int NumDst = MI.getNumOperands() - 1;
2276 Register SrcReg = MI.getOperand(NumDst).getReg();
2277 LLT SrcTy = MRI.getType(SrcReg);
2278 if (SrcTy.isVector())
2279 return UnableToLegalize;
2280
2281 Register Dst0Reg = MI.getOperand(0).getReg();
2282 LLT DstTy = MRI.getType(Dst0Reg);
2283 if (!DstTy.isScalar())
2284 return UnableToLegalize;
2285
2286 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
2287 if (SrcTy.isPointer()) {
2288 const DataLayout &DL = MIRBuilder.getDataLayout();
2289 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
2290 LLVM_DEBUG(
2291 dbgs() << "Not casting non-integral address space integer\n");
2292 return UnableToLegalize;
2293 }
2294
2295 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
2296 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2297 }
2298
2299 // Widen SrcTy to WideTy. This does not affect the result, but since the
2300 // user requested this size, it is probably better handled than SrcTy and
2301 // should reduce the total number of legalization artifacts.
2302 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2303 SrcTy = WideTy;
2304 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2305 }
2306
2307     // There's no unmerge type to target. Directly extract the bits from the
2308     // source type.
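     // As a sketch (register names are illustrative), widening
     // %a:_(s8), %b:_(s8) = G_UNMERGE_VALUES %x:_(s16) with WideTy = s32 gives:
     //   %xe:_(s32) = G_ANYEXT %x
     //   %a:_(s8) = G_TRUNC %xe
     //   %c8:_(s32) = G_CONSTANT i32 8
     //   %sh:_(s32) = G_LSHR %xe, %c8
     //   %b:_(s8) = G_TRUNC %sh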
2309 unsigned DstSize = DstTy.getSizeInBits();
2310
2311 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2312 for (int I = 1; I != NumDst; ++I) {
2313 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2314 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2315 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2316 }
2317
2318 MI.eraseFromParent();
2319 return Legalized;
2320 }
2321
2322 // Extend the source to a wider type.
2323 LLT LCMTy = getLCMType(SrcTy, WideTy);
2324
2325 Register WideSrc = SrcReg;
2326 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2327 // TODO: If this is an integral address space, cast to integer and anyext.
2328 if (SrcTy.isPointer()) {
2329 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2330 return UnableToLegalize;
2331 }
2332
2333 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2334 }
2335
2336 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2337
2338 // Create a sequence of unmerges and merges to the original results. Since we
2339 // may have widened the source, we will need to pad the results with dead defs
2340 // to cover the source register.
2341 // e.g. widen s48 to s64:
2342 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2343 //
2344 // =>
2345 // %4:_(s192) = G_ANYEXT %0:_(s96)
2346 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2347 // ; unpack to GCD type, with extra dead defs
2348 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2349 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2350 // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
2351 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2352 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2353 const LLT GCDTy = getGCDType(WideTy, DstTy);
2354 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2355 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2356
2357 // Directly unmerge to the destination without going through a GCD type
2358 // if possible
2359 if (PartsPerRemerge == 1) {
2360 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2361
2362 for (int I = 0; I != NumUnmerge; ++I) {
2363 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2364
2365 for (int J = 0; J != PartsPerUnmerge; ++J) {
2366 int Idx = I * PartsPerUnmerge + J;
2367 if (Idx < NumDst)
2368 MIB.addDef(MI.getOperand(Idx).getReg());
2369 else {
2370 // Create dead def for excess components.
2371 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2372 }
2373 }
2374
2375 MIB.addUse(Unmerge.getReg(I));
2376 }
2377 } else {
2378 SmallVector<Register, 16> Parts;
2379 for (int J = 0; J != NumUnmerge; ++J)
2380 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2381
2382 SmallVector<Register, 8> RemergeParts;
2383 for (int I = 0; I != NumDst; ++I) {
2384 for (int J = 0; J < PartsPerRemerge; ++J) {
2385 const int Idx = I * PartsPerRemerge + J;
2386 RemergeParts.emplace_back(Parts[Idx]);
2387 }
2388
2389 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2390 RemergeParts.clear();
2391 }
2392 }
2393
2394 MI.eraseFromParent();
2395 return Legalized;
2396}
2397
2398 LegalizerHelper::LegalizeResult
2399 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2400 LLT WideTy) {
2401 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2402 unsigned Offset = MI.getOperand(2).getImm();
2403
2404 if (TypeIdx == 0) {
2405 if (SrcTy.isVector() || DstTy.isVector())
2406 return UnableToLegalize;
2407
2408 SrcOp Src(SrcReg);
2409 if (SrcTy.isPointer()) {
2410 // Extracts from pointers can be handled only if they are really just
2411 // simple integers.
2412 const DataLayout &DL = MIRBuilder.getDataLayout();
2413 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2414 return UnableToLegalize;
2415
2416 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2417 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2418 SrcTy = SrcAsIntTy;
2419 }
2420
2421 if (DstTy.isPointer())
2422 return UnableToLegalize;
2423
2424 if (Offset == 0) {
2425 // Avoid a shift in the degenerate case.
2426 MIRBuilder.buildTrunc(DstReg,
2427 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2428 MI.eraseFromParent();
2429 return Legalized;
2430 }
2431
2432 // Do a shift in the source type.
2433 LLT ShiftTy = SrcTy;
2434 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2435 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2436 ShiftTy = WideTy;
2437 }
2438
2439 auto LShr = MIRBuilder.buildLShr(
2440 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2441 MIRBuilder.buildTrunc(DstReg, LShr);
2442 MI.eraseFromParent();
2443 return Legalized;
2444 }
2445
2446 if (SrcTy.isScalar()) {
2447 Observer.changingInstr(MI);
2448 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2449 Observer.changedInstr(MI);
2450 return Legalized;
2451 }
2452
2453 if (!SrcTy.isVector())
2454 return UnableToLegalize;
2455
2456 if (DstTy != SrcTy.getElementType())
2457 return UnableToLegalize;
2458
2459 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2460 return UnableToLegalize;
2461
2462 Observer.changingInstr(MI);
2463 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2464
2465 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2466 Offset);
2467 widenScalarDst(MI, WideTy.getScalarType(), 0);
2468 Observer.changedInstr(MI);
2469 return Legalized;
2470}
2471
2472 LegalizerHelper::LegalizeResult
2473 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2474 LLT WideTy) {
2475 if (TypeIdx != 0 || WideTy.isVector())
2476 return UnableToLegalize;
2477 Observer.changingInstr(MI);
2478 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2479 widenScalarDst(MI, WideTy);
2480 Observer.changedInstr(MI);
2481 return Legalized;
2482}
2483
2484 LegalizerHelper::LegalizeResult
2485 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2486 LLT WideTy) {
2487 unsigned Opcode;
2488 unsigned ExtOpcode;
2489 std::optional<Register> CarryIn;
2490 switch (MI.getOpcode()) {
2491 default:
2492 llvm_unreachable("Unexpected opcode!");
2493 case TargetOpcode::G_SADDO:
2494 Opcode = TargetOpcode::G_ADD;
2495 ExtOpcode = TargetOpcode::G_SEXT;
2496 break;
2497 case TargetOpcode::G_SSUBO:
2498 Opcode = TargetOpcode::G_SUB;
2499 ExtOpcode = TargetOpcode::G_SEXT;
2500 break;
2501 case TargetOpcode::G_UADDO:
2502 Opcode = TargetOpcode::G_ADD;
2503 ExtOpcode = TargetOpcode::G_ZEXT;
2504 break;
2505 case TargetOpcode::G_USUBO:
2506 Opcode = TargetOpcode::G_SUB;
2507 ExtOpcode = TargetOpcode::G_ZEXT;
2508 break;
2509 case TargetOpcode::G_SADDE:
2510 Opcode = TargetOpcode::G_UADDE;
2511 ExtOpcode = TargetOpcode::G_SEXT;
2512 CarryIn = MI.getOperand(4).getReg();
2513 break;
2514 case TargetOpcode::G_SSUBE:
2515 Opcode = TargetOpcode::G_USUBE;
2516 ExtOpcode = TargetOpcode::G_SEXT;
2517 CarryIn = MI.getOperand(4).getReg();
2518 break;
2519 case TargetOpcode::G_UADDE:
2520 Opcode = TargetOpcode::G_UADDE;
2521 ExtOpcode = TargetOpcode::G_ZEXT;
2522 CarryIn = MI.getOperand(4).getReg();
2523 break;
2524 case TargetOpcode::G_USUBE:
2525 Opcode = TargetOpcode::G_USUBE;
2526 ExtOpcode = TargetOpcode::G_ZEXT;
2527 CarryIn = MI.getOperand(4).getReg();
2528 break;
2529 }
2530
2531 if (TypeIdx == 1) {
2532 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2533
2534 Observer.changingInstr(MI);
2535 if (CarryIn)
2536 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2537 widenScalarDst(MI, WideTy, 1);
2538
2539 Observer.changedInstr(MI);
2540 return Legalized;
2541 }
2542
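   // As a sketch (register names are illustrative), widening
   // %r:_(s8), %ov:_(s1) = G_SADDO %a:_(s8), %b:_(s8) to s32 gives:
   //   %ae:_(s32) = G_SEXT %a
   //   %be:_(s32) = G_SEXT %b
   //   %sum:_(s32) = G_ADD %ae, %be
   //   %t:_(s8) = G_TRUNC %sum
   //   %re:_(s32) = G_SEXT %t
   //   %ov:_(s1) = G_ICMP intpred(ne), %sum, %re
   //   %r:_(s8) = G_TRUNC %sum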
2543 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2544 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2545 // Do the arithmetic in the larger type.
2546 Register NewOp;
2547 if (CarryIn) {
2548 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2549 NewOp = MIRBuilder
2550 .buildInstr(Opcode, {WideTy, CarryOutTy},
2551 {LHSExt, RHSExt, *CarryIn})
2552 .getReg(0);
2553 } else {
2554 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2555 }
2556 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2557 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2558 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2559 // There is no overflow if the ExtOp is the same as NewOp.
2560 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2561 // Now trunc the NewOp to the original result.
2562 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2563 MI.eraseFromParent();
2564 return Legalized;
2565}
2566
2567 LegalizerHelper::LegalizeResult
2568 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2569 LLT WideTy) {
2570 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2571 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2572 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2573 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2574 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2575 // We can convert this to:
2576 // 1. Any extend iN to iM
2577 // 2. SHL by M-N
2578 // 3. [US][ADD|SUB|SHL]SAT
2579 // 4. L/ASHR by M-N
2580 //
2581 // It may be more efficient to lower this to a min and a max operation in
2582 // the higher precision arithmetic if the promoted operation isn't legal,
2583 // but this decision is up to the target's lowering request.
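   // As a sketch (register names are illustrative), widening
   // %r:_(s8) = G_UADDSAT %a:_(s8), %b:_(s8) to s32 gives:
   //   %ae:_(s32) = G_ANYEXT %a
   //   %be:_(s32) = G_ANYEXT %b
   //   %k:_(s32) = G_CONSTANT i32 24
   //   %al:_(s32) = G_SHL %ae, %k
   //   %bl:_(s32) = G_SHL %be, %k
   //   %s:_(s32) = G_UADDSAT %al, %bl
   //   %sr:_(s32) = G_LSHR %s, %k
   //   %r:_(s8) = G_TRUNC %sr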
2584 Register DstReg = MI.getOperand(0).getReg();
2585
2586 unsigned NewBits = WideTy.getScalarSizeInBits();
2587 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2588
2589 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2590 // must not left shift the RHS to preserve the shift amount.
2591 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2592 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2593 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2594 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2595 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2596 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2597
2598 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2599 {ShiftL, ShiftR}, MI.getFlags());
2600
2601 // Use a shift that will preserve the number of sign bits when the trunc is
2602 // folded away.
2603 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2604 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2605
2606 MIRBuilder.buildTrunc(DstReg, Result);
2607 MI.eraseFromParent();
2608 return Legalized;
2609}
2610
2611 LegalizerHelper::LegalizeResult
2612 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2613 LLT WideTy) {
2614 if (TypeIdx == 1) {
2615 Observer.changingInstr(MI);
2616 widenScalarDst(MI, WideTy, 1);
2617 Observer.changedInstr(MI);
2618 return Legalized;
2619 }
2620
2621 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2622 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2623 LLT SrcTy = MRI.getType(LHS);
2624 LLT OverflowTy = MRI.getType(OriginalOverflow);
2625 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2626
2627 // To determine if the result overflowed in the larger type, we extend the
2628 // input to the larger type, do the multiply (checking if it overflows),
2629 // then also check the high bits of the result to see if overflow happened
2630 // there.
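   // As a sketch (register names are illustrative), widening
   // %r:_(s8), %ov:_(s1) = G_UMULO %a:_(s8), %b:_(s8) to s16 (which cannot
   // itself overflow, since 16 >= 2 * 8) gives:
   //   %ae:_(s16) = G_ZEXT %a
   //   %be:_(s16) = G_ZEXT %b
   //   %m:_(s16) = G_MUL %ae, %be
   //   %r:_(s8) = G_TRUNC %m
   //   %mask:_(s16) = G_CONSTANT i16 255
   //   %z:_(s16) = G_AND %m, %mask
   //   %ov:_(s1) = G_ICMP intpred(ne), %m, %z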
2631 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2632 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2633 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2634
2635 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2636 // so we don't need to check the overflow result of larger type Mulo.
2637 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2638
2639 unsigned MulOpc =
2640 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2641
2642 MachineInstrBuilder Mulo;
2643 if (WideMulCanOverflow)
2644 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2645 {LeftOperand, RightOperand});
2646 else
2647 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2648
2649 auto Mul = Mulo->getOperand(0);
2650 MIRBuilder.buildTrunc(Result, Mul);
2651
2652 MachineInstrBuilder ExtResult;
2653 // Overflow occurred if it occurred in the larger type, or if the high part
2654 // of the result does not zero/sign-extend the low part. Check this second
2655 // possibility first.
2656 if (IsSigned) {
2657 // For signed, overflow occurred when the high part does not sign-extend
2658 // the low part.
2659 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2660 } else {
2661 // Unsigned overflow occurred when the high part does not zero-extend the
2662 // low part.
2663 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2664 }
2665
2666 if (WideMulCanOverflow) {
2667 auto Overflow =
2668 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2669 // Finally check if the multiplication in the larger type itself overflowed.
2670 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2671 } else {
2672 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2673 }
2674 MI.eraseFromParent();
2675 return Legalized;
2676}
2677
2678 LegalizerHelper::LegalizeResult
2679 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2680   unsigned Opcode = MI.getOpcode();
2681 switch (Opcode) {
2682 default:
2683 return UnableToLegalize;
2684 case TargetOpcode::G_ATOMICRMW_XCHG:
2685 case TargetOpcode::G_ATOMICRMW_ADD:
2686 case TargetOpcode::G_ATOMICRMW_SUB:
2687 case TargetOpcode::G_ATOMICRMW_AND:
2688 case TargetOpcode::G_ATOMICRMW_OR:
2689 case TargetOpcode::G_ATOMICRMW_XOR:
2690 case TargetOpcode::G_ATOMICRMW_MIN:
2691 case TargetOpcode::G_ATOMICRMW_MAX:
2692 case TargetOpcode::G_ATOMICRMW_UMIN:
2693 case TargetOpcode::G_ATOMICRMW_UMAX:
2694 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2695 Observer.changingInstr(MI);
2696 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2697 widenScalarDst(MI, WideTy, 0);
2698 Observer.changedInstr(MI);
2699 return Legalized;
2700 case TargetOpcode::G_ATOMIC_CMPXCHG:
2701 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2702 Observer.changingInstr(MI);
2703 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2704 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2705 widenScalarDst(MI, WideTy, 0);
2706 Observer.changedInstr(MI);
2707 return Legalized;
2708 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2709 if (TypeIdx == 0) {
2710 Observer.changingInstr(MI);
2711 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2712 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2713 widenScalarDst(MI, WideTy, 0);
2714 Observer.changedInstr(MI);
2715 return Legalized;
2716 }
2717 assert(TypeIdx == 1 &&
2718 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2719 Observer.changingInstr(MI);
2720 widenScalarDst(MI, WideTy, 1);
2721 Observer.changedInstr(MI);
2722 return Legalized;
2723 case TargetOpcode::G_EXTRACT:
2724 return widenScalarExtract(MI, TypeIdx, WideTy);
2725 case TargetOpcode::G_INSERT:
2726 return widenScalarInsert(MI, TypeIdx, WideTy);
2727 case TargetOpcode::G_MERGE_VALUES:
2728 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2729 case TargetOpcode::G_UNMERGE_VALUES:
2730 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2731 case TargetOpcode::G_SADDO:
2732 case TargetOpcode::G_SSUBO:
2733 case TargetOpcode::G_UADDO:
2734 case TargetOpcode::G_USUBO:
2735 case TargetOpcode::G_SADDE:
2736 case TargetOpcode::G_SSUBE:
2737 case TargetOpcode::G_UADDE:
2738 case TargetOpcode::G_USUBE:
2739 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2740 case TargetOpcode::G_UMULO:
2741 case TargetOpcode::G_SMULO:
2742 return widenScalarMulo(MI, TypeIdx, WideTy);
2743 case TargetOpcode::G_SADDSAT:
2744 case TargetOpcode::G_SSUBSAT:
2745 case TargetOpcode::G_SSHLSAT:
2746 case TargetOpcode::G_UADDSAT:
2747 case TargetOpcode::G_USUBSAT:
2748 case TargetOpcode::G_USHLSAT:
2749 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2750 case TargetOpcode::G_CTTZ:
2751 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2752 case TargetOpcode::G_CTLZ:
2753 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2754 case TargetOpcode::G_CTPOP: {
2755 if (TypeIdx == 0) {
2756 Observer.changingInstr(MI);
2757 widenScalarDst(MI, WideTy, 0);
2758 Observer.changedInstr(MI);
2759 return Legalized;
2760 }
2761
2762 Register SrcReg = MI.getOperand(1).getReg();
2763
2764 // First extend the input.
2765 unsigned ExtOpc = Opcode == TargetOpcode::G_CTTZ ||
2766 Opcode == TargetOpcode::G_CTTZ_ZERO_UNDEF
2767 ? TargetOpcode::G_ANYEXT
2768 : TargetOpcode::G_ZEXT;
2769 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2770 LLT CurTy = MRI.getType(SrcReg);
2771 unsigned NewOpc = Opcode;
2772 if (NewOpc == TargetOpcode::G_CTTZ) {
2773 // The count is the same in the larger type except if the original
2774 // value was zero. This can be handled by setting the bit just off
2775 // the top of the original type.
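       // For example, for s8 widened to s32 the OR below sets bit 8 (0x100),
       // so a zero input yields cttz == 8, matching the result expected in
       // the original type.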
2776 auto TopBit =
2777           APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2778       MIBSrc = MIRBuilder.buildOr(
2779 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2780 // Now we know the operand is non-zero, use the more relaxed opcode.
2781 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2782 }
2783
2784 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2785
2786 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2787 // An optimization where the result is the CTLZ after the left shift by
2788       // (difference in size between WideTy and CurTy), that is,
2789 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2790 // Result = ctlz MIBSrc
2791 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2792 MIRBuilder.buildConstant(WideTy, SizeDiff));
2793 }
2794
2795 // Perform the operation at the larger size.
2796 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2797 // This is already the correct result for CTPOP and CTTZs
2798 if (Opcode == TargetOpcode::G_CTLZ) {
2799       // The correct result is NewOp - (difference in size between WideTy and CurTy).
2800 MIBNewOp = MIRBuilder.buildSub(
2801 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2802 }
2803
2804 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2805 MI.eraseFromParent();
2806 return Legalized;
2807 }
2808 case TargetOpcode::G_BSWAP: {
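     // Byte-swapping the any-extended value leaves the interesting bytes in
     // the high part of the wide result, so it is shifted right by the width
     // difference before truncating. For illustration, bswap of 0x1234 done
     // in s32 (taking the high bits as zero) gives 0x34120000; a logical
     // shift right by 16 recovers 0x3412.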
2809 Observer.changingInstr(MI);
2810 Register DstReg = MI.getOperand(0).getReg();
2811
2812 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2813 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2814 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2815 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2816
2817 MI.getOperand(0).setReg(DstExt);
2818
2819 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2820
2821 LLT Ty = MRI.getType(DstReg);
2822 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2823 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2824 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2825
2826 MIRBuilder.buildTrunc(DstReg, ShrReg);
2827 Observer.changedInstr(MI);
2828 return Legalized;
2829 }
2830 case TargetOpcode::G_BITREVERSE: {
2831 Observer.changingInstr(MI);
2832
2833 Register DstReg = MI.getOperand(0).getReg();
2834 LLT Ty = MRI.getType(DstReg);
2835 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2836
2837 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2838 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2839 MI.getOperand(0).setReg(DstExt);
2840 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2841
2842 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2843 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2844 MIRBuilder.buildTrunc(DstReg, Shift);
2845 Observer.changedInstr(MI);
2846 return Legalized;
2847 }
2848 case TargetOpcode::G_FREEZE:
2849 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2850 Observer.changingInstr(MI);
2851 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2852 widenScalarDst(MI, WideTy);
2853 Observer.changedInstr(MI);
2854 return Legalized;
2855
2856 case TargetOpcode::G_ABS:
2857 Observer.changingInstr(MI);
2858 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2859 widenScalarDst(MI, WideTy);
2860 Observer.changedInstr(MI);
2861 return Legalized;
2862
2863 case TargetOpcode::G_ADD:
2864 case TargetOpcode::G_AND:
2865 case TargetOpcode::G_MUL:
2866 case TargetOpcode::G_OR:
2867 case TargetOpcode::G_XOR:
2868 case TargetOpcode::G_SUB:
2869 case TargetOpcode::G_SHUFFLE_VECTOR:
2870     // Perform operation at larger width (any extension is fine here, high bits
2871 // don't affect the result) and then truncate the result back to the
2872 // original type.
2873 Observer.changingInstr(MI);
2874 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2875 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2876 widenScalarDst(MI, WideTy);
2877 Observer.changedInstr(MI);
2878 return Legalized;
2879
2880 case TargetOpcode::G_SBFX:
2881 case TargetOpcode::G_UBFX:
2882 Observer.changingInstr(MI);
2883
2884 if (TypeIdx == 0) {
2885 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2886 widenScalarDst(MI, WideTy);
2887 } else {
2888 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2889 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2890 }
2891
2892 Observer.changedInstr(MI);
2893 return Legalized;
2894
2895 case TargetOpcode::G_SHL:
2896 Observer.changingInstr(MI);
2897
2898 if (TypeIdx == 0) {
2899 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2900 widenScalarDst(MI, WideTy);
2901 } else {
2902 assert(TypeIdx == 1);
2903 // The "number of bits to shift" operand must preserve its value as an
2904 // unsigned integer:
2905 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2906 }
2907
2908 Observer.changedInstr(MI);
2909 return Legalized;
2910
2911 case TargetOpcode::G_ROTR:
2912 case TargetOpcode::G_ROTL:
2913 if (TypeIdx != 1)
2914 return UnableToLegalize;
2915
2916 Observer.changingInstr(MI);
2917 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2918 Observer.changedInstr(MI);
2919 return Legalized;
2920
2921 case TargetOpcode::G_SDIV:
2922 case TargetOpcode::G_SREM:
2923 case TargetOpcode::G_SMIN:
2924 case TargetOpcode::G_SMAX:
2925 case TargetOpcode::G_ABDS:
2926 Observer.changingInstr(MI);
2927 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2928 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2929 widenScalarDst(MI, WideTy);
2930 Observer.changedInstr(MI);
2931 return Legalized;
2932
2933 case TargetOpcode::G_SDIVREM:
2934 Observer.changingInstr(MI);
2935 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2936 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2937 widenScalarDst(MI, WideTy);
2938 widenScalarDst(MI, WideTy, 1);
2939 Observer.changedInstr(MI);
2940 return Legalized;
2941
2942 case TargetOpcode::G_ASHR:
2943 case TargetOpcode::G_LSHR:
2944 Observer.changingInstr(MI);
2945
2946 if (TypeIdx == 0) {
2947 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
2948 : TargetOpcode::G_ZEXT;
2949
2950 widenScalarSrc(MI, WideTy, 1, CvtOp);
2951 widenScalarDst(MI, WideTy);
2952 } else {
2953 assert(TypeIdx == 1);
2954 // The "number of bits to shift" operand must preserve its value as an
2955 // unsigned integer:
2956 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2957 }
2958
2959 Observer.changedInstr(MI);
2960 return Legalized;
2961 case TargetOpcode::G_UDIV:
2962 case TargetOpcode::G_UREM:
2963 case TargetOpcode::G_ABDU:
2964 Observer.changingInstr(MI);
2965 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2966 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2967 widenScalarDst(MI, WideTy);
2968 Observer.changedInstr(MI);
2969 return Legalized;
2970 case TargetOpcode::G_UDIVREM:
2971 Observer.changingInstr(MI);
2972 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2973 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2974 widenScalarDst(MI, WideTy);
2975 widenScalarDst(MI, WideTy, 1);
2976 Observer.changedInstr(MI);
2977 return Legalized;
2978 case TargetOpcode::G_UMIN:
2979 case TargetOpcode::G_UMAX: {
2980 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2981
2982 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
2983 unsigned ExtOpc =
2984 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty, Ctx),
2985 getApproximateEVTForLLT(WideTy, Ctx))
2986 ? TargetOpcode::G_SEXT
2987 : TargetOpcode::G_ZEXT;
2988
2989 Observer.changingInstr(MI);
2990 widenScalarSrc(MI, WideTy, 1, ExtOpc);
2991 widenScalarSrc(MI, WideTy, 2, ExtOpc);
2992 widenScalarDst(MI, WideTy);
2993 Observer.changedInstr(MI);
2994 return Legalized;
2995 }
2996
2997 case TargetOpcode::G_SELECT:
2998 Observer.changingInstr(MI);
2999 if (TypeIdx == 0) {
3000 // Perform operation at larger width (any extension is fine here, high
3001 // bits don't affect the result) and then truncate the result back to the
3002 // original type.
3003 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3004 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
3005 widenScalarDst(MI, WideTy);
3006 } else {
3007 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
3008 // Explicit extension is required here since high bits affect the result.
3009 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
3010 }
3011 Observer.changedInstr(MI);
3012 return Legalized;
3013
3014 case TargetOpcode::G_FPTOSI:
3015 case TargetOpcode::G_FPTOUI:
3016 case TargetOpcode::G_INTRINSIC_LRINT:
3017 case TargetOpcode::G_INTRINSIC_LLRINT:
3018 case TargetOpcode::G_IS_FPCLASS:
3019 Observer.changingInstr(MI);
3020
3021 if (TypeIdx == 0)
3022 widenScalarDst(MI, WideTy);
3023 else
3024 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3025
3026 Observer.changedInstr(MI);
3027 return Legalized;
3028 case TargetOpcode::G_SITOFP:
3029 Observer.changingInstr(MI);
3030
3031 if (TypeIdx == 0)
3032 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3033 else
3034 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
3035
3036 Observer.changedInstr(MI);
3037 return Legalized;
3038 case TargetOpcode::G_UITOFP:
3039 Observer.changingInstr(MI);
3040
3041 if (TypeIdx == 0)
3042 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3043 else
3044 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3045
3046 Observer.changedInstr(MI);
3047 return Legalized;
3048 case TargetOpcode::G_FPTOSI_SAT:
3049 case TargetOpcode::G_FPTOUI_SAT:
3050 Observer.changingInstr(MI);
3051
3052 if (TypeIdx == 0) {
3053 Register OldDst = MI.getOperand(0).getReg();
3054 LLT Ty = MRI.getType(OldDst);
3055 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3056 Register NewDst;
3057 MI.getOperand(0).setReg(ExtReg);
3058 uint64_t ShortBits = Ty.getScalarSizeInBits();
3059 uint64_t WideBits = WideTy.getScalarSizeInBits();
3060 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3061 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3062 // z = i16 fptosi_sat(a)
3063 // ->
3064 // x = i32 fptosi_sat(a)
3065 // y = smin(x, 32767)
3066 // z = smax(y, -32768)
3067 auto MaxVal = MIRBuilder.buildConstant(
3068 WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
3069 auto MinVal = MIRBuilder.buildConstant(
3070 WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
3071 Register MidReg =
3072 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3073 NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3074 } else {
3075 // z = i16 fptoui_sat(a)
3076 // ->
3077 // x = i32 fptoui_sat(a)
3078 // y = smin(x, 65535)
3079 auto MaxVal = MIRBuilder.buildConstant(
3080 WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
3081 NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3082 }
3083 MIRBuilder.buildTrunc(OldDst, NewDst);
3084 } else
3085 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3086
3087 Observer.changedInstr(MI);
3088 return Legalized;
3089 case TargetOpcode::G_LOAD:
3090 case TargetOpcode::G_SEXTLOAD:
3091 case TargetOpcode::G_ZEXTLOAD:
3092 Observer.changingInstr(MI);
3093 widenScalarDst(MI, WideTy);
3094 Observer.changedInstr(MI);
3095 return Legalized;
3096
3097 case TargetOpcode::G_STORE: {
3098 if (TypeIdx != 0)
3099 return UnableToLegalize;
3100
3101 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3102 assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
3103 if (!Ty.isScalar()) {
3104 // We need to widen the vector element type.
3105 Observer.changingInstr(MI);
3106 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
3107 // We also need to adjust the MMO to turn this into a truncating store.
3108 MachineMemOperand &MMO = **MI.memoperands_begin();
3109 MachineFunction &MF = MIRBuilder.getMF();
3110 auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
3111 MI.setMemRefs(MF, {NewMMO});
3112 Observer.changedInstr(MI);
3113 return Legalized;
3114 }
3115
3116 Observer.changingInstr(MI);
3117
3118 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3119 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3120 widenScalarSrc(MI, WideTy, 0, ExtType);
3121
3122 Observer.changedInstr(MI);
3123 return Legalized;
3124 }
3125 case TargetOpcode::G_CONSTANT: {
3126 MachineOperand &SrcMO = MI.getOperand(1);
3127 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3128 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3129 MRI.getType(MI.getOperand(0).getReg()));
3130 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3131 ExtOpc == TargetOpcode::G_ANYEXT) &&
3132 "Illegal Extend");
3133 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3134 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3135 ? SrcVal.sext(WideTy.getSizeInBits())
3136 : SrcVal.zext(WideTy.getSizeInBits());
3137 Observer.changingInstr(MI);
3138 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3139
3140 widenScalarDst(MI, WideTy);
3141 Observer.changedInstr(MI);
3142 return Legalized;
3143 }
3144 case TargetOpcode::G_FCONSTANT: {
3145 // To avoid changing the bits of the constant due to extension to a larger
3146 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
3147 MachineOperand &SrcMO = MI.getOperand(1);
3148 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3149 MIRBuilder.setInstrAndDebugLoc(MI);
3150 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
3151 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
3152 MI.eraseFromParent();
3153 return Legalized;
3154 }
3155 case TargetOpcode::G_IMPLICIT_DEF: {
3156 Observer.changingInstr(MI);
3157 widenScalarDst(MI, WideTy);
3158 Observer.changedInstr(MI);
3159 return Legalized;
3160 }
3161 case TargetOpcode::G_BRCOND:
3162 Observer.changingInstr(MI);
3163 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
3164 Observer.changedInstr(MI);
3165 return Legalized;
3166
3167 case TargetOpcode::G_FCMP:
3168 Observer.changingInstr(MI);
3169 if (TypeIdx == 0)
3170 widenScalarDst(MI, WideTy);
3171 else {
3172 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3173 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
3174 }
3175 Observer.changedInstr(MI);
3176 return Legalized;
3177
3178 case TargetOpcode::G_ICMP:
3179 Observer.changingInstr(MI);
3180 if (TypeIdx == 0)
3181 widenScalarDst(MI, WideTy);
3182 else {
3183 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
3184 CmpInst::Predicate Pred =
3185 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3186
3187 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3188 unsigned ExtOpcode =
3189 (CmpInst::isSigned(Pred) ||
3190 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx),
3191 getApproximateEVTForLLT(WideTy, Ctx)))
3192 ? TargetOpcode::G_SEXT
3193 : TargetOpcode::G_ZEXT;
3194 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
3195 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
3196 }
3197 Observer.changedInstr(MI);
3198 return Legalized;
3199
3200 case TargetOpcode::G_PTR_ADD:
3201 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
3202 Observer.changingInstr(MI);
3203 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3204 Observer.changedInstr(MI);
3205 return Legalized;
3206
3207 case TargetOpcode::G_PHI: {
3208 assert(TypeIdx == 0 && "Expecting only Idx 0");
3209
3210 Observer.changingInstr(MI);
3211 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3212 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3213 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
3214 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
3215 }
3216
3217 MachineBasicBlock &MBB = *MI.getParent();
3218 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3219 widenScalarDst(MI, WideTy);
3220 Observer.changedInstr(MI);
3221 return Legalized;
3222 }
3223 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3224 if (TypeIdx == 0) {
3225 Register VecReg = MI.getOperand(1).getReg();
3226 LLT VecTy = MRI.getType(VecReg);
3227 Observer.changingInstr(MI);
3228
3229       widenScalarSrc(
3230           MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
3231 TargetOpcode::G_ANYEXT);
3232
3233 widenScalarDst(MI, WideTy, 0);
3234 Observer.changedInstr(MI);
3235 return Legalized;
3236 }
3237
3238 if (TypeIdx != 2)
3239 return UnableToLegalize;
3240 Observer.changingInstr(MI);
3241 // TODO: Probably should be zext
3242 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3243 Observer.changedInstr(MI);
3244 return Legalized;
3245 }
3246 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3247 if (TypeIdx == 0) {
3248 Observer.changingInstr(MI);
3249 const LLT WideEltTy = WideTy.getElementType();
3250
3251 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3252 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3253 widenScalarDst(MI, WideTy, 0);
3254 Observer.changedInstr(MI);
3255 return Legalized;
3256 }
3257
3258 if (TypeIdx == 1) {
3259 Observer.changingInstr(MI);
3260
3261 Register VecReg = MI.getOperand(1).getReg();
3262 LLT VecTy = MRI.getType(VecReg);
3263 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
3264
3265 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3266 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3267 widenScalarDst(MI, WideVecTy, 0);
3268 Observer.changedInstr(MI);
3269 return Legalized;
3270 }
3271
3272 if (TypeIdx == 2) {
3273 Observer.changingInstr(MI);
3274 // TODO: Probably should be zext
3275 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
3276 Observer.changedInstr(MI);
3277 return Legalized;
3278 }
3279
3280 return UnableToLegalize;
3281 }
3282 case TargetOpcode::G_FADD:
3283 case TargetOpcode::G_FMUL:
3284 case TargetOpcode::G_FSUB:
3285 case TargetOpcode::G_FMA:
3286 case TargetOpcode::G_FMAD:
3287 case TargetOpcode::G_FNEG:
3288 case TargetOpcode::G_FABS:
3289 case TargetOpcode::G_FCANONICALIZE:
3290 case TargetOpcode::G_FMINNUM:
3291 case TargetOpcode::G_FMAXNUM:
3292 case TargetOpcode::G_FMINNUM_IEEE:
3293 case TargetOpcode::G_FMAXNUM_IEEE:
3294 case TargetOpcode::G_FMINIMUM:
3295 case TargetOpcode::G_FMAXIMUM:
3296 case TargetOpcode::G_FMINIMUMNUM:
3297 case TargetOpcode::G_FMAXIMUMNUM:
3298 case TargetOpcode::G_FDIV:
3299 case TargetOpcode::G_FREM:
3300 case TargetOpcode::G_FCEIL:
3301 case TargetOpcode::G_FFLOOR:
3302 case TargetOpcode::G_FCOS:
3303 case TargetOpcode::G_FSIN:
3304 case TargetOpcode::G_FTAN:
3305 case TargetOpcode::G_FACOS:
3306 case TargetOpcode::G_FASIN:
3307 case TargetOpcode::G_FATAN:
3308 case TargetOpcode::G_FATAN2:
3309 case TargetOpcode::G_FCOSH:
3310 case TargetOpcode::G_FSINH:
3311 case TargetOpcode::G_FTANH:
3312 case TargetOpcode::G_FLOG10:
3313 case TargetOpcode::G_FLOG:
3314 case TargetOpcode::G_FLOG2:
3315 case TargetOpcode::G_FRINT:
3316 case TargetOpcode::G_FNEARBYINT:
3317 case TargetOpcode::G_FSQRT:
3318 case TargetOpcode::G_FEXP:
3319 case TargetOpcode::G_FEXP2:
3320 case TargetOpcode::G_FEXP10:
3321 case TargetOpcode::G_FPOW:
3322 case TargetOpcode::G_INTRINSIC_TRUNC:
3323 case TargetOpcode::G_INTRINSIC_ROUND:
3324 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3325 assert(TypeIdx == 0);
3326 Observer.changingInstr(MI);
3327
3328 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3329 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3330
3331 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3332 Observer.changedInstr(MI);
3333 return Legalized;
3334 case TargetOpcode::G_FPOWI:
3335 case TargetOpcode::G_FLDEXP:
3336 case TargetOpcode::G_STRICT_FLDEXP: {
3337 if (TypeIdx == 0) {
3338 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3339 return UnableToLegalize;
3340
3341 Observer.changingInstr(MI);
3342 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3343 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3344 Observer.changedInstr(MI);
3345 return Legalized;
3346 }
3347
3348 if (TypeIdx == 1) {
3349 // For some reason SelectionDAG tries to promote to a libcall without
3350 // actually changing the integer type for promotion.
3351 Observer.changingInstr(MI);
3352 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3353 Observer.changedInstr(MI);
3354 return Legalized;
3355 }
3356
3357 return UnableToLegalize;
3358 }
3359 case TargetOpcode::G_FFREXP: {
3360 Observer.changingInstr(MI);
3361
3362 if (TypeIdx == 0) {
3363 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3364 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3365 } else {
3366 widenScalarDst(MI, WideTy, 1);
3367 }
3368
3369 Observer.changedInstr(MI);
3370 return Legalized;
3371 }
3372 case TargetOpcode::G_INTTOPTR:
3373 if (TypeIdx != 1)
3374 return UnableToLegalize;
3375
3376 Observer.changingInstr(MI);
3377 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3378 Observer.changedInstr(MI);
3379 return Legalized;
3380 case TargetOpcode::G_PTRTOINT:
3381 if (TypeIdx != 0)
3382 return UnableToLegalize;
3383
3384 Observer.changingInstr(MI);
3385 widenScalarDst(MI, WideTy, 0);
3386 Observer.changedInstr(MI);
3387 return Legalized;
3388 case TargetOpcode::G_BUILD_VECTOR: {
3389 Observer.changingInstr(MI);
3390
3391 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3392 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3393 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3394
3395 // Avoid changing the result vector type if the source element type was
3396 // requested.
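      // As an illustrative example (register names are placeholders): if only
      // the s8 sources of %v:_(<4 x s8>) = G_BUILD_VECTOR are widened to s32,
      // this becomes
      //   %v:_(<4 x s8>) = G_BUILD_VECTOR_TRUNC %a:_(s32), %b:_(s32), ...
      // rather than widening the <4 x s8> result type.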
3397 if (TypeIdx == 1) {
3398 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3399 } else {
3400 widenScalarDst(MI, WideTy, 0);
3401 }
3402
3403 Observer.changedInstr(MI);
3404 return Legalized;
3405 }
3406 case TargetOpcode::G_SEXT_INREG:
3407 if (TypeIdx != 0)
3408 return UnableToLegalize;
3409
3410 Observer.changingInstr(MI);
3411 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3412 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3413 Observer.changedInstr(MI);
3414 return Legalized;
3415 case TargetOpcode::G_PTRMASK: {
3416 if (TypeIdx != 1)
3417 return UnableToLegalize;
3418 Observer.changingInstr(MI);
3419 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3420 Observer.changedInstr(MI);
3421 return Legalized;
3422 }
3423 case TargetOpcode::G_VECREDUCE_ADD: {
3424 if (TypeIdx != 1)
3425 return UnableToLegalize;
3426 Observer.changingInstr(MI);
3427 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3428 widenScalarDst(MI, WideTy.getScalarType(), 0, TargetOpcode::G_TRUNC);
3429 Observer.changedInstr(MI);
3430 return Legalized;
3431 }
3432 case TargetOpcode::G_VECREDUCE_FADD:
3433 case TargetOpcode::G_VECREDUCE_FMUL:
3434 case TargetOpcode::G_VECREDUCE_FMIN:
3435 case TargetOpcode::G_VECREDUCE_FMAX:
3436 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3437 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3438 if (TypeIdx != 0)
3439 return UnableToLegalize;
3440 Observer.changingInstr(MI);
3441 Register VecReg = MI.getOperand(1).getReg();
3442 LLT VecTy = MRI.getType(VecReg);
3443 LLT WideVecTy = VecTy.isVector()
3444 ? LLT::vector(VecTy.getElementCount(), WideTy)
3445 : WideTy;
3446 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3447 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3448 Observer.changedInstr(MI);
3449 return Legalized;
3450 }
3451 case TargetOpcode::G_VSCALE: {
3452 MachineOperand &SrcMO = MI.getOperand(1);
3453 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3454 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3455 // The CImm is always a signed value
3456 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3457 Observer.changingInstr(MI);
3458 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3459 widenScalarDst(MI, WideTy);
3460 Observer.changedInstr(MI);
3461 return Legalized;
3462 }
3463 case TargetOpcode::G_SPLAT_VECTOR: {
3464 if (TypeIdx != 1)
3465 return UnableToLegalize;
3466
3467 Observer.changingInstr(MI);
3468 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3469 Observer.changedInstr(MI);
3470 return Legalized;
3471 }
3472 case TargetOpcode::G_INSERT_SUBVECTOR: {
3473 if (TypeIdx != 0)
3474 return UnableToLegalize;
3475
3476     GInsertSubvector &IS = cast<GInsertSubvector>(MI);
3477 Register BigVec = IS.getBigVec();
3478 Register SubVec = IS.getSubVec();
3479
3480 LLT SubVecTy = MRI.getType(SubVec);
3481 LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
3482
3483 // Widen the G_INSERT_SUBVECTOR
3484 auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
3485 auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3486 auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3487 IS.getIndexImm());
3488
3489 // Truncate back down
3490 auto SplatZero = MIRBuilder.buildSplatVector(
3491 WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
3492 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, IS.getReg(0), WideInsert,
3493 SplatZero);
3494
3495 MI.eraseFromParent();
3496
3497 return Legalized;
3498 }
3499 }
3500}
3501
3502 static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3503 MachineIRBuilder &B, Register Src, LLT Ty) {
3504 auto Unmerge = B.buildUnmerge(Ty, Src);
3505 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3506 Pieces.push_back(Unmerge.getReg(I));
3507}
3508
3509static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3510 MachineIRBuilder &MIRBuilder) {
3511 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3512 MachineFunction &MF = MIRBuilder.getMF();
3513 const DataLayout &DL = MIRBuilder.getDataLayout();
3514 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3515 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3516 LLT DstLLT = MRI.getType(DstReg);
3517
3518 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3519
3520 auto Addr = MIRBuilder.buildConstantPool(
3521 AddrPtrTy,
3522 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3523
3524 MachineMemOperand *MMO =
3525       MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3526 MachineMemOperand::MOLoad, DstLLT, Alignment);
3527
3528 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3529}
3530
3531 LegalizerHelper::LegalizeResult
3532 LegalizerHelper::lowerConstant(MachineInstr &MI) {
3533 const MachineOperand &ConstOperand = MI.getOperand(1);
3534 const Constant *ConstantVal = ConstOperand.getCImm();
3535
3536 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3537 MI.eraseFromParent();
3538
3539 return Legalized;
3540}
3541
3542 LegalizerHelper::LegalizeResult
3543 LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3544 const MachineOperand &ConstOperand = MI.getOperand(1);
3545 const Constant *ConstantVal = ConstOperand.getFPImm();
3546
3547 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3548 MI.eraseFromParent();
3549
3550 return Legalized;
3551}
3552
3553 LegalizerHelper::LegalizeResult
3554 LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3555 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3556 if (SrcTy.isVector()) {
3557 LLT SrcEltTy = SrcTy.getElementType();
3558     SmallVector<Register, 8> SrcRegs;
3559
3560 if (DstTy.isVector()) {
3561 int NumDstElt = DstTy.getNumElements();
3562 int NumSrcElt = SrcTy.getNumElements();
3563
3564 LLT DstEltTy = DstTy.getElementType();
3565 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3566 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3567
3568 // If there's an element size mismatch, insert intermediate casts to match
3569 // the result element type.
3570 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3571 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3572 //
3573 // =>
3574 //
3575 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3576 // %4:_(<2 x s8>) = G_BITCAST %2
3577 // %5:_(<2 x s8>) = G_BITCAST %3
3578 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3579 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
3580 SrcPartTy = SrcEltTy;
3581 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3582 //
3583 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3584 //
3585 // =>
3586 //
3587 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3588 // %4:_(s16) = G_BITCAST %2
3589 // %5:_(s16) = G_BITCAST %3
3590 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3591 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
3592 DstCastTy = DstEltTy;
3593 }
3594
3595 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3596 for (Register &SrcReg : SrcRegs)
3597 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3598 } else
3599 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3600
3601 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3602 MI.eraseFromParent();
3603 return Legalized;
3604 }
3605
3606 if (DstTy.isVector()) {
3607     SmallVector<Register, 8> SrcRegs;
3608 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3609 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3610 MI.eraseFromParent();
3611 return Legalized;
3612 }
3613
3614 return UnableToLegalize;
3615}
3616
3617/// Figure out the bit offset into a register when coercing a vector index for
3618 /// the wide element type. This is only for the case when promoting a vector
3619 /// to one with larger elements.
3620 ///
3621 ///
3622/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3623/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
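/// For example (illustrative): with DstEltSize = 32 and SrcEltSize = 8,
/// %offset_idx = G_AND %idx, 3 and %offset_bits = G_SHL %offset_idx, 3,
/// i.e. the byte index within the wide element times 8.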
3624 static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3625                                                     Register Idx,
3626 unsigned NewEltSize,
3627 unsigned OldEltSize) {
3628 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3629 LLT IdxTy = B.getMRI()->getType(Idx);
3630
3631 // Now figure out the amount we need to shift to get the target bits.
3632 auto OffsetMask = B.buildConstant(
3633 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3634 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3635 return B.buildShl(IdxTy, OffsetIdx,
3636 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3637}
3638
3639/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3640/// is casting to a vector with a smaller element size, perform multiple element
3641/// extracts and merge the results. If this is coercing to a vector with larger
3642/// elements, index the bitcasted vector and extract the target element with bit
3643/// operations. This is intended to force the indexing in the native register
3644/// size for architectures that can dynamically index the register file.
3645 LegalizerHelper::LegalizeResult
3646 LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3647 LLT CastTy) {
3648 if (TypeIdx != 1)
3649 return UnableToLegalize;
3650
3651 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3652
3653 LLT SrcEltTy = SrcVecTy.getElementType();
3654 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3655 unsigned OldNumElts = SrcVecTy.getNumElements();
3656
3657 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3658 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3659
3660 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3661 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3662 if (NewNumElts > OldNumElts) {
3663 // Decreasing the vector element size
3664 //
3665 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3666 // =>
3667 // v4i32:castx = bitcast x:v2i64
3668 //
3669 // i64 = bitcast
3670 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3671 // (i32 (extract_vector_elt castx, (2 * y + 1)))
3672 //
3673 if (NewNumElts % OldNumElts != 0)
3674 return UnableToLegalize;
3675
3676 // Type of the intermediate result vector.
3677 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3678 LLT MidTy =
3679 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3680
3681 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3682
3683 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3684 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3685
3686 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3687 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3688 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3689 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3690 NewOps[I] = Elt.getReg(0);
3691 }
3692
3693 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3694 MIRBuilder.buildBitcast(Dst, NewVec);
3695 MI.eraseFromParent();
3696 return Legalized;
3697 }
3698
3699 if (NewNumElts < OldNumElts) {
3700 if (NewEltSize % OldEltSize != 0)
3701 return UnableToLegalize;
3702
3703 // This only depends on powers of 2 because we use bit tricks to figure out
3704 // the bit offset we need to shift to get the target element. A general
3705 // expansion could emit division/multiply.
3706 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3707 return UnableToLegalize;
3708
3709 // Increasing the vector element size.
3710 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3711 //
3712 // =>
3713 //
3714 // %cast = G_BITCAST %vec
3715 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3716 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3717 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3718 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3719 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3720 // %elt = G_TRUNC %elt_bits
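    //
    // As an illustrative example: for <8 x s8> bitcast to <2 x s32>,
    // extracting element 5 reads wide element 5 >> 2 = 1 and shifts it right
    // by (5 & 3) * 8 = 8 bits before truncating back to s8.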
3721
3722 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3723 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3724
3725 // Divide to get the index in the wider element type.
3726 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3727
3728 Register WideElt = CastVec;
3729 if (CastTy.isVector()) {
3730 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3731 ScaledIdx).getReg(0);
3732 }
3733
3734 // Compute the bit offset into the register of the target element.
3735     Register OffsetBits = getBitcastWiderVectorElementOffset(
3736 MIRBuilder, Idx, NewEltSize, OldEltSize);
3737
3738 // Shift the wide element to get the target element.
3739 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3740 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3741 MI.eraseFromParent();
3742 return Legalized;
3743 }
3744
3745 return UnableToLegalize;
3746}
3747
3748 /// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits,
3749 /// while preserving the other bits in \p TargetReg.
3750 ///
3751 /// (InsertReg << Offset) | (TargetReg & ~(((1 << InsertReg.size()) - 1) << Offset))
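/// For example (illustrative): inserting an s8 value at bit offset 16 of an
/// s32 target keeps bits [0,16) and [24,32) of TargetReg and ORs in
/// (zext(InsertReg) << 16).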
3752 static Register buildBitFieldInsert(MachineIRBuilder &B,
3753                                     Register TargetReg, Register InsertReg,
3754 Register OffsetBits) {
3755 LLT TargetTy = B.getMRI()->getType(TargetReg);
3756 LLT InsertTy = B.getMRI()->getType(InsertReg);
3757 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3758 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3759
3760 // Produce a bitmask of the value to insert
3761 auto EltMask = B.buildConstant(
3762 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3763 InsertTy.getSizeInBits()));
3764 // Shift it into position
3765 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3766 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3767
3768 // Clear out the bits in the wide element
3769 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3770
3771 // The value to insert has all zeros already, so stick it into the masked
3772 // wide element.
3773 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3774}
3775
3776/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3777/// is increasing the element size, perform the indexing in the target element
3778/// type, and use bit operations to insert at the element position. This is
3779/// intended for architectures that can dynamically index the register file and
3780/// want to force indexing in the native register size.
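///
/// As an illustrative example: inserting an s8 element into <8 x s8> bitcast
/// to <2 x s32> extracts the containing s32 lane, bit-field-inserts the new
/// byte at the computed offset, re-inserts the lane, and bitcasts back.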
3781 LegalizerHelper::LegalizeResult
3782 LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3783                                         LLT CastTy) {
3784 if (TypeIdx != 0)
3785 return UnableToLegalize;
3786
3787 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3788 MI.getFirst4RegLLTs();
3789 LLT VecTy = DstTy;
3790
3791 LLT VecEltTy = VecTy.getElementType();
3792 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3793 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3794 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3795
3796 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3797 unsigned OldNumElts = VecTy.getNumElements();
3798
3799 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3800 if (NewNumElts < OldNumElts) {
3801 if (NewEltSize % OldEltSize != 0)
3802 return UnableToLegalize;
3803
3804 // This only depends on powers of 2 because we use bit tricks to figure out
3805 // the bit offset we need to shift to get the target element. A general
3806 // expansion could emit division/multiply.
3807 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3808 return UnableToLegalize;
3809
3810 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3811 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3812
3813 // Divide to get the index in the wider element type.
3814 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3815
3816 Register ExtractedElt = CastVec;
3817 if (CastTy.isVector()) {
3818 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3819 ScaledIdx).getReg(0);
3820 }
3821
3822 // Compute the bit offset into the register of the target element.
3823     Register OffsetBits = getBitcastWiderVectorElementOffset(
3824 MIRBuilder, Idx, NewEltSize, OldEltSize);
3825
3826 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3827 Val, OffsetBits);
3828 if (CastTy.isVector()) {
3829 InsertedElt = MIRBuilder.buildInsertVectorElement(
3830 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3831 }
3832
3833 MIRBuilder.buildBitcast(Dst, InsertedElt);
3834 MI.eraseFromParent();
3835 return Legalized;
3836 }
3837
3838 return UnableToLegalize;
3839}
3840
3841// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3842 // those whose operands are smaller than a legal vector type.
3843//
3844// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3845//
3846// ===>
3847//
3848// s32 = G_BITCAST <4 x s8>
3849// s32 = G_BITCAST <4 x s8>
3850// s32 = G_BITCAST <4 x s8>
3851// s32 = G_BITCAST <4 x s8>
3852// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3853// <16 x s8> = G_BITCAST <4 x s32>
3854 LegalizerHelper::LegalizeResult
3855 LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
3856 LLT CastTy) {
3857 // Convert it to CONCAT instruction
3858 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3859 if (!ConcatMI) {
3860 return UnableToLegalize;
3861 }
3862
3863 // Check if bitcast is Legal
3864 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3865 LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
3866
3867 // Check if the build vector is Legal
3868 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3869 return UnableToLegalize;
3870 }
3871
3872 // Bitcast the sources
3873 SmallVector<Register> BitcastRegs;
3874 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3875 BitcastRegs.push_back(
3876 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3877 .getReg(0));
3878 }
3879
3880 // Build the scalar values into a vector
3881 Register BuildReg =
3882 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
3883 MIRBuilder.buildBitcast(DstReg, BuildReg);
3884
3885 MI.eraseFromParent();
3886 return Legalized;
3887}
3888
3889// This bitcasts a shuffle vector to a different type currently of the same
3890// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
3891// will be used instead.
3892//
3893 // <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
3894// ===>
3895// <4 x s64> = G_PTRTOINT <4 x p0>
3896// <4 x s64> = G_PTRTOINT <4 x p0>
3897 // <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
3898// <16 x p0> = G_INTTOPTR <16 x s64>
3899 LegalizerHelper::LegalizeResult
3900 LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
3901 LLT CastTy) {
3902 auto ShuffleMI = cast<GShuffleVector>(&MI);
3903 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
3904 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
3905
3906 // We currently only handle vectors of the same size.
3907 if (TypeIdx != 0 ||
3908 CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
3909 CastTy.getElementCount() != DstTy.getElementCount())
3910 return UnableToLegalize;
3911
3912 LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
3913
3914 auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
3915 auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
3916 auto Shuf =
3917 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
3918 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
3919
3920 MI.eraseFromParent();
3921 return Legalized;
3922}
3923
3924/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
3925///
3926/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
3927///
3928/// ===>
3929///
3930 /// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
3931 /// <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i8>, N / 8
3932/// <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
3933 LegalizerHelper::LegalizeResult
3934 LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
3935 LLT CastTy) {
3936 auto ES = cast<GExtractSubvector>(&MI);
3937
3938 if (!CastTy.isVector())
3939 return UnableToLegalize;
3940
3941 if (TypeIdx != 0)
3942 return UnableToLegalize;
3943
3944 Register Dst = ES->getReg(0);
3945 Register Src = ES->getSrcVec();
3946 uint64_t Idx = ES->getIndexImm();
3947
3948 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3949
3950 LLT DstTy = MRI.getType(Dst);
3951 LLT SrcTy = MRI.getType(Src);
3952 ElementCount DstTyEC = DstTy.getElementCount();
3953 ElementCount SrcTyEC = SrcTy.getElementCount();
3954 auto DstTyMinElts = DstTyEC.getKnownMinValue();
3955 auto SrcTyMinElts = SrcTyEC.getKnownMinValue();
3956
3957 if (DstTy == CastTy)
3958 return Legalized;
3959
3960 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
3961 return UnableToLegalize;
3962
3963 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
3964 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
3965 if (CastEltSize < DstEltSize)
3966 return UnableToLegalize;
3967
3968 auto AdjustAmt = CastEltSize / DstEltSize;
3969 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
3970 SrcTyMinElts % AdjustAmt != 0)
3971 return UnableToLegalize;
3972
3973 Idx /= AdjustAmt;
3974 SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
3975 auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
3976 auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
3977 MIRBuilder.buildBitcast(Dst, PromotedES);
3978
3979 ES->eraseFromParent();
3980 return Legalized;
3981}
3982
3983/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
3984///
3985/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
3986/// <vscale x 8 x i1>,
3987/// N
3988///
3989/// ===>
3990///
3991/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
3992/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
3993/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
3994/// <vscale x 1 x i8>, N / 8
3995/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
3996 LegalizerHelper::LegalizeResult
3997 LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
3998 LLT CastTy) {
3999 auto ES = cast<GInsertSubvector>(&MI);
4000
4001 if (!CastTy.isVector())
4002 return UnableToLegalize;
4003
4004 if (TypeIdx != 0)
4005 return UnableToLegalize;
4006
4007 Register Dst = ES->getReg(0);
4008 Register BigVec = ES->getBigVec();
4009 Register SubVec = ES->getSubVec();
4010 uint64_t Idx = ES->getIndexImm();
4011
4012 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4013
4014 LLT DstTy = MRI.getType(Dst);
4015 LLT BigVecTy = MRI.getType(BigVec);
4016 LLT SubVecTy = MRI.getType(SubVec);
4017
4018 if (DstTy == CastTy)
4019 return Legalized;
4020
4021 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4022 return UnableToLegalize;
4023
4024 ElementCount DstTyEC = DstTy.getElementCount();
4025 ElementCount BigVecTyEC = BigVecTy.getElementCount();
4026 ElementCount SubVecTyEC = SubVecTy.getElementCount();
4027 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4028 auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
4029 auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();
4030
4031 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4032 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4033 if (CastEltSize < DstEltSize)
4034 return UnableToLegalize;
4035
4036 auto AdjustAmt = CastEltSize / DstEltSize;
4037 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4038 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4039 return UnableToLegalize;
4040
4041 Idx /= AdjustAmt;
4042 BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4043 SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4044 auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
4045 auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
4046 auto PromotedIS =
4047 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4048 MIRBuilder.buildBitcast(Dst, PromotedIS);
4049
4050 ES->eraseFromParent();
4051 return Legalized;
4052}
4053
4054 LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
4055 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
4056 Register DstReg = LoadMI.getDstReg();
4057 Register PtrReg = LoadMI.getPointerReg();
4058 LLT DstTy = MRI.getType(DstReg);
4059 MachineMemOperand &MMO = LoadMI.getMMO();
4060 LLT MemTy = MMO.getMemoryType();
4061 MachineFunction &MF = MIRBuilder.getMF();
4062
4063 unsigned MemSizeInBits = MemTy.getSizeInBits();
4064 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
4065
4066 if (MemSizeInBits != MemStoreSizeInBits) {
4067 if (MemTy.isVector())
4068 return UnableToLegalize;
4069
4070 // Promote to a byte-sized load if not loading an integral number of
4071 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
4072 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
4073 MachineMemOperand *NewMMO =
4074 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
4075
4076 Register LoadReg = DstReg;
4077 LLT LoadTy = DstTy;
4078
4079 // If this wasn't already an extending load, we need to widen the result
4080 // register to avoid creating a load with a narrower result than the source.
4081 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
4082 LoadTy = WideMemTy;
4083 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4084 }
4085
4086 if (isa<GSExtLoad>(LoadMI)) {
4087 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4088 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4089 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
4090 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4091 // The extra bits are guaranteed to be zero, since we stored them that
4092 // way. A zext load from Wide thus automatically gives zext from MemVT.
4093 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4094 } else {
4095 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4096 }
4097
4098 if (DstTy != LoadTy)
4099 MIRBuilder.buildTrunc(DstReg, LoadReg);
4100
4101 LoadMI.eraseFromParent();
4102 return Legalized;
4103 }
4104
4105 // Big endian lowering not implemented.
4106 if (MIRBuilder.getDataLayout().isBigEndian())
4107 return UnableToLegalize;
4108
4109 // This load needs splitting into power of 2 sized loads.
4110 //
4111 // Our strategy here is to generate anyextending loads for the smaller
4112 // types up to next power-2 result type, and then combine the two larger
4113 // result values together, before truncating back down to the non-pow-2
4114 // type.
4115 // E.g. v1 = i24 load =>
4116 // v2 = i32 zextload (2 byte)
4117 // v3 = i32 load (1 byte)
4118 // v4 = i32 shl v3, 16
4119 // v5 = i32 or v4, v2
4120 // v1 = i24 trunc v5
4121 // By doing this we generate the correct truncate which should get
4122 // combined away as an artifact with a matching extend.
4123
4124 uint64_t LargeSplitSize, SmallSplitSize;
4125
4126 if (!isPowerOf2_32(MemSizeInBits)) {
4127 // This load needs splitting into power of 2 sized loads.
4128 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
4129 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4130 } else {
4131 // This is already a power of 2, but we still need to split this in half.
4132 //
4133 // Assume we're being asked to decompose an unaligned load.
4134 // TODO: If this requires multiple splits, handle them all at once.
4135 auto &Ctx = MF.getFunction().getContext();
4136 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4137 return UnableToLegalize;
4138
4139 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4140 }
4141
4142 if (MemTy.isVector()) {
4143 // TODO: Handle vector extloads
4144 if (MemTy != DstTy)
4145 return UnableToLegalize;
4146
4147 Align Alignment = LoadMI.getAlign();
4148 // Given an alignment larger than the size of the memory, we can increase
4149 // the size of the load without needing to scalarize it.
4150 if (Alignment.value() * 8 > MemSizeInBits &&
4153 DstTy.getElementType());
4154 MachineMemOperand *NewMMO = MF.getMachineMemOperand(&MMO, 0, MoreTy);
4155 auto NewLoad = MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
4156 MIRBuilder.buildDeleteTrailingVectorElements(LoadMI.getReg(0),
4157 NewLoad.getReg(0));
4158 LoadMI.eraseFromParent();
4159 return Legalized;
4160 }
4161
4162 // TODO: We can do better than scalarizing the vector and at least split it
4163 // in half.
4164 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
4165 }
4166
4167 MachineMemOperand *LargeMMO =
4168 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4169 MachineMemOperand *SmallMMO =
4170 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4171
4172 LLT PtrTy = MRI.getType(PtrReg);
4173 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
4174 LLT AnyExtTy = LLT::scalar(AnyExtSize);
4175 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4176 PtrReg, *LargeMMO);
4177
4178 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
4179 LargeSplitSize / 8);
4180 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4181 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
4182 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
4183 SmallPtr, *SmallMMO);
4184
4185 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4186 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4187
4188 if (AnyExtTy == DstTy)
4189 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4190 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
4191 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4192 MIRBuilder.buildTrunc(DstReg, {Or});
4193 } else {
4194 assert(DstTy.isPointer() && "expected pointer");
4195 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4196
4197 // FIXME: We currently consider this to be illegal for non-integral address
4198 // spaces, but we still need a way to reinterpret the bits.
4199 MIRBuilder.buildIntToPtr(DstReg, Or);
4200 }
4201
4202 LoadMI.eraseFromParent();
4203 return Legalized;
4204}
4205
4206 LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
4207 // Lower a non-power of 2 store into multiple pow-2 stores.
4208 // E.g. split an i24 store into an i16 store + i8 store.
4209 // We do this by first extending the stored value to the next largest power
4210 // of 2 type, and then using truncating stores to store the components.
4211 // As with G_LOAD, doing this generates an extend that can be
4212 // artifact-combined away instead of leaving behind extracts.
4213 Register SrcReg = StoreMI.getValueReg();
4214 Register PtrReg = StoreMI.getPointerReg();
4215 LLT SrcTy = MRI.getType(SrcReg);
4216 MachineFunction &MF = MIRBuilder.getMF();
4217 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4218 LLT MemTy = MMO.getMemoryType();
4219
4220 unsigned StoreWidth = MemTy.getSizeInBits();
4221 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
4222
4223 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4224 // Promote to a byte-sized store with upper bits zero if not
4225 // storing an integral number of bytes. For example, promote
4226 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
4227 LLT WideTy = LLT::scalar(StoreSizeInBits);
4228
4229 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4230 // Avoid creating a store with a narrower source than result.
4231 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4232 SrcTy = WideTy;
4233 }
4234
4235 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4236
4237 MachineMemOperand *NewMMO =
4238 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
4239 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4240 StoreMI.eraseFromParent();
4241 return Legalized;
4242 }
4243
4244 if (MemTy.isVector()) {
4245 if (MemTy != SrcTy)
4246 return scalarizeVectorBooleanStore(StoreMI);
4247
4248 // TODO: We can do better than scalarizing the vector and at least split it
4249 // in half.
4250 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
4251 }
4252
4253 unsigned MemSizeInBits = MemTy.getSizeInBits();
4254 uint64_t LargeSplitSize, SmallSplitSize;
4255
4256 if (!isPowerOf2_32(MemSizeInBits)) {
4257 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
4258 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
4259 } else {
4260 auto &Ctx = MF.getFunction().getContext();
4261 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4262 return UnableToLegalize; // Don't know what we're being asked to do.
4263
4264 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4265 }
4266
4267 // Extend to the next pow-2. If this store was itself the result of lowering,
4268 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
4269 // that's wider than the stored size.
4270 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
4271 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
4272
4273 if (SrcTy.isPointer()) {
4274 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
4275 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4276 }
4277
4278 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4279
4280 // Obtain the smaller value by shifting away the larger value.
4281 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4282 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4283
4284 // Generate the PtrAdd and truncating stores.
4285 LLT PtrTy = MRI.getType(PtrReg);
4286 auto OffsetCst = MIRBuilder.buildConstant(
4287 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
4288 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4289
4290 MachineMemOperand *LargeMMO =
4291 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4292 MachineMemOperand *SmallMMO =
4293 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4294 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4295 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4296 StoreMI.eraseFromParent();
4297 return Legalized;
4298}
4299
4300 LegalizerHelper::LegalizeResult
4301 LegalizerHelper::scalarizeVectorBooleanStore(GStore &StoreMI) {
4302 Register SrcReg = StoreMI.getValueReg();
4303 Register PtrReg = StoreMI.getPointerReg();
4304 LLT SrcTy = MRI.getType(SrcReg);
4305 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4306 LLT MemTy = MMO.getMemoryType();
4307 LLT MemScalarTy = MemTy.getElementType();
4308 MachineFunction &MF = MIRBuilder.getMF();
4309
4310 assert(SrcTy.isVector() && "Expect a vector store type");
4311
4312 if (!MemScalarTy.isByteSized()) {
4313 // We need to build an integer scalar of the vector bit pattern.
4314 // It's not legal for us to add padding when storing a vector.
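    // As an illustrative example: a <4 x s1> store becomes a single s4 value,
    // built by truncating each lane, zero-extending it, and shifting it into
    // its bit position before ORing it into the accumulated scalar.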
4315 unsigned NumBits = MemTy.getSizeInBits();
4316 LLT IntTy = LLT::scalar(NumBits);
4317 auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
4318 LLT IdxTy = TLI.getVectorIdxLLT(MF.getDataLayout());
4319
4320 for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
4321 auto Elt = MIRBuilder.buildExtractVectorElement(
4322 SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
4323 auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
4324 auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
4325 unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
4326 ? (MemTy.getNumElements() - 1) - I
4327 : I;
4328 auto ShiftAmt = MIRBuilder.buildConstant(
4329 IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
4330 auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4331 CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4332 }
4333 auto PtrInfo = MMO.getPointerInfo();
4334 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
4335 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4336 StoreMI.eraseFromParent();
4337 return Legalized;
4338 }
4339
4340 // TODO: implement simple scalarization.
4341 return UnableToLegalize;
4342}
4343
4344 LegalizerHelper::LegalizeResult
4345LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
4346 switch (MI.getOpcode()) {
4347 case TargetOpcode::G_LOAD: {
4348 if (TypeIdx != 0)
4349 return UnableToLegalize;
4350 MachineMemOperand &MMO = **MI.memoperands_begin();
4351
4352 // Not sure how to interpret a bitcast of an extending load.
4353 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4354 return UnableToLegalize;
4355
4356 Observer.changingInstr(MI);
4357 bitcastDst(MI, CastTy, 0);
4358 MMO.setType(CastTy);
4359 // The range metadata is no longer valid when reinterpreted as a different
4360 // type.
4361 MMO.clearRanges();
4362 Observer.changedInstr(MI);
4363 return Legalized;
4364 }
4365 case TargetOpcode::G_STORE: {
4366 if (TypeIdx != 0)
4367 return UnableToLegalize;
4368
4369 MachineMemOperand &MMO = **MI.memoperands_begin();
4370
4371 // Not sure how to interpret a bitcast of a truncating store.
4372 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4373 return UnableToLegalize;
4374
4375 Observer.changingInstr(MI);
4376 bitcastSrc(MI, CastTy, 0);
4377 MMO.setType(CastTy);
4378 Observer.changedInstr(MI);
4379 return Legalized;
4380 }
4381 case TargetOpcode::G_SELECT: {
4382 if (TypeIdx != 0)
4383 return UnableToLegalize;
4384
4385 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
4386 LLVM_DEBUG(
4387 dbgs() << "bitcast action not implemented for vector select\n");
4388 return UnableToLegalize;
4389 }
4390
4391 Observer.changingInstr(MI);
4392 bitcastSrc(MI, CastTy, 2);
4393 bitcastSrc(MI, CastTy, 3);
4394 bitcastDst(MI, CastTy, 0);
4395 Observer.changedInstr(MI);
4396 return Legalized;
4397 }
4398 case TargetOpcode::G_AND:
4399 case TargetOpcode::G_OR:
4400 case TargetOpcode::G_XOR: {
4401 Observer.changingInstr(MI);
4402 bitcastSrc(MI, CastTy, 1);
4403 bitcastSrc(MI, CastTy, 2);
4404 bitcastDst(MI, CastTy, 0);
4405 Observer.changedInstr(MI);
4406 return Legalized;
4407 }
4408 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4409 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
4410 case TargetOpcode::G_INSERT_VECTOR_ELT:
4411 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
4412 case TargetOpcode::G_CONCAT_VECTORS:
4413 return bitcastConcatVector(MI, TypeIdx, CastTy);
4414 case TargetOpcode::G_SHUFFLE_VECTOR:
4415 return bitcastShuffleVector(MI, TypeIdx, CastTy);
4416 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4417 return bitcastExtractSubvector(MI, TypeIdx, CastTy);
4418 case TargetOpcode::G_INSERT_SUBVECTOR:
4419 return bitcastInsertSubvector(MI, TypeIdx, CastTy);
4420 default:
4421 return UnableToLegalize;
4422 }
4423}
4424
4425// Legalize an instruction by changing the opcode in place.
4426void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
4427   Observer.changingInstr(MI);
4428 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
4429   Observer.changedInstr(MI);
4430}
4431
4432 LegalizerHelper::LegalizeResult
4433LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4434 using namespace TargetOpcode;
4435
4436 switch(MI.getOpcode()) {
4437 default:
4438 return UnableToLegalize;
4439 case TargetOpcode::G_FCONSTANT:
4440 return lowerFConstant(MI);
4441 case TargetOpcode::G_BITCAST:
4442 return lowerBitcast(MI);
4443 case TargetOpcode::G_SREM:
4444 case TargetOpcode::G_UREM: {
4445 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4446 auto Quot =
4447 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4448 {MI.getOperand(1), MI.getOperand(2)});
4449
4450 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
4451 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
4452 MI.eraseFromParent();
4453 return Legalized;
4454 }
4455 case TargetOpcode::G_SADDO:
4456 case TargetOpcode::G_SSUBO:
4457 return lowerSADDO_SSUBO(MI);
4458 case TargetOpcode::G_SADDE:
4459 return lowerSADDE(MI);
4460 case TargetOpcode::G_SSUBE:
4461 return lowerSSUBE(MI);
4462 case TargetOpcode::G_UMULH:
4463 case TargetOpcode::G_SMULH:
4464 return lowerSMULH_UMULH(MI);
4465 case TargetOpcode::G_SMULO:
4466 case TargetOpcode::G_UMULO: {
4467 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4468 // result.
4469 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
4470 LLT Ty = MRI.getType(Res);
4471
4472 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4473 ? TargetOpcode::G_SMULH
4474 : TargetOpcode::G_UMULH;
4475
4476 Observer.changingInstr(MI);
4477 const auto &TII = MIRBuilder.getTII();
4478 MI.setDesc(TII.get(TargetOpcode::G_MUL));
4479 MI.removeOperand(1);
4480 Observer.changedInstr(MI);
4481
4482 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4483 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4484
4485 // Move insert point forward so we can use the Res register if needed.
4486 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
4487
4488 // For *signed* multiply, overflow is detected by checking:
4489 // (hi != (lo >> bitwidth-1))
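    // As an illustrative example with s8 operands: 100 * 2 = 200 = 0xC8, so
    // lo = 0xC8, hi (G_SMULH) = 0x00, and lo >>s 7 = 0xFF; since hi != 0xFF,
    // the overflow flag is set.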
4490 if (Opcode == TargetOpcode::G_SMULH) {
4491 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4492 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4493 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
4494 } else {
4495 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
4496 }
4497 return Legalized;
4498 }
4499 case TargetOpcode::G_FNEG: {
4500 auto [Res, SubByReg] = MI.getFirst2Regs();
4501 LLT Ty = MRI.getType(Res);
4502
4503 auto SignMask = MIRBuilder.buildConstant(
4504 Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
4505 MIRBuilder.buildXor(Res, SubByReg, SignMask);
4506 MI.eraseFromParent();
4507 return Legalized;
4508 }
4509 case TargetOpcode::G_FSUB:
4510 case TargetOpcode::G_STRICT_FSUB: {
4511 auto [Res, LHS, RHS] = MI.getFirst3Regs();
4512 LLT Ty = MRI.getType(Res);
4513
4514 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4515 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4516
4517 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4518 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4519 else
4520 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4521
4522 MI.eraseFromParent();
4523 return Legalized;
4524 }
4525 case TargetOpcode::G_FMAD:
4526 return lowerFMad(MI);
4527 case TargetOpcode::G_FFLOOR:
4528 return lowerFFloor(MI);
4529 case TargetOpcode::G_LROUND:
4530 case TargetOpcode::G_LLROUND: {
4531 Register DstReg = MI.getOperand(0).getReg();
4532 Register SrcReg = MI.getOperand(1).getReg();
4533 LLT SrcTy = MRI.getType(SrcReg);
4534 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4535 {SrcReg});
4536 MIRBuilder.buildFPTOSI(DstReg, Round);
4537 MI.eraseFromParent();
4538 return Legalized;
4539 }
4540 case TargetOpcode::G_INTRINSIC_ROUND:
4541 return lowerIntrinsicRound(MI);
4542 case TargetOpcode::G_FRINT: {
4543 // Since round even is the assumed rounding mode for unconstrained FP
4544 // operations, rint and roundeven are the same operation.
4545 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4546 return Legalized;
4547 }
4548 case TargetOpcode::G_INTRINSIC_LRINT:
4549 case TargetOpcode::G_INTRINSIC_LLRINT: {
4550 Register DstReg = MI.getOperand(0).getReg();
4551 Register SrcReg = MI.getOperand(1).getReg();
4552 LLT SrcTy = MRI.getType(SrcReg);
4553 auto Round =
4554 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4555 MIRBuilder.buildFPTOSI(DstReg, Round);
4556 MI.eraseFromParent();
4557 return Legalized;
4558 }
4559 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4560 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
4561 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4562 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4563 **MI.memoperands_begin());
4564 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4565 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4566 MI.eraseFromParent();
4567 return Legalized;
4568 }
4569 case TargetOpcode::G_LOAD:
4570 case TargetOpcode::G_SEXTLOAD:
4571 case TargetOpcode::G_ZEXTLOAD:
4572 return lowerLoad(cast<GAnyLoad>(MI));
4573 case TargetOpcode::G_STORE:
4574 return lowerStore(cast<GStore>(MI));
4575 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4576 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4577 case TargetOpcode::G_CTLZ:
4578 case TargetOpcode::G_CTTZ:
4579 case TargetOpcode::G_CTPOP:
4580 return lowerBitCount(MI);
4581 case G_UADDO: {
4582 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
4583
4584 Register NewRes = MRI.cloneVirtualRegister(Res);
4585
4586 MIRBuilder.buildAdd(NewRes, LHS, RHS);
4587 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4588
4589 MIRBuilder.buildCopy(Res, NewRes);
4590
4591 MI.eraseFromParent();
4592 return Legalized;
4593 }
4594 case G_UADDE: {
4595 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
4596 const LLT CondTy = MRI.getType(CarryOut);
4597 const LLT Ty = MRI.getType(Res);
4598
4599 Register NewRes = MRI.cloneVirtualRegister(Res);
4600
4601 // Initial add of the two operands.
4602 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
4603
4604 // Initial check for carry.
4605 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4606
4607 // Add the sum and the carry.
4608 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
4609 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4610
4611 // Second check for carry. We can only carry if the initial sum is all 1s
4612 // and the carry is set, resulting in a new sum of 0.
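    // As an illustrative example with s8 values: LHS = 0xFF, RHS = 0x00 and
    // CarryIn = 1 give TmpRes = 0xFF (no carry yet); adding the carry wraps
    // NewRes to 0x00, so ResEqZero && CarryIn reports the carry.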
4613 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4614 auto ResEqZero =
4615 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
4616 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4617 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
4618
4619 MIRBuilder.buildCopy(Res, NewRes);
4620
4621 MI.eraseFromParent();
4622 return Legalized;
4623 }
4624 case G_USUBO: {
4625 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
4626
4627 MIRBuilder.buildSub(Res, LHS, RHS);
4628 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
4629
4630 MI.eraseFromParent();
4631 return Legalized;
4632 }
4633 case G_USUBE: {
4634 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
4635 const LLT CondTy = MRI.getType(BorrowOut);
4636 const LLT Ty = MRI.getType(Res);
4637
4638 // Initial subtract of the two operands.
4639 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
4640
4641 // Initial check for borrow.
4642 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4643
4644 // Subtract the borrow from the first subtract.
4645 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
4646 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4647
4648 // Second check for borrow. We can only borrow if the initial difference is
4649 // 0 and the borrow is set, resulting in a new difference of all 1s.
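    // As an illustrative example with s8 values: LHS = RHS = 0x00 and
    // BorrowIn = 1 give TmpRes = 0x00 (no borrow yet); subtracting the borrow
    // wraps Res to 0xFF, so TmpResEqZero && BorrowIn reports the borrow.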
4650 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4651 auto TmpResEqZero =
4652 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4653 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4654 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4655
4656 MI.eraseFromParent();
4657 return Legalized;
4658 }
4659 case G_UITOFP:
4660 return lowerUITOFP(MI);
4661 case G_SITOFP:
4662 return lowerSITOFP(MI);
4663 case G_FPTOUI:
4664 return lowerFPTOUI(MI);
4665 case G_FPTOSI:
4666 return lowerFPTOSI(MI);
4667 case G_FPTOUI_SAT:
4668 case G_FPTOSI_SAT:
4669 return lowerFPTOINT_SAT(MI);
4670 case G_FPTRUNC:
4671 return lowerFPTRUNC(MI);
4672 case G_FPOWI:
4673 return lowerFPOWI(MI);
4674 case G_SMIN:
4675 case G_SMAX:
4676 case G_UMIN:
4677 case G_UMAX:
4678 return lowerMinMax(MI);
4679 case G_SCMP:
4680 case G_UCMP:
4681 return lowerThreewayCompare(MI);
4682 case G_FCOPYSIGN:
4683 return lowerFCopySign(MI);
4684 case G_FMINNUM:
4685 case G_FMAXNUM:
4686 case G_FMINIMUMNUM:
4687 case G_FMAXIMUMNUM:
4688 return lowerFMinNumMaxNum(MI);
4689 case G_MERGE_VALUES:
4690 return lowerMergeValues(MI);
4691 case G_UNMERGE_VALUES:
4692 return lowerUnmergeValues(MI);
4693 case TargetOpcode::G_SEXT_INREG: {
4694 assert(MI.getOperand(2).isImm() && "Expected immediate");
4695 int64_t SizeInBits = MI.getOperand(2).getImm();
4696
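    // Lower to a shift pair. As an illustrative example,
    // G_SEXT_INREG %x:_(s32), 8 becomes:
    //   %t:_(s32) = G_SHL %x, 24
    //   %d:_(s32) = G_ASHR %t, 24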
4697 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4698 LLT DstTy = MRI.getType(DstReg);
4699 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4700
4701 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4702 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4703 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4704 MI.eraseFromParent();
4705 return Legalized;
4706 }
4707 case G_EXTRACT_VECTOR_ELT:
4708 case G_INSERT_VECTOR_ELT:
4709     return lowerExtractInsertVectorElt(MI);
4710 case G_SHUFFLE_VECTOR:
4711 return lowerShuffleVector(MI);
4712 case G_VECTOR_COMPRESS:
4713 return lowerVECTOR_COMPRESS(MI);
4714 case G_DYN_STACKALLOC:
4715 return lowerDynStackAlloc(MI);
4716 case G_STACKSAVE:
4717 return lowerStackSave(MI);
4718 case G_STACKRESTORE:
4719 return lowerStackRestore(MI);
4720 case G_EXTRACT:
4721 return lowerExtract(MI);
4722 case G_INSERT:
4723 return lowerInsert(MI);
4724 case G_BSWAP:
4725 return lowerBswap(MI);
4726 case G_BITREVERSE:
4727 return lowerBitreverse(MI);
4728 case G_READ_REGISTER:
4729 case G_WRITE_REGISTER:
4730 return lowerReadWriteRegister(MI);
4731 case G_UADDSAT:
4732 case G_USUBSAT: {
4733 // Try to make a reasonable guess about which lowering strategy to use. The
4734 // target can override this by using custom lowering and calling the
4735 // implementation functions directly.
4736 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4737 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4738 return lowerAddSubSatToMinMax(MI);
4739     return lowerAddSubSatToAddoSubo(MI);
4740 }
4741 case G_SADDSAT:
4742 case G_SSUBSAT: {
4743 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4744
4745 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4746 // since it's a shorter expansion. However, we would need to figure out the
4747 // preferred boolean type for the carry out for the query.
4748 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4749 return lowerAddSubSatToMinMax(MI);
4750     return lowerAddSubSatToAddoSubo(MI);
4751 }
4752 case G_SSHLSAT:
4753 case G_USHLSAT:
4754 return lowerShlSat(MI);
4755 case G_ABS:
4756 return lowerAbsToAddXor(MI);
4757 case G_ABDS:
4758 case G_ABDU: {
4759 bool IsSigned = MI.getOpcode() == G_ABDS;
4760 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4761 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4762 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4763 return lowerAbsDiffToMinMax(MI);
4764 }
4765 return lowerAbsDiffToSelect(MI);
4766 }
4767 case G_FABS:
4768 return lowerFAbs(MI);
4769 case G_SELECT:
4770 return lowerSelect(MI);
4771 case G_IS_FPCLASS:
4772 return lowerISFPCLASS(MI);
4773 case G_SDIVREM:
4774 case G_UDIVREM:
4775 return lowerDIVREM(MI);
4776 case G_FSHL:
4777 case G_FSHR:
4778 return lowerFunnelShift(MI);
4779 case G_ROTL:
4780 case G_ROTR:
4781 return lowerRotate(MI);
4782 case G_MEMSET:
4783 case G_MEMCPY:
4784 case G_MEMMOVE:
4785 return lowerMemCpyFamily(MI);
4786 case G_MEMCPY_INLINE:
4787 return lowerMemcpyInline(MI);
4788 case G_ZEXT:
4789 case G_SEXT:
4790 case G_ANYEXT:
4791 return lowerEXT(MI);
4792 case G_TRUNC:
4793 return lowerTRUNC(MI);
4794   GISEL_VECREDUCE_CASES_NONSEQ
4795 return lowerVectorReduction(MI);
4796 case G_VAARG:
4797 return lowerVAArg(MI);
4798 case G_ATOMICRMW_SUB: {
4799 auto [Ret, Mem, Val] = MI.getFirst3Regs();
4800 const LLT ValTy = MRI.getType(Val);
4801 MachineMemOperand *MMO = *MI.memoperands_begin();
4802
4803 auto VNeg = MIRBuilder.buildNeg(ValTy, Val);
4804 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4805 MI.eraseFromParent();
4806 return Legalized;
4807 }
4808 }
4809}
4810
4811 Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4812 Align MinAlign) const {
4813 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4814 // datalayout for the preferred alignment. Also there should be a target hook
4815 // for this to allow targets to reduce the alignment and ignore the
4816 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4817 // the type.
4818 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4819}
4820
4821 MachineInstrBuilder
4822 LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4823 MachinePointerInfo &PtrInfo) {
4824 MachineFunction &MF = MIRBuilder.getMF();
4825 const DataLayout &DL = MIRBuilder.getDataLayout();
4826 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4827
4828 unsigned AddrSpace = DL.getAllocaAddrSpace();
4829 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4830
4831 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4832 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4833}
4834
4835 MachineInstrBuilder LegalizerHelper::createStackStoreLoad(const DstOp &Res,
4836 const SrcOp &Val) {
4837 LLT SrcTy = Val.getLLTTy(MRI);
4838 Align StackTypeAlign =
4839 std::max(getStackTemporaryAlignment(SrcTy),
4840                getStackTemporaryAlignment(Res.getLLTTy(MRI)));
4841 MachinePointerInfo PtrInfo;
4842 auto StackTemp =
4843 createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo);
4844
4845 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4846 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4847}
4848
4849 static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
4850 LLT VecTy) {
4851 LLT IdxTy = B.getMRI()->getType(IdxReg);
4852 unsigned NElts = VecTy.getNumElements();
4853
4854 int64_t IdxVal;
4855 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4856 if (IdxVal < VecTy.getNumElements())
4857 return IdxReg;
4858 // If a constant index would be out of bounds, clamp it as well.
4859 }
4860
4861 if (isPowerOf2_32(NElts)) {
4862 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4863 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4864 }
4865
4866 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4867 .getReg(0);
4868}
4869
4870 Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4871 Register Index) {
4872 LLT EltTy = VecTy.getElementType();
4873
4874 // Calculate the element offset and add it to the pointer.
4875 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4876 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4877 "Converting bits to bytes lost precision");
4878
4879 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4880
4881 // Convert index to the correct size for the address space.
4882 const DataLayout &DL = MIRBuilder.getDataLayout();
4883 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4884 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4885 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4886 if (IdxTy != MRI.getType(Index))
4887 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4888
4889 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4890 MIRBuilder.buildConstant(IdxTy, EltSize));
4891
4892 LLT PtrTy = MRI.getType(VecPtr);
4893 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4894}
4895
4896#ifndef NDEBUG
4897 /// Check that all vector operands have the same number of elements. Other
4898 /// operands should be listed in \p NonVecOpIndices.
4899 static bool hasSameNumEltsOnAllVectorOperands(
4900     GenericMachineInstr &MI, MachineRegisterInfo &MRI,
4901 std::initializer_list<unsigned> NonVecOpIndices) {
4902 if (MI.getNumMemOperands() != 0)
4903 return false;
4904
4905 LLT VecTy = MRI.getType(MI.getReg(0));
4906 if (!VecTy.isVector())
4907 return false;
4908 unsigned NumElts = VecTy.getNumElements();
4909
4910 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4911 MachineOperand &Op = MI.getOperand(OpIdx);
4912 if (!Op.isReg()) {
4913 if (!is_contained(NonVecOpIndices, OpIdx))
4914 return false;
4915 continue;
4916 }
4917
4918 LLT Ty = MRI.getType(Op.getReg());
4919 if (!Ty.isVector()) {
4920 if (!is_contained(NonVecOpIndices, OpIdx))
4921 return false;
4922 continue;
4923 }
4924
4925 if (Ty.getNumElements() != NumElts)
4926 return false;
4927 }
4928
4929 return true;
4930}
4931#endif
4932
4933 /// Fill \p DstOps with DstOps whose combined number of elements equals that
4934 /// of \p Ty. Each DstOp is a scalar when \p NumElts = 1, and a vector with
4935 /// \p NumElts elements otherwise. When Ty.getNumElements() is not a multiple
4936 /// of \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
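/// As an illustrative example: for \p Ty = <7 x s32> and \p NumElts = 2, this
/// produces three <2 x s32> DstOps plus one leftover s32 DstOp.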
4937static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
4938 unsigned NumElts) {
4939 LLT LeftoverTy;
4940 assert(Ty.isVector() && "Expected vector type");
4941 LLT EltTy = Ty.getElementType();
4942 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
4943 int NumParts, NumLeftover;
4944 std::tie(NumParts, NumLeftover) =
4945 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
4946
4947 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
4948 for (int i = 0; i < NumParts; ++i) {
4949 DstOps.push_back(NarrowTy);
4950 }
4951
4952 if (LeftoverTy.isValid()) {
4953 assert(NumLeftover == 1 && "expected exactly one leftover");
4954 DstOps.push_back(LeftoverTy);
4955 }
4956}
4957
4958/// Operand \p Op is used by \p N sub-instructions. Fill \p Ops with \p N SrcOps
4959/// made from \p Op, depending on the operand type.
4960static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
4961 MachineOperand &Op) {
4962 for (unsigned i = 0; i < N; ++i) {
4963 if (Op.isReg())
4964 Ops.push_back(Op.getReg());
4965 else if (Op.isImm())
4966 Ops.push_back(Op.getImm());
4967 else if (Op.isPredicate())
4968 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
4969 else
4970 llvm_unreachable("Unsupported type");
4971 }
4972}
4973
4974// Handle splitting vector operations which need to have the same number of
4975// elements in each type index, but each type index may have a different element
4976// type.
4977//
4978// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
4979// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4980// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4981//
4982// Also handles some irregular breakdown cases, e.g.
4983// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
4984// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4985// s64 = G_SHL s64, s32
4986LegalizerHelper::LegalizeResult
4987LegalizerHelper::fewerElementsVectorMultiEltType(
4988 GenericMachineInstr &MI, unsigned NumElts,
4989 std::initializer_list<unsigned> NonVecOpIndices) {
4990 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
4991 "Non-compatible opcode or not specified non-vector operands");
4992 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4993
4994 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4995 unsigned NumDefs = MI.getNumDefs();
4996
4997 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
4998 // Build instructions with DstOps to use the instruction found by CSE directly.
4999 // CSE copies the found instruction into the given vreg when building with a vreg dest.
5000 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
5001 // Output registers will be taken from created instructions.
5002 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
5003 for (unsigned i = 0; i < NumDefs; ++i) {
5004 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
5005 }
5006
5007 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
5008 // Operands listed in NonVecOpIndices will be used as is without splitting;
5009 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
5010 // scalar condition (op 1), immediate in sext_inreg (op 2).
5011 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
5012 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5013 ++UseIdx, ++UseNo) {
5014 if (is_contained(NonVecOpIndices, UseIdx)) {
5015 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
5016 MI.getOperand(UseIdx));
5017 } else {
5018 SmallVector<Register, 8> SplitPieces;
5019 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
5020 MRI);
5021 llvm::append_range(InputOpsPieces[UseNo], SplitPieces);
5022 }
5023 }
5024
5025 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5026
5027 // Take i-th piece of each input operand split and build sub-vector/scalar
5028 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
5029 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5030 SmallVector<DstOp, 8> Defs;
5031 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5032 Defs.push_back(OutputOpsPieces[DstNo][i]);
5033
5034 SmallVector<SrcOp, 8> Uses;
5035 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5036 Uses.push_back(InputOpsPieces[InputNo][i]);
5037
5038 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
5039 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5040 OutputRegs[DstNo].push_back(I.getReg(DstNo));
5041 }
5042
5043 // Merge small outputs into MI's output for each def operand.
5044 if (NumLeftovers) {
5045 for (unsigned i = 0; i < NumDefs; ++i)
5046 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
5047 } else {
5048 for (unsigned i = 0; i < NumDefs; ++i)
5049 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
5050 }
5051
5052 MI.eraseFromParent();
5053 return Legalized;
5054}
5055
5056LegalizerHelper::LegalizeResult
5057LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
5058 unsigned NumElts) {
5059 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5060
5061 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5062 unsigned NumDefs = MI.getNumDefs();
5063
5064 SmallVector<DstOp, 8> OutputOpsPieces;
5065 SmallVector<Register, 8> OutputRegs;
5066 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
5067
5068 // Instructions that perform the register split will be inserted in the basic
5069 // block where the register is defined (the basic block is the next operand).
5070 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
5071 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5072 UseIdx += 2, ++UseNo) {
5073 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
5074 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5075 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
5076 MIRBuilder, MRI);
5077 }
5078
5079 // Build PHIs with fewer elements.
5080 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5081 MIRBuilder.setInsertPt(*MI.getParent(), MI);
5082 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5083 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5084 Phi.addDef(
5085 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5086 OutputRegs.push_back(Phi.getReg(0));
5087
5088 for (unsigned j = 0; j < NumInputs / 2; ++j) {
5089 Phi.addUse(InputOpsPieces[j][i]);
5090 Phi.add(MI.getOperand(1 + j * 2 + 1));
5091 }
5092 }
5093
5094 // Set the insert point after the existing PHIs
5095 MachineBasicBlock &MBB = *MI.getParent();
5096 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
5097
5098 // Merge small outputs into MI's def.
5099 if (NumLeftovers) {
5100 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
5101 } else {
5102 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
5103 }
5104
5105 MI.eraseFromParent();
5106 return Legalized;
5107}
5108
5109LegalizerHelper::LegalizeResult
5110LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
5111 unsigned TypeIdx,
5112 LLT NarrowTy) {
5113 const int NumDst = MI.getNumOperands() - 1;
5114 const Register SrcReg = MI.getOperand(NumDst).getReg();
5115 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5116 LLT SrcTy = MRI.getType(SrcReg);
5117
5118 if (TypeIdx != 1 || NarrowTy == DstTy)
5119 return UnableToLegalize;
5120
5121 // Requires compatible types. Otherwise SrcReg should have been defined by a
5122 // merge-like instruction that would have been artifact-combined. Most likely
5123 // the instruction that defines SrcReg has to perform more/fewer-elements
5124 // legalization compatible with NarrowTy.
5125 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5126 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5127
5128 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5129 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
5130 return UnableToLegalize;
5131
5132 // This is most likely DstTy (smaller than register size) packed in SrcTy
5133 // (larger than register size), and since the unmerge was not combined it will
5134 // be lowered to bit-sequence extracts from a register. Unpack SrcTy to NarrowTy
5135 // (register size) pieces first. Then unpack each of the NarrowTy pieces to DstTy.
5136
5137 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
5138 //
5139 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5140 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5141 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5142 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5143 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5144 const int PartsPerUnmerge = NumDst / NumUnmerge;
5145
5146 for (int I = 0; I != NumUnmerge; ++I) {
5147 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5148
5149 for (int J = 0; J != PartsPerUnmerge; ++J)
5150 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
5151 MIB.addUse(Unmerge.getReg(I));
5152 }
5153
5154 MI.eraseFromParent();
5155 return Legalized;
5156}
5157
5158LegalizerHelper::LegalizeResult
5159LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
5160 LLT NarrowTy) {
5161 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5162 // Requires compatible types. Otherwise the user of DstReg did not perform the
5163 // unmerge that should have been artifact-combined. Most likely the instruction
5164 // that uses DstReg has to do more/fewer-elements legalization compatible with NarrowTy.
5165 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5166 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5167 if (NarrowTy == SrcTy)
5168 return UnableToLegalize;
5169
5170 // This attempts to lower part of an LCMTy merge/unmerge sequence. The intended
5171 // use is for old MIR tests. Since the changes to more/fewer-elements
5172 // legalization, it should no longer be possible to generate MIR like this from
5173 // LLVM IR, because the LCMTy approach was replaced with merge/unmerge to vector elements.
5174 if (TypeIdx == 1) {
5175 assert(SrcTy.isVector() && "Expected vector types");
5176 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5177 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5178 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
5179 return UnableToLegalize;
5180 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
5181 //
5182 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
5183 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
5184 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
5185 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
5186 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
5187 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
5188
5189 SmallVector<Register, 8> Elts;
5190 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
5191 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
5192 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
5193 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5194 Elts.push_back(Unmerge.getReg(j));
5195 }
5196
5197 SmallVector<Register, 8> NarrowTyElts;
5198 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5199 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5200 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
5201 ++i, Offset += NumNarrowTyElts) {
5202 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
5203 NarrowTyElts.push_back(
5204 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5205 }
5206
5207 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5208 MI.eraseFromParent();
5209 return Legalized;
5210 }
5211
5212 assert(TypeIdx == 0 && "Bad type index");
5213 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5214 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5215 return UnableToLegalize;
5216
5217 // This is most likely SrcTy (smaller than register size) packed in DstTy
5218 // (larger than register size), and since the merge was not combined it will be
5219 // lowered to bit-sequence packing into a register. Merge SrcTy to NarrowTy
5220 // (register size) pieces first. Then merge each of the NarrowTy pieces to DstTy.
5221
5222 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
5223 //
5224 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5225 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5226 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
5227 SmallVector<Register, 8> NarrowTyElts;
5228 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5229 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5230 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
5231 for (unsigned i = 0; i < NumParts; ++i) {
5232 SmallVector<Register, 8> Sources;
5233 for (unsigned j = 0; j < NumElts; ++j)
5234 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
5235 NarrowTyElts.push_back(
5236 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5237 }
5238
5239 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5240 MI.eraseFromParent();
5241 return Legalized;
5242}
5243
5244LegalizerHelper::LegalizeResult
5245LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
5246 unsigned TypeIdx,
5247 LLT NarrowVecTy) {
5248 auto [DstReg, SrcVec] = MI.getFirst2Regs();
5249 Register InsertVal;
5250 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5251
5252 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
5253 if (IsInsert)
5254 InsertVal = MI.getOperand(2).getReg();
5255
5256 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
5257 LLT VecTy = MRI.getType(SrcVec);
5258
5259 // If the index is a constant, we can really break this down as you would
5260 // expect, and index into the target size pieces.
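  // For example (illustrative): extracting element 5 of <8 x s32> with
  // NarrowVecTy = <4 x s32> uses the second <4 x s32> piece with a new index of
  // 1 (PartIdx = 5 / 4 = 1, NewIdx = 5 - 4 * 1 = 1).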
5261 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
5262 if (MaybeCst) {
5263 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5264 // Avoid out of bounds indexing the pieces.
5265 if (IdxVal >= VecTy.getNumElements()) {
5266 MIRBuilder.buildUndef(DstReg);
5267 MI.eraseFromParent();
5268 return Legalized;
5269 }
5270
5271 if (!NarrowVecTy.isVector()) {
5272 SmallVector<Register, 8> SplitPieces;
5273 extractParts(MI.getOperand(1).getReg(), NarrowVecTy,
5274 VecTy.getNumElements(), SplitPieces, MIRBuilder, MRI);
5275 if (IsInsert) {
5276 SplitPieces[IdxVal] = InsertVal;
5277 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), SplitPieces);
5278 } else {
5279 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5280 }
5281 } else {
5282 SmallVector<Register, 8> VecParts;
5283 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5284
5285 // Build a sequence of NarrowTy pieces in VecParts for this operand.
5286 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5287 TargetOpcode::G_ANYEXT);
5288
5289 unsigned NewNumElts = NarrowVecTy.getNumElements();
5290
5291 LLT IdxTy = MRI.getType(Idx);
5292 int64_t PartIdx = IdxVal / NewNumElts;
5293 auto NewIdx =
5294 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5295
5296 if (IsInsert) {
5297 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5298
5299 // Use the adjusted index to insert into one of the subvectors.
5300 auto InsertPart = MIRBuilder.buildInsertVectorElement(
5301 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5302 VecParts[PartIdx] = InsertPart.getReg(0);
5303
5304 // Recombine the inserted subvector with the others to reform the result
5305 // vector.
5306 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5307 } else {
5308 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5309 }
5310 }
5311
5312 MI.eraseFromParent();
5313 return Legalized;
5314 }
5315
5316 // With a variable index, we can't perform the operation in a smaller type, so
5317 // we're forced to expand this.
5318 //
5319 // TODO: We could emit a chain of compare/select to figure out which piece to
5320 // index.
5321 return lowerExtractInsertVectorElt(MI);
5322}
5323
5324LegalizerHelper::LegalizeResult
5325LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
5326 LLT NarrowTy) {
5327 // FIXME: Don't know how to handle secondary types yet.
5328 if (TypeIdx != 0)
5329 return UnableToLegalize;
5330
5331 if (!NarrowTy.isByteSized()) {
5332 LLVM_DEBUG(dbgs() << "Can't narrow load/store to non-byte-sized type\n");
5333 return UnableToLegalize;
5334 }
5335
5336 // This implementation doesn't work for atomics. Give up instead of doing
5337 // something invalid.
5338 if (LdStMI.isAtomic())
5339 return UnableToLegalize;
5340
5341 bool IsLoad = isa<GLoad>(LdStMI);
5342 Register ValReg = LdStMI.getReg(0);
5343 Register AddrReg = LdStMI.getPointerReg();
5344 LLT ValTy = MRI.getType(ValReg);
5345
5346 // FIXME: Do we need a distinct NarrowMemory legalize action?
5347 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
5348 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
5349 return UnableToLegalize;
5350 }
5351
5352 int NumParts = -1;
5353 int NumLeftover = -1;
5354 LLT LeftoverTy;
5355 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
5356 if (IsLoad) {
5357 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
5358 } else {
5359 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5360 NarrowLeftoverRegs, MIRBuilder, MRI)) {
5361 NumParts = NarrowRegs.size();
5362 NumLeftover = NarrowLeftoverRegs.size();
5363 }
5364 }
5365
5366 if (NumParts == -1)
5367 return UnableToLegalize;
5368
5369 LLT PtrTy = MRI.getType(AddrReg);
5370 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
5371
5372 unsigned TotalSize = ValTy.getSizeInBits();
5373
5374 // Split the load/store into PartTy-sized pieces starting at Offset. If this
5375 // is a load, return the new registers in ValRegs. For a store, each element
5376 // of ValRegs should be PartTy. Returns the next offset that needs to be
5377 // handled.
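  // For example (illustrative): narrowing an s96 load with NarrowTy = s64 on a
  // little-endian target emits an s64 load at byte offset 0 followed by an s32
  // leftover load at byte offset 8.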
5378 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
5379 auto MMO = LdStMI.getMMO();
5380 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
5381 unsigned NumParts, unsigned Offset) -> unsigned {
5382 MachineFunction &MF = MIRBuilder.getMF();
5383 unsigned PartSize = PartTy.getSizeInBits();
5384 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
5385 ++Idx) {
5386 unsigned ByteOffset = Offset / 8;
5387 Register NewAddrReg;
5388
5389 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5390 ByteOffset);
5391
5392 MachineMemOperand *NewMMO =
5393 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
5394
5395 if (IsLoad) {
5396 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5397 ValRegs.push_back(Dst);
5398 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5399 } else {
5400 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5401 }
5402 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
5403 }
5404
5405 return Offset;
5406 };
5407
5408 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
5409 unsigned HandledOffset =
5410 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
5411
5412 // Handle the rest of the register if this isn't an even type breakdown.
5413 if (LeftoverTy.isValid())
5414 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5415
5416 if (IsLoad) {
5417 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5418 LeftoverTy, NarrowLeftoverRegs);
5419 }
5420
5421 LdStMI.eraseFromParent();
5422 return Legalized;
5423}
5424
5425LegalizerHelper::LegalizeResult
5426LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
5427 LLT NarrowTy) {
5428 using namespace TargetOpcode;
5429 GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
5430 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5431
5432 switch (MI.getOpcode()) {
5433 case G_IMPLICIT_DEF:
5434 case G_TRUNC:
5435 case G_AND:
5436 case G_OR:
5437 case G_XOR:
5438 case G_ADD:
5439 case G_SUB:
5440 case G_MUL:
5441 case G_PTR_ADD:
5442 case G_SMULH:
5443 case G_UMULH:
5444 case G_FADD:
5445 case G_FMUL:
5446 case G_FSUB:
5447 case G_FNEG:
5448 case G_FABS:
5449 case G_FCANONICALIZE:
5450 case G_FDIV:
5451 case G_FREM:
5452 case G_FMA:
5453 case G_FMAD:
5454 case G_FPOW:
5455 case G_FEXP:
5456 case G_FEXP2:
5457 case G_FEXP10:
5458 case G_FLOG:
5459 case G_FLOG2:
5460 case G_FLOG10:
5461 case G_FLDEXP:
5462 case G_FNEARBYINT:
5463 case G_FCEIL:
5464 case G_FFLOOR:
5465 case G_FRINT:
5466 case G_INTRINSIC_LRINT:
5467 case G_INTRINSIC_LLRINT:
5468 case G_INTRINSIC_ROUND:
5469 case G_INTRINSIC_ROUNDEVEN:
5470 case G_LROUND:
5471 case G_LLROUND:
5472 case G_INTRINSIC_TRUNC:
5473 case G_FCOS:
5474 case G_FSIN:
5475 case G_FTAN:
5476 case G_FACOS:
5477 case G_FASIN:
5478 case G_FATAN:
5479 case G_FATAN2:
5480 case G_FCOSH:
5481 case G_FSINH:
5482 case G_FTANH:
5483 case G_FSQRT:
5484 case G_BSWAP:
5485 case G_BITREVERSE:
5486 case G_SDIV:
5487 case G_UDIV:
5488 case G_SREM:
5489 case G_UREM:
5490 case G_SDIVREM:
5491 case G_UDIVREM:
5492 case G_SMIN:
5493 case G_SMAX:
5494 case G_UMIN:
5495 case G_UMAX:
5496 case G_ABS:
5497 case G_FMINNUM:
5498 case G_FMAXNUM:
5499 case G_FMINNUM_IEEE:
5500 case G_FMAXNUM_IEEE:
5501 case G_FMINIMUM:
5502 case G_FMAXIMUM:
5503 case G_FMINIMUMNUM:
5504 case G_FMAXIMUMNUM:
5505 case G_FSHL:
5506 case G_FSHR:
5507 case G_ROTL:
5508 case G_ROTR:
5509 case G_FREEZE:
5510 case G_SADDSAT:
5511 case G_SSUBSAT:
5512 case G_UADDSAT:
5513 case G_USUBSAT:
5514 case G_UMULO:
5515 case G_SMULO:
5516 case G_SHL:
5517 case G_LSHR:
5518 case G_ASHR:
5519 case G_SSHLSAT:
5520 case G_USHLSAT:
5521 case G_CTLZ:
5522 case G_CTLZ_ZERO_UNDEF:
5523 case G_CTTZ:
5524 case G_CTTZ_ZERO_UNDEF:
5525 case G_CTPOP:
5526 case G_FCOPYSIGN:
5527 case G_ZEXT:
5528 case G_SEXT:
5529 case G_ANYEXT:
5530 case G_FPEXT:
5531 case G_FPTRUNC:
5532 case G_SITOFP:
5533 case G_UITOFP:
5534 case G_FPTOSI:
5535 case G_FPTOUI:
5536 case G_FPTOSI_SAT:
5537 case G_FPTOUI_SAT:
5538 case G_INTTOPTR:
5539 case G_PTRTOINT:
5540 case G_ADDRSPACE_CAST:
5541 case G_UADDO:
5542 case G_USUBO:
5543 case G_UADDE:
5544 case G_USUBE:
5545 case G_SADDO:
5546 case G_SSUBO:
5547 case G_SADDE:
5548 case G_SSUBE:
5549 case G_STRICT_FADD:
5550 case G_STRICT_FSUB:
5551 case G_STRICT_FMUL:
5552 case G_STRICT_FMA:
5553 case G_STRICT_FLDEXP:
5554 case G_FFREXP:
5555 return fewerElementsVectorMultiEltType(GMI, NumElts);
5556 case G_ICMP:
5557 case G_FCMP:
5558 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
5559 case G_IS_FPCLASS:
5560 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
5561 case G_SELECT:
5562 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
5563 return fewerElementsVectorMultiEltType(GMI, NumElts);
5564 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
5565 case G_PHI:
5566 return fewerElementsVectorPhi(GMI, NumElts);
5567 case G_UNMERGE_VALUES:
5568 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
5569 case G_BUILD_VECTOR:
5570 assert(TypeIdx == 0 && "not a vector type index");
5571 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5572 case G_CONCAT_VECTORS:
5573 if (TypeIdx != 1) // TODO: This probably does work as expected already.
5574 return UnableToLegalize;
5575 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5576 case G_EXTRACT_VECTOR_ELT:
5577 case G_INSERT_VECTOR_ELT:
5578 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
5579 case G_LOAD:
5580 case G_STORE:
5581 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
5582 case G_SEXT_INREG:
5583 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
5584 GISEL_VECREDUCE_CASES_NONSEQ
5585 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
5586 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5587 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5588 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
5589 case G_SHUFFLE_VECTOR:
5590 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
5591 case G_FPOWI:
5592 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
5593 case G_BITCAST:
5594 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
5595 case G_INTRINSIC_FPTRUNC_ROUND:
5596 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
5597 default:
5598 return UnableToLegalize;
5599 }
5600}
5601
5602LegalizerHelper::LegalizeResult
5603LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx,
5604 LLT NarrowTy) {
5605 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5606 "Not a bitcast operation");
5607
5608 if (TypeIdx != 0)
5609 return UnableToLegalize;
5610
5611 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5612
5613 unsigned NewElemCount =
5614 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5615 SmallVector<Register> SrcVRegs, BitcastVRegs;
5616 if (NewElemCount == 1) {
5617 LLT SrcNarrowTy = SrcTy.getElementType();
5618
5619 auto Unmerge = MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5620 getUnmergeResults(SrcVRegs, *Unmerge);
5621 } else {
5622 LLT SrcNarrowTy = LLT::fixed_vector(NewElemCount, SrcTy.getElementType());
5623
5624 // Split the Src and Dst Reg into smaller registers
5625 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5626 return UnableToLegalize;
5627 }
5628
5629 // Build new, smaller bitcast instructions.
5630 // Leftover types are not supported yet, but will have to be eventually.
5631 for (Register Reg : SrcVRegs)
5632 BitcastVRegs.push_back(MIRBuilder.buildBitcast(NarrowTy, Reg).getReg(0));
5633
5634 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5635 MI.eraseFromParent();
5636 return Legalized;
5637}
5638
5639LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
5640 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5641 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5642 if (TypeIdx != 0)
5643 return UnableToLegalize;
5644
5645 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5646 MI.getFirst3RegLLTs();
5647 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5648 // The shuffle should be canonicalized by now.
5649 if (DstTy != Src1Ty)
5650 return UnableToLegalize;
5651 if (DstTy != Src2Ty)
5652 return UnableToLegalize;
5653
5654 if (!isPowerOf2_32(DstTy.getNumElements()))
5655 return UnableToLegalize;
5656
5657 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5658 // Further legalization attempts will be needed to split it further.
5659 NarrowTy =
5660 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
5661 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5662
5663 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
5664 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
5665 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
5666 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5667 SplitSrc2Regs[1]};
5668
5669 Register Hi, Lo;
5670
5671 // If Lo or Hi uses elements from at most two of the four input vectors, then
5672 // express it as a vector shuffle of those two inputs. Otherwise extract the
5673 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
5674 SmallVector<int, 16> Ops;
5675 for (unsigned High = 0; High < 2; ++High) {
5676 Register &Output = High ? Hi : Lo;
5677
5678 // Build a shuffle mask for the output, discovering on the fly which
5679 // input vectors to use as shuffle operands (recorded in InputUsed).
5680 // If building a suitable shuffle vector proves too hard, then bail
5681 // out with UseBuildVector set.
5682 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
5683 unsigned FirstMaskIdx = High * NewElts;
5684 bool UseBuildVector = false;
5685 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5686 // The mask element. This indexes into the input.
5687 int Idx = Mask[FirstMaskIdx + MaskOffset];
5688
5689 // The input vector this mask element indexes into.
5690 unsigned Input = (unsigned)Idx / NewElts;
5691
5692 if (Input >= std::size(Inputs)) {
5693 // The mask element does not index into any input vector.
5694 Ops.push_back(-1);
5695 continue;
5696 }
5697
5698 // Turn the index into an offset from the start of the input vector.
5699 Idx -= Input * NewElts;
5700
5701 // Find or create a shuffle vector operand to hold this input.
5702 unsigned OpNo;
5703 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5704 if (InputUsed[OpNo] == Input) {
5705 // This input vector is already an operand.
5706 break;
5707 } else if (InputUsed[OpNo] == -1U) {
5708 // Create a new operand for this input vector.
5709 InputUsed[OpNo] = Input;
5710 break;
5711 }
5712 }
5713
5714 if (OpNo >= std::size(InputUsed)) {
5715 // More than two input vectors used! Give up on trying to create a
5716 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5717 UseBuildVector = true;
5718 break;
5719 }
5720
5721 // Add the mask index for the new shuffle vector.
5722 Ops.push_back(Idx + OpNo * NewElts);
5723 }
5724
5725 if (UseBuildVector) {
5726 LLT EltTy = NarrowTy.getElementType();
5727 SmallVector<Register, 16> SVOps;
5728
5729 // Extract the input elements by hand.
5730 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5731 // The mask element. This indexes into the input.
5732 int Idx = Mask[FirstMaskIdx + MaskOffset];
5733
5734 // The input vector this mask element indexes into.
5735 unsigned Input = (unsigned)Idx / NewElts;
5736
5737 if (Input >= std::size(Inputs)) {
5738 // The mask element is "undef" or indexes off the end of the input.
5739 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5740 continue;
5741 }
5742
5743 // Turn the index into an offset from the start of the input vector.
5744 Idx -= Input * NewElts;
5745
5746 // Extract the vector element by hand.
5747 SVOps.push_back(MIRBuilder
5748 .buildExtractVectorElement(
5749 EltTy, Inputs[Input],
5750 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5751 .getReg(0));
5752 }
5753
5754 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5755 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5756 } else if (InputUsed[0] == -1U) {
5757 // No input vectors were used! The result is undefined.
5758 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5759 } else {
5760 Register Op0 = Inputs[InputUsed[0]];
5761 // If only one input was used, use an undefined vector for the other.
5762 Register Op1 = InputUsed[1] == -1U
5763 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5764 : Inputs[InputUsed[1]];
5765 // At least one input vector was used. Create a new shuffle vector.
5766 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5767 }
5768
5769 Ops.clear();
5770 }
5771
5772 MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
5773 MI.eraseFromParent();
5774 return Legalized;
5775}
5776
5777LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
5778 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5779 auto &RdxMI = cast<GVecReduce>(MI);
5780
5781 if (TypeIdx != 1)
5782 return UnableToLegalize;
5783
5784 // The semantics of the normal non-sequential reductions allow us to freely
5785 // re-associate the operation.
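  // For example (illustrative): G_VECREDUCE_ADD of <8 x s32> narrowed with
  // NarrowTy = <4 x s32> is handled by the power-of-two path below: a vector
  // G_ADD of the two <4 x s32> halves followed by one G_VECREDUCE_ADD.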
5786 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5787
5788 if (NarrowTy.isVector() &&
5789 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5790 return UnableToLegalize;
5791
5792 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5793 SmallVector<Register> SplitSrcs;
5794 // If NarrowTy is a scalar then we're being asked to scalarize.
5795 const unsigned NumParts =
5796 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5797 : SrcTy.getNumElements();
5798
5799 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5800 if (NarrowTy.isScalar()) {
5801 if (DstTy != NarrowTy)
5802 return UnableToLegalize; // FIXME: handle implicit extensions.
5803
5804 if (isPowerOf2_32(NumParts)) {
5805 // Generate a tree of scalar operations to reduce the critical path.
5806 SmallVector<Register> PartialResults;
5807 unsigned NumPartsLeft = NumParts;
5808 while (NumPartsLeft > 1) {
5809 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5810 PartialResults.emplace_back(
5811 MIRBuilder
5812 .buildInstr(ScalarOpc, {NarrowTy},
5813 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5814 .getReg(0));
5815 }
5816 SplitSrcs = PartialResults;
5817 PartialResults.clear();
5818 NumPartsLeft = SplitSrcs.size();
5819 }
5820 assert(SplitSrcs.size() == 1);
5821 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5822 MI.eraseFromParent();
5823 return Legalized;
5824 }
5825 // If we can't generate a tree, then just do sequential operations.
5826 Register Acc = SplitSrcs[0];
5827 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5828 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5829 .getReg(0);
5830 MIRBuilder.buildCopy(DstReg, Acc);
5831 MI.eraseFromParent();
5832 return Legalized;
5833 }
5834 SmallVector<Register> PartialReductions;
5835 for (unsigned Part = 0; Part < NumParts; ++Part) {
5836 PartialReductions.push_back(
5837 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5838 .getReg(0));
5839 }
5840
5841 // If the types involved are powers of 2, we can generate intermediate vector
5842 // ops, before generating a final reduction operation.
5843 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5844 isPowerOf2_32(NarrowTy.getNumElements())) {
5845 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5846 }
5847
5848 Register Acc = PartialReductions[0];
5849 for (unsigned Part = 1; Part < NumParts; ++Part) {
5850 if (Part == NumParts - 1) {
5851 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5852 {Acc, PartialReductions[Part]});
5853 } else {
5854 Acc = MIRBuilder
5855 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5856 .getReg(0);
5857 }
5858 }
5859 MI.eraseFromParent();
5860 return Legalized;
5861}
5862
5863LegalizerHelper::LegalizeResult
5864LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
5865 unsigned int TypeIdx,
5866 LLT NarrowTy) {
5867 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5868 MI.getFirst3RegLLTs();
5869 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5870 DstTy != NarrowTy)
5871 return UnableToLegalize;
5872
5873 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5874 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5875 "Unexpected vecreduce opcode");
5876 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5877 ? TargetOpcode::G_FADD
5878 : TargetOpcode::G_FMUL;
5879
5880 SmallVector<Register> SplitSrcs;
5881 unsigned NumParts = SrcTy.getNumElements();
5882 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5883 Register Acc = ScalarReg;
5884 for (unsigned i = 0; i < NumParts; i++)
5885 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5886 .getReg(0);
5887
5888 MIRBuilder.buildCopy(DstReg, Acc);
5889 MI.eraseFromParent();
5890 return Legalized;
5891}
5892
5893LegalizerHelper::LegalizeResult
5894LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5895 LLT SrcTy, LLT NarrowTy,
5896 unsigned ScalarOpc) {
5897 SmallVector<Register> SplitSrcs;
5898 // Split the sources into NarrowTy size pieces.
5899 extractParts(SrcReg, NarrowTy,
5900 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5901 MIRBuilder, MRI);
5902 // We're going to do a tree reduction using vector operations until we have
5903 // one NarrowTy size value left.
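  // For example (illustrative): a <16 x s32> source with NarrowTy = <4 x s32>
  // starts with 4 pieces, which two rounds of pairwise vector ops reduce to a
  // single <4 x s32> value that the original reduction then consumes.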
5904 while (SplitSrcs.size() > 1) {
5905 SmallVector<Register> PartialRdxs;
5906 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5907 Register LHS = SplitSrcs[Idx];
5908 Register RHS = SplitSrcs[Idx + 1];
5909 // Create the intermediate vector op.
5910 Register Res =
5911 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
5912 PartialRdxs.push_back(Res);
5913 }
5914 SplitSrcs = std::move(PartialRdxs);
5915 }
5916 // Finally generate the requested NarrowTy based reduction.
5917 Observer.changingInstr(MI);
5918 MI.getOperand(1).setReg(SplitSrcs[0]);
5919 Observer.changedInstr(MI);
5920 return Legalized;
5921}
5922
5923LegalizerHelper::LegalizeResult
5924LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
5925 const LLT HalfTy, const LLT AmtTy) {
5926
5927 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5928 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5929 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5930
5931 if (Amt.isZero()) {
5932 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
5933 MI.eraseFromParent();
5934 return Legalized;
5935 }
5936
5937 LLT NVT = HalfTy;
5938 unsigned NVTBits = HalfTy.getSizeInBits();
5939 unsigned VTBits = 2 * NVTBits;
5940
5941 SrcOp Lo(Register(0)), Hi(Register(0));
5942 if (MI.getOpcode() == TargetOpcode::G_SHL) {
5943 if (Amt.ugt(VTBits)) {
5944 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5945 } else if (Amt.ugt(NVTBits)) {
5946 Lo = MIRBuilder.buildConstant(NVT, 0);
5947 Hi = MIRBuilder.buildShl(NVT, InL,
5948 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5949 } else if (Amt == NVTBits) {
5950 Lo = MIRBuilder.buildConstant(NVT, 0);
5951 Hi = InL;
5952 } else {
5953 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
5954 auto OrLHS =
5955 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
5956 auto OrRHS = MIRBuilder.buildLShr(
5957 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5958 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5959 }
5960 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5961 if (Amt.ugt(VTBits)) {
5962 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5963 } else if (Amt.ugt(NVTBits)) {
5964 Lo = MIRBuilder.buildLShr(NVT, InH,
5965 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5966 Hi = MIRBuilder.buildConstant(NVT, 0);
5967 } else if (Amt == NVTBits) {
5968 Lo = InH;
5969 Hi = MIRBuilder.buildConstant(NVT, 0);
5970 } else {
5971 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5972
5973 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5974 auto OrRHS = MIRBuilder.buildShl(
5975 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5976
5977 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5978 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
5979 }
5980 } else {
5981 if (Amt.ugt(VTBits)) {
5982 Hi = Lo = MIRBuilder.buildAShr(
5983 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5984 } else if (Amt.ugt(NVTBits)) {
5985 Lo = MIRBuilder.buildAShr(NVT, InH,
5986 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5987 Hi = MIRBuilder.buildAShr(NVT, InH,
5988 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5989 } else if (Amt == NVTBits) {
5990 Lo = InH;
5991 Hi = MIRBuilder.buildAShr(NVT, InH,
5992 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5993 } else {
5994 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5995
5996 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5997 auto OrRHS = MIRBuilder.buildShl(
5998 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5999
6000 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6001 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
6002 }
6003 }
6004
6005 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
6006 MI.eraseFromParent();
6007
6008 return Legalized;
6009}
6010
6011LegalizerHelper::LegalizeResult
6012LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
6013 LLT RequestedTy) {
6014 if (TypeIdx == 1) {
6015 Observer.changingInstr(MI);
6016 narrowScalarSrc(MI, RequestedTy, 2);
6017 Observer.changedInstr(MI);
6018 return Legalized;
6019 }
6020
6021 Register DstReg = MI.getOperand(0).getReg();
6022 LLT DstTy = MRI.getType(DstReg);
6023 if (DstTy.isVector())
6024 return UnableToLegalize;
6025
6026 Register Amt = MI.getOperand(2).getReg();
6027 LLT ShiftAmtTy = MRI.getType(Amt);
6028 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
6029 if (DstEltSize % 2 != 0)
6030 return UnableToLegalize;
6031
6032 // Check if we should use multi-way splitting instead of recursive binary
6033 // splitting.
6034 //
6035 // Multi-way splitting directly decomposes wide shifts (e.g., 128-bit ->
6036 // 4×32-bit) in a single legalization step, avoiding the recursive overhead
6037 // and dependency chains created by the usual binary splitting approach
6038 // (128->64->32).
6039 //
6040 // The >= 8 parts threshold ensures we only use this optimization when binary
6041 // splitting would require multiple recursive passes, avoiding overhead for
6042 // simple 2-way splits where the binary approach is sufficient.
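  // For example (illustrative): narrowing a 256-bit shift to s32 gives
  // NumParts = 8 and takes the multi-way path, while narrowing a 128-bit shift
  // to s64 gives NumParts = 2 and falls through to binary splitting.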
6043 if (RequestedTy.isValid() && RequestedTy.isScalar() &&
6044 DstEltSize % RequestedTy.getSizeInBits() == 0) {
6045 const unsigned NumParts = DstEltSize / RequestedTy.getSizeInBits();
6046 // Use multiway if we have 8 or more parts (i.e., would need 3+ recursive
6047 // steps).
6048 if (NumParts >= 8)
6049 return narrowScalarShiftMultiway(MI, RequestedTy);
6050 }
6051
6052 // Fall back to binary splitting:
6053 // Ignore the input type. We can only go to exactly half the size of the
6054 // input. If that isn't small enough, the resulting pieces will be further
6055 // legalized.
6056 const unsigned NewBitSize = DstEltSize / 2;
6057 const LLT HalfTy = LLT::scalar(NewBitSize);
6058 const LLT CondTy = LLT::scalar(1);
6059
6060 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
6061 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
6062 ShiftAmtTy);
6063 }
6064
6065 // TODO: Expand with known bits.
6066
6067 // Handle the fully general expansion by an unknown amount.
6068 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6069
6070 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6071 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6072 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6073
6074 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6075 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6076
6077 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6078 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
6079 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
6080
6081 Register ResultRegs[2];
6082 switch (MI.getOpcode()) {
6083 case TargetOpcode::G_SHL: {
6084 // Short: ShAmt < NewBitSize
6085 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
6086
6087 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6088 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
6089 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6090
6091 // Long: ShAmt >= NewBitSize
6092 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
6093 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
6094
6095 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6096 auto Hi = MIRBuilder.buildSelect(
6097 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6098
6099 ResultRegs[0] = Lo.getReg(0);
6100 ResultRegs[1] = Hi.getReg(0);
6101 break;
6102 }
6103 case TargetOpcode::G_LSHR:
6104 case TargetOpcode::G_ASHR: {
6105 // Short: ShAmt < NewBitSize
6106 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
6107
6108 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
6109 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6110 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6111
6112 // Long: ShAmt >= NewBitSize
6113 MachineInstrBuilder HiL;
6114 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6115 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
6116 } else {
6117 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6118 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
6119 }
6120 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
6121 {InH, AmtExcess}); // Lo from Hi part.
6122
6123 auto Lo = MIRBuilder.buildSelect(
6124 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6125
6126 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6127
6128 ResultRegs[0] = Lo.getReg(0);
6129 ResultRegs[1] = Hi.getReg(0);
6130 break;
6131 }
6132 default:
6133 llvm_unreachable("not a shift");
6134 }
6135
6136 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6137 MI.eraseFromParent();
6138 return Legalized;
6139}
6140
6141Register LegalizerHelper::buildConstantShiftPart(unsigned Opcode,
6142 unsigned PartIdx,
6143 unsigned NumParts,
6144 ArrayRef<Register> SrcParts,
6145 const ShiftParams &Params,
6146 LLT TargetTy, LLT ShiftAmtTy) {
6147 auto WordShiftConst = getIConstantVRegVal(Params.WordShift, MRI);
6148 auto BitShiftConst = getIConstantVRegVal(Params.BitShift, MRI);
6149 assert(WordShiftConst && BitShiftConst && "Expected constants");
6150
6151 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6152 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6153 const bool NeedsInterWordShift = ShiftBits != 0;
6154
6155 switch (Opcode) {
6156 case TargetOpcode::G_SHL: {
6157 // Data moves from lower indices to higher indices
6158 // If this part would come from a source beyond our range, it's zero
6159 if (PartIdx < ShiftWords)
6160 return Params.Zero;
6161
6162 unsigned SrcIdx = PartIdx - ShiftWords;
6163 if (!NeedsInterWordShift)
6164 return SrcParts[SrcIdx];
6165
6166 // Combine shifted main part with carry from previous part
6167 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6168 if (SrcIdx > 0) {
6169 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6170 Params.InvBitShift);
6171 return MIRBuilder.buildOr(TargetTy, Hi, Lo).getReg(0);
6172 }
6173 return Hi.getReg(0);
6174 }
6175
6176 case TargetOpcode::G_LSHR: {
6177 unsigned SrcIdx = PartIdx + ShiftWords;
6178 if (SrcIdx >= NumParts)
6179 return Params.Zero;
6180 if (!NeedsInterWordShift)
6181 return SrcParts[SrcIdx];
6182
6183 // Combine shifted main part with carry from next part
6184 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6185 if (SrcIdx + 1 < NumParts) {
6186 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6187 Params.InvBitShift);
6188 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6189 }
6190 return Lo.getReg(0);
6191 }
6192
6193 case TargetOpcode::G_ASHR: {
6194 // Like LSHR but preserves sign bit
6195 unsigned SrcIdx = PartIdx + ShiftWords;
6196 if (SrcIdx >= NumParts)
6197 return Params.SignBit;
6198 if (!NeedsInterWordShift)
6199 return SrcParts[SrcIdx];
6200
6201 // Only the original MSB part uses arithmetic shift to preserve sign. All
6202 // other parts use logical shift since they're just moving data bits.
6203 auto Lo =
6204 (SrcIdx == NumParts - 1)
6205 ? MIRBuilder.buildAShr(TargetTy, SrcParts[SrcIdx], Params.BitShift)
6206 : MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6207 Register HiSrc =
6208 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.SignBit;
6209 auto Hi = MIRBuilder.buildShl(TargetTy, HiSrc, Params.InvBitShift);
6210 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6211 }
6212
6213 default:
6214 llvm_unreachable("not a shift");
6215 }
6216}
6217
6218Register LegalizerHelper::buildVariableShiftPart(unsigned Opcode,
6219 Register MainOperand,
6220 Register ShiftAmt,
6221 LLT TargetTy,
6222 Register CarryOperand) {
6223 // This helper generates a single output part for variable shifts by combining
6224 // the main operand (shifted by BitShift) with carry bits from an adjacent
6225 // part.
6226
6227 // For G_ASHR, individual parts don't have their own sign bit, only the
6228 // complete value does. So we use LSHR for the main operand shift in ASHR
6229 // context.
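  // For example (illustrative): for a G_LSHR with 32-bit parts and BitShift = 10,
  // this computes (MainOperand >> 10) | (CarryOperand << 22), with the select
  // below forcing the carry term to zero whenever BitShift == 0.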
6230 unsigned MainOpcode =
6231 (Opcode == TargetOpcode::G_ASHR) ? TargetOpcode::G_LSHR : Opcode;
6232
6233 // Perform the primary shift on the main operand
6234 Register MainShifted =
6235 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6236 .getReg(0);
6237
6238 // No carry operand available
6239 if (!CarryOperand.isValid())
6240 return MainShifted;
6241
6242 // If BitShift is 0 (word-aligned shift), no inter-word bit movement occurs,
6243 // so carry bits aren't needed.
6244 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6245 auto ZeroConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6246 LLT BoolTy = LLT::scalar(1);
6247 auto IsZeroBitShift =
6248 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, ShiftAmt, ZeroConst);
6249
6250 // Extract bits from the adjacent part that will "carry over" into this part.
6251 // The carry direction is opposite to the main shift direction, so we can
6252 // align the two shifted values before combining them with OR.
6253
6254 // Determine the carry shift opcode (opposite direction)
6255 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6256 : TargetOpcode::G_SHL;
6257
6258 // Calculate inverse shift amount: BitWidth - ShiftAmt
6259 auto TargetBitsConst =
6260 MIRBuilder.buildConstant(ShiftAmtTy, TargetTy.getScalarSizeInBits());
6261 auto InvShiftAmt = MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6262
6263 // Shift the carry operand
6264 Register CarryBits =
6265 MIRBuilder
6266 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6267 .getReg(0);
6268
6269 // If BitShift is 0, don't include carry bits (InvShiftAmt would equal
6270 // TargetBits which would be poison for the individual carry shift operation).
6271 auto ZeroReg = MIRBuilder.buildConstant(TargetTy, 0);
6272 Register SafeCarryBits =
6273 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6274 .getReg(0);
6275
6276 // Combine the main shifted part with the carry bits
6277 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6278}
6279
6280LegalizerHelper::LegalizeResult
6281LegalizerHelper::narrowScalarShiftByConstantMultiway(MachineInstr &MI,
6282 const APInt &Amt,
6283 LLT TargetTy,
6284 LLT ShiftAmtTy) {
6285 // Any wide shift can be decomposed into WordShift + BitShift components.
6286 // When shift amount is known constant, directly compute the decomposition
6287 // values and generate constant registers.
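  // For example (illustrative): a 128-bit G_SHL by 40 with 32-bit parts gives
  // ShiftWords = 1 and ShiftBits = 8, so the output parts (low to high) are
  // { 0, P0 << 8, (P1 << 8) | (P0 >> 24), (P2 << 8) | (P1 >> 24) }, where Pi is
  // the i-th source part.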
6288 Register DstReg = MI.getOperand(0).getReg();
6289 Register SrcReg = MI.getOperand(1).getReg();
6290 LLT DstTy = MRI.getType(DstReg);
6291
6292 const unsigned DstBits = DstTy.getScalarSizeInBits();
6293 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6294 const unsigned NumParts = DstBits / TargetBits;
6295
6296 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6297
6298 // When the shift amount is known at compile time, we just calculate which
6299 // source parts contribute to each output part.
6300
6301 SmallVector<Register, 8> SrcParts;
6302 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6303
6304 if (Amt.isZero()) {
6305 // No shift needed, just copy
6306 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6307 MI.eraseFromParent();
6308 return Legalized;
6309 }
6310
6311 ShiftParams Params;
6312 const unsigned ShiftWords = Amt.getZExtValue() / TargetBits;
6313 const unsigned ShiftBits = Amt.getZExtValue() % TargetBits;
6314
6315 // Generate constants and values needed by all shift types
6316 Params.WordShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftWords).getReg(0);
6317 Params.BitShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftBits).getReg(0);
6318 Params.InvBitShift =
6319 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6320 Params.Zero = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6321
6322 // For ASHR, we need the sign-extended value to fill shifted-out positions
6323 if (MI.getOpcode() == TargetOpcode::G_ASHR)
6324 Params.SignBit =
6325 MIRBuilder
6326 .buildAShr(TargetTy, SrcParts[SrcParts.size() - 1],
6327 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6328 .getReg(0);
6329
6330 SmallVector<Register, 8> DstParts(NumParts);
6331 for (unsigned I = 0; I < NumParts; ++I)
6332 DstParts[I] = buildConstantShiftPart(MI.getOpcode(), I, NumParts, SrcParts,
6333 Params, TargetTy, ShiftAmtTy);
6334
6335 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6336 MI.eraseFromParent();
6337 return Legalized;
6338}
6339
6340LegalizerHelper::LegalizeResult
6341LegalizerHelper::narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy) {
6342 Register DstReg = MI.getOperand(0).getReg();
6343 Register SrcReg = MI.getOperand(1).getReg();
6344 Register AmtReg = MI.getOperand(2).getReg();
6345 LLT DstTy = MRI.getType(DstReg);
6346 LLT ShiftAmtTy = MRI.getType(AmtReg);
6347
6348 const unsigned DstBits = DstTy.getScalarSizeInBits();
6349 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6350 const unsigned NumParts = DstBits / TargetBits;
6351
6352 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6353 assert(isPowerOf2_32(TargetBits) && "Target bit width must be power of 2");
6354
6355 // If the shift amount is known at compile time, we can use direct indexing
6356 // instead of generating select chains in the general case.
6357 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI))
6358 return narrowScalarShiftByConstantMultiway(MI, VRegAndVal->Value, TargetTy,
6359 ShiftAmtTy);
6360
6361 // For runtime-variable shift amounts, we must generate a more complex
6362 // sequence that handles all possible shift values using select chains.
6363
6364 // Split the input into target-sized pieces
6365 SmallVector<Register, 8> SrcParts;
6366 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6367
6368 // Shifting by zero should be a no-op.
6369 auto ZeroAmtConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6370 LLT BoolTy = LLT::scalar(1);
6371 auto IsZeroShift =
6372 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, AmtReg, ZeroAmtConst);
6373
6374 // Any wide shift can be decomposed into two components:
6375 // 1. WordShift: number of complete target-sized words to shift
6376 // 2. BitShift: number of bits to shift within each word
6377 //
6378 // Example: 128-bit >> 50 with 32-bit target:
6379 // WordShift = 50 / 32 = 1 (shift right by 1 complete word)
6380 // BitShift = 50 % 32 = 18 (shift each word right by 18 bits)
6381 unsigned TargetBitsLog2 = Log2_32(TargetBits);
6382 auto TargetBitsLog2Const =
6383 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6384 auto TargetBitsMask = MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6385
6386 Register WordShift =
6387 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6388 Register BitShift =
6389 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6390
6391 // Fill values:
6392 // - SHL/LSHR: fill with zeros
6393 // - ASHR: fill with sign-extended MSB
6394 Register ZeroReg = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6395
6396 Register FillValue;
6397 if (MI.getOpcode() == TargetOpcode::G_ASHR) {
6398 auto TargetBitsMinusOneConst =
6399 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6400 FillValue = MIRBuilder
6401 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6402 TargetBitsMinusOneConst)
6403 .getReg(0);
6404 } else {
6405 FillValue = ZeroReg;
6406 }
6407
6408 SmallVector<Register, 8> DstParts(NumParts);
6409
6410 // For each output part, generate a select chain that chooses the correct
6411 // result based on the runtime WordShift value. This handles all possible
6412 // word shift amounts by pre-calculating what each would produce.
6413 for (unsigned I = 0; I < NumParts; ++I) {
6414 // Initialize with appropriate default value for this shift type
6415 Register InBoundsResult = FillValue;
6416
6417 // clang-format off
6418 // Build a branchless select chain by pre-computing results for all possible
6419 // WordShift values (0 to NumParts-1). Each iteration nests a new select:
6420 //
6421 // K=0: select(WordShift==0, result0, FillValue)
6422 // K=1: select(WordShift==1, result1, select(WordShift==0, result0, FillValue))
6423 // K=2: select(WordShift==2, result2, select(WordShift==1, result1, select(...)))
6424 // clang-format on
6425 for (unsigned K = 0; K < NumParts; ++K) {
6426 auto WordShiftKConst = MIRBuilder.buildConstant(ShiftAmtTy, K);
6427 auto IsWordShiftK = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy,
6428 WordShift, WordShiftKConst);
6429
6430 // Calculate source indices for this word shift
6431 //
6432 // For 4-part 128-bit value with K=1 word shift:
6433 // SHL: [3][2][1][0] << K => [2][1][0][Z]
6434 // -> (MainIdx = I-K, CarryIdx = I-K-1)
6435 // LSHR: [3][2][1][0] >> K => [Z][3][2][1]
6436 // -> (MainIdx = I+K, CarryIdx = I+K+1)
6437 int MainSrcIdx;
6438 int CarrySrcIdx; // Index for the word that provides the carried-in bits.
6439
6440 switch (MI.getOpcode()) {
6441 case TargetOpcode::G_SHL:
6442 MainSrcIdx = (int)I - (int)K;
6443 CarrySrcIdx = MainSrcIdx - 1;
6444 break;
6445 case TargetOpcode::G_LSHR:
6446 case TargetOpcode::G_ASHR:
6447 MainSrcIdx = (int)I + (int)K;
6448 CarrySrcIdx = MainSrcIdx + 1;
6449 break;
6450 default:
6451 llvm_unreachable("Not a shift");
6452 }
6453
6454 // Check bounds and build the result for this word shift
6455 Register ResultForK;
6456 if (MainSrcIdx >= 0 && MainSrcIdx < (int)NumParts) {
6457 Register MainOp = SrcParts[MainSrcIdx];
6458 Register CarryOp;
6459
6460 // Determine carry operand with bounds checking
6461 if (CarrySrcIdx >= 0 && CarrySrcIdx < (int)NumParts)
6462 CarryOp = SrcParts[CarrySrcIdx];
6463 else if (MI.getOpcode() == TargetOpcode::G_ASHR &&
6464 CarrySrcIdx >= (int)NumParts)
6465 CarryOp = FillValue; // Use sign extension
6466
6467 ResultForK = buildVariableShiftPart(MI.getOpcode(), MainOp, BitShift,
6468 TargetTy, CarryOp);
6469 } else {
6470 // Out of bounds - use fill value for this k
6471 ResultForK = FillValue;
6472 }
6473
6474 // Select this result if WordShift equals k
6475 InBoundsResult =
6476 MIRBuilder
6477 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6478 .getReg(0);
6479 }
6480
6481 // Handle zero-shift special case: if shift is 0, use original input
6482 DstParts[I] =
6483 MIRBuilder
6484 .buildSelect(TargetTy, IsZeroShift, SrcParts[I], InBoundsResult)
6485 .getReg(0);
6486 }
6487
6488 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6489 MI.eraseFromParent();
6490 return Legalized;
6491}
6492
6493LegalizerHelper::LegalizeResult
6494LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
6495 LLT MoreTy) {
6496 assert(TypeIdx == 0 && "Expecting only Idx 0");
6497
6498 Observer.changingInstr(MI);
6499 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
6500 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
6501 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
6502 moreElementsVectorSrc(MI, MoreTy, I);
6503 }
6504
6505 MachineBasicBlock &MBB = *MI.getParent();
6506 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
6507 moreElementsVectorDst(MI, MoreTy, 0);
6508 Observer.changedInstr(MI);
6509 return Legalized;
6510}
6511
6512MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
6513 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
6514 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
6515
6516 switch (Opcode) {
6517 default:
6519 "getNeutralElementForVecReduce called with invalid opcode!");
6520 case TargetOpcode::G_VECREDUCE_ADD:
6521 case TargetOpcode::G_VECREDUCE_OR:
6522 case TargetOpcode::G_VECREDUCE_XOR:
6523 case TargetOpcode::G_VECREDUCE_UMAX:
6524 return MIRBuilder.buildConstant(Ty, 0);
6525 case TargetOpcode::G_VECREDUCE_MUL:
6526 return MIRBuilder.buildConstant(Ty, 1);
6527 case TargetOpcode::G_VECREDUCE_AND:
6528 case TargetOpcode::G_VECREDUCE_UMIN:
6529 return MIRBuilder.buildConstant(
6530 Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
6531 case TargetOpcode::G_VECREDUCE_SMAX:
6532 return MIRBuilder.buildConstant(
6533 Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
6534 case TargetOpcode::G_VECREDUCE_SMIN:
6535 return MIRBuilder.buildConstant(
6536 Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
6537 case TargetOpcode::G_VECREDUCE_FADD:
6538 return MIRBuilder.buildFConstant(Ty, -0.0);
6539 case TargetOpcode::G_VECREDUCE_FMUL:
6540 return MIRBuilder.buildFConstant(Ty, 1.0);
6541 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6542 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6543 assert(false && "getNeutralElementForVecReduce unimplemented for "
6544 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6545 }
6546 llvm_unreachable("switch expected to return!");
6547}
6548
6549LegalizerHelper::LegalizeResult
6550LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
6551 LLT MoreTy) {
6552 unsigned Opc = MI.getOpcode();
6553 switch (Opc) {
6554 case TargetOpcode::G_IMPLICIT_DEF:
6555 case TargetOpcode::G_LOAD: {
6556 if (TypeIdx != 0)
6557 return UnableToLegalize;
6558 Observer.changingInstr(MI);
6559 moreElementsVectorDst(MI, MoreTy, 0);
6560 Observer.changedInstr(MI);
6561 return Legalized;
6562 }
6563 case TargetOpcode::G_STORE:
6564 if (TypeIdx != 0)
6565 return UnableToLegalize;
6566 Observer.changingInstr(MI);
6567 moreElementsVectorSrc(MI, MoreTy, 0);
6568 Observer.changedInstr(MI);
6569 return Legalized;
6570 case TargetOpcode::G_AND:
6571 case TargetOpcode::G_OR:
6572 case TargetOpcode::G_XOR:
6573 case TargetOpcode::G_ADD:
6574 case TargetOpcode::G_SUB:
6575 case TargetOpcode::G_MUL:
6576 case TargetOpcode::G_FADD:
6577 case TargetOpcode::G_FSUB:
6578 case TargetOpcode::G_FMUL:
6579 case TargetOpcode::G_FDIV:
6580 case TargetOpcode::G_FCOPYSIGN:
6581 case TargetOpcode::G_UADDSAT:
6582 case TargetOpcode::G_USUBSAT:
6583 case TargetOpcode::G_SADDSAT:
6584 case TargetOpcode::G_SSUBSAT:
6585 case TargetOpcode::G_SMIN:
6586 case TargetOpcode::G_SMAX:
6587 case TargetOpcode::G_UMIN:
6588 case TargetOpcode::G_UMAX:
6589 case TargetOpcode::G_FMINNUM:
6590 case TargetOpcode::G_FMAXNUM:
6591 case TargetOpcode::G_FMINNUM_IEEE:
6592 case TargetOpcode::G_FMAXNUM_IEEE:
6593 case TargetOpcode::G_FMINIMUM:
6594 case TargetOpcode::G_FMAXIMUM:
6595 case TargetOpcode::G_FMINIMUMNUM:
6596 case TargetOpcode::G_FMAXIMUMNUM:
6597 case TargetOpcode::G_STRICT_FADD:
6598 case TargetOpcode::G_STRICT_FSUB:
6599 case TargetOpcode::G_STRICT_FMUL:
6600 case TargetOpcode::G_SHL:
6601 case TargetOpcode::G_ASHR:
6602 case TargetOpcode::G_LSHR: {
6603 Observer.changingInstr(MI);
6604 moreElementsVectorSrc(MI, MoreTy, 1);
6605 moreElementsVectorSrc(MI, MoreTy, 2);
6606 moreElementsVectorDst(MI, MoreTy, 0);
6607 Observer.changedInstr(MI);
6608 return Legalized;
6609 }
6610 case TargetOpcode::G_FMA:
6611 case TargetOpcode::G_STRICT_FMA:
6612 case TargetOpcode::G_FSHR:
6613 case TargetOpcode::G_FSHL: {
6614 Observer.changingInstr(MI);
6615 moreElementsVectorSrc(MI, MoreTy, 1);
6616 moreElementsVectorSrc(MI, MoreTy, 2);
6617 moreElementsVectorSrc(MI, MoreTy, 3);
6618 moreElementsVectorDst(MI, MoreTy, 0);
6619 Observer.changedInstr(MI);
6620 return Legalized;
6621 }
6622 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6623 case TargetOpcode::G_EXTRACT:
6624 if (TypeIdx != 1)
6625 return UnableToLegalize;
6626 Observer.changingInstr(MI);
6627 moreElementsVectorSrc(MI, MoreTy, 1);
6628 Observer.changedInstr(MI);
6629 return Legalized;
6630 case TargetOpcode::G_INSERT:
6631 case TargetOpcode::G_INSERT_VECTOR_ELT:
6632 case TargetOpcode::G_FREEZE:
6633 case TargetOpcode::G_FNEG:
6634 case TargetOpcode::G_FABS:
6635 case TargetOpcode::G_FSQRT:
6636 case TargetOpcode::G_FCEIL:
6637 case TargetOpcode::G_FFLOOR:
6638 case TargetOpcode::G_FNEARBYINT:
6639 case TargetOpcode::G_FRINT:
6640 case TargetOpcode::G_INTRINSIC_ROUND:
6641 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6642 case TargetOpcode::G_INTRINSIC_TRUNC:
6643 case TargetOpcode::G_BITREVERSE:
6644 case TargetOpcode::G_BSWAP:
6645 case TargetOpcode::G_FCANONICALIZE:
6646 case TargetOpcode::G_SEXT_INREG:
6647 case TargetOpcode::G_ABS:
6648 case TargetOpcode::G_CTLZ:
6649 case TargetOpcode::G_CTPOP:
6650 if (TypeIdx != 0)
6651 return UnableToLegalize;
6652 Observer.changingInstr(MI);
6653 moreElementsVectorSrc(MI, MoreTy, 1);
6654 moreElementsVectorDst(MI, MoreTy, 0);
6655 Observer.changedInstr(MI);
6656 return Legalized;
6657 case TargetOpcode::G_SELECT: {
6658 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
6659 if (TypeIdx == 1) {
6660 if (!CondTy.isScalar() ||
6661 DstTy.getElementCount() != MoreTy.getElementCount())
6662 return UnableToLegalize;
6663
6664 // This is turning a scalar select of vectors into a vector
6665 // select. Broadcast the select condition.
6666 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6667 Observer.changingInstr(MI);
6668 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6669 Observer.changedInstr(MI);
6670 return Legalized;
6671 }
6672
6673 if (CondTy.isVector())
6674 return UnableToLegalize;
6675
6676 Observer.changingInstr(MI);
6677 moreElementsVectorSrc(MI, MoreTy, 2);
6678 moreElementsVectorSrc(MI, MoreTy, 3);
6679 moreElementsVectorDst(MI, MoreTy, 0);
6680 Observer.changedInstr(MI);
6681 return Legalized;
6682 }
6683 case TargetOpcode::G_UNMERGE_VALUES:
6684 return UnableToLegalize;
6685 case TargetOpcode::G_PHI:
6686 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
6687 case TargetOpcode::G_SHUFFLE_VECTOR:
6688 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
6689 case TargetOpcode::G_BUILD_VECTOR: {
6690 SmallVector<SrcOp, 8> Elts;
6691 for (auto Op : MI.uses()) {
6692 Elts.push_back(Op.getReg());
6693 }
6694
6695 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
6696 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
6697 }
6698
6699 MIRBuilder.buildDeleteTrailingVectorElements(
6700 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
6701 MI.eraseFromParent();
6702 return Legalized;
6703 }
6704 case TargetOpcode::G_SEXT:
6705 case TargetOpcode::G_ZEXT:
6706 case TargetOpcode::G_ANYEXT:
6707 case TargetOpcode::G_TRUNC:
6708 case TargetOpcode::G_FPTRUNC:
6709 case TargetOpcode::G_FPEXT:
6710 case TargetOpcode::G_FPTOSI:
6711 case TargetOpcode::G_FPTOUI:
6712 case TargetOpcode::G_FPTOSI_SAT:
6713 case TargetOpcode::G_FPTOUI_SAT:
6714 case TargetOpcode::G_SITOFP:
6715 case TargetOpcode::G_UITOFP: {
6716 Observer.changingInstr(MI);
6717 LLT SrcExtTy;
6718 LLT DstExtTy;
6719 if (TypeIdx == 0) {
6720 DstExtTy = MoreTy;
6721 SrcExtTy = LLT::fixed_vector(
6722 MoreTy.getNumElements(),
6723 MRI.getType(MI.getOperand(1).getReg()).getElementType());
6724 } else {
6725 DstExtTy = LLT::fixed_vector(
6726 MoreTy.getNumElements(),
6727 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6728 SrcExtTy = MoreTy;
6729 }
6730 moreElementsVectorSrc(MI, SrcExtTy, 1);
6731 moreElementsVectorDst(MI, DstExtTy, 0);
6732 Observer.changedInstr(MI);
6733 return Legalized;
6734 }
6735 case TargetOpcode::G_ICMP:
6736 case TargetOpcode::G_FCMP: {
6737 if (TypeIdx != 1)
6738 return UnableToLegalize;
6739
6740 Observer.changingInstr(MI);
6741 moreElementsVectorSrc(MI, MoreTy, 2);
6742 moreElementsVectorSrc(MI, MoreTy, 3);
6743 LLT CondTy = LLT::fixed_vector(
6744 MoreTy.getNumElements(),
6745 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6746 moreElementsVectorDst(MI, CondTy, 0);
6747 Observer.changedInstr(MI);
6748 return Legalized;
6749 }
6750 case TargetOpcode::G_BITCAST: {
6751 if (TypeIdx != 0)
6752 return UnableToLegalize;
6753
6754 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
6755 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6756
6757 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
6758 if (coefficient % DstTy.getNumElements() != 0)
6759 return UnableToLegalize;
6760
6761 coefficient = coefficient / DstTy.getNumElements();
6762
6763 LLT NewTy = SrcTy.changeElementCount(
6764 ElementCount::get(coefficient, MoreTy.isScalable()));
6765 Observer.changingInstr(MI);
6766 moreElementsVectorSrc(MI, NewTy, 1);
6767 moreElementsVectorDst(MI, MoreTy, 0);
6768 Observer.changedInstr(MI);
6769 return Legalized;
6770 }
6771 case TargetOpcode::G_VECREDUCE_FADD:
6772 case TargetOpcode::G_VECREDUCE_FMUL:
6773 case TargetOpcode::G_VECREDUCE_ADD:
6774 case TargetOpcode::G_VECREDUCE_MUL:
6775 case TargetOpcode::G_VECREDUCE_AND:
6776 case TargetOpcode::G_VECREDUCE_OR:
6777 case TargetOpcode::G_VECREDUCE_XOR:
6778 case TargetOpcode::G_VECREDUCE_SMAX:
6779 case TargetOpcode::G_VECREDUCE_SMIN:
6780 case TargetOpcode::G_VECREDUCE_UMAX:
6781 case TargetOpcode::G_VECREDUCE_UMIN: {
6782 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
6783 MachineOperand &MO = MI.getOperand(1);
6784 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6785 auto NeutralElement = getNeutralElementForVecReduce(
6786 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
6787
6788 LLT IdxTy(TLI.getVectorIdxLLT(MIRBuilder.getDataLayout()));
6789 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
6790 i != e; i++) {
6791 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
6792 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6793 NeutralElement, Idx);
6794 }
6795
6796 Observer.changingInstr(MI);
6797 MO.setReg(NewVec.getReg(0));
6798 Observer.changedInstr(MI);
6799 return Legalized;
6800 }
6801
6802 default:
6803 return UnableToLegalize;
6804 }
6805}
6806
6807LegalizerHelper::LegalizeResult
6808LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
6809 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6810 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6811 unsigned MaskNumElts = Mask.size();
6812 unsigned SrcNumElts = SrcTy.getNumElements();
6813 LLT DestEltTy = DstTy.getElementType();
6814
6815 if (MaskNumElts == SrcNumElts)
6816 return Legalized;
6817
6818 if (MaskNumElts < SrcNumElts) {
6819 // Extend mask to match new destination vector size with
6820 // undef values.
6821 SmallVector<int, 16> NewMask(SrcNumElts, -1);
6822 llvm::copy(Mask, NewMask.begin());
6823
6824 moreElementsVectorDst(MI, SrcTy, 0);
6825 MIRBuilder.setInstrAndDebugLoc(MI);
6826 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6827 MI.getOperand(1).getReg(),
6828 MI.getOperand(2).getReg(), NewMask);
6829 MI.eraseFromParent();
6830
6831 return Legalized;
6832 }
6833
6834 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
6835 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6836 LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
6837
6838 // Create new source vectors by concatenating the initial
6839 // source vectors with undefined vectors of the same size.
6840 auto Undef = MIRBuilder.buildUndef(SrcTy);
6841 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
6842 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
6843 MOps1[0] = MI.getOperand(1).getReg();
6844 MOps2[0] = MI.getOperand(2).getReg();
6845
6846 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
6847 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
6848
6849 // Readjust mask for new input vector length.
6850 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
6851 for (unsigned I = 0; I != MaskNumElts; ++I) {
6852 int Idx = Mask[I];
6853 if (Idx >= static_cast<int>(SrcNumElts))
6854 Idx += PaddedMaskNumElts - SrcNumElts;
6855 MappedOps[I] = Idx;
6856 }
6857
6858 // If we got more elements than required, extract subvector.
6859 if (MaskNumElts != PaddedMaskNumElts) {
6860 auto Shuffle =
6861 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
6862
6863 SmallVector<Register, 16> Elts(MaskNumElts);
6864 for (unsigned I = 0; I < MaskNumElts; ++I) {
6865 Elts[I] =
6866 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
6867 .getReg(0);
6868 }
6869 MIRBuilder.buildBuildVector(DstReg, Elts);
6870 } else {
6871 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
6872 }
6873
6874 MI.eraseFromParent();
6875 return Legalized;
6876}
6877
6878LegalizerHelper::LegalizeResult
6879LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
6880 unsigned int TypeIdx, LLT MoreTy) {
6881 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
6882 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6883 unsigned NumElts = DstTy.getNumElements();
6884 unsigned WidenNumElts = MoreTy.getNumElements();
6885
6886 if (DstTy.isVector() && Src1Ty.isVector() &&
6887 DstTy.getNumElements() != Src1Ty.getNumElements()) {
6888 return equalizeVectorShuffleLengths(MI);
6889 }
6890
6891 if (TypeIdx != 0)
6892 return UnableToLegalize;
6893
6894 // Expect a canonicalized shuffle.
6895 if (DstTy != Src1Ty || DstTy != Src2Ty)
6896 return UnableToLegalize;
6897
6898 moreElementsVectorSrc(MI, MoreTy, 1);
6899 moreElementsVectorSrc(MI, MoreTy, 2);
6900
6901 // Adjust mask based on new input vector length.
6902 SmallVector<int, 16> NewMask(WidenNumElts, -1);
6903 for (unsigned I = 0; I != NumElts; ++I) {
6904 int Idx = Mask[I];
6905 if (Idx < static_cast<int>(NumElts))
6906 NewMask[I] = Idx;
6907 else
6908 NewMask[I] = Idx - NumElts + WidenNumElts;
6909 }
6910 moreElementsVectorDst(MI, MoreTy, 0);
6911 MIRBuilder.setInstrAndDebugLoc(MI);
6912 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6913 MI.getOperand(1).getReg(),
6914 MI.getOperand(2).getReg(), NewMask);
6915 MI.eraseFromParent();
6916 return Legalized;
6917}
6918
6919void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
6920 ArrayRef<Register> Src1Regs,
6921 ArrayRef<Register> Src2Regs,
6922 LLT NarrowTy) {
6923 MachineIRBuilder &B = MIRBuilder;
6924 unsigned SrcParts = Src1Regs.size();
6925 unsigned DstParts = DstRegs.size();
6926
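// Schoolbook multiplication: for each result part DstIdx the loop below sums
// the low halves of Src1[DstIdx - i] * Src2[i], the high halves of the
// products that belong to the previous column, and the carries accumulated
// while summing that previous column.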
6927 unsigned DstIdx = 0; // Low bits of the result.
6928 Register FactorSum =
6929 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
6930 DstRegs[DstIdx] = FactorSum;
6931
6932 Register CarrySumPrevDstIdx;
6933 SmallVector<Register, 4> Factors;
6934
6935 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
6936 // Collect low parts of muls for DstIdx.
6937 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
6938 i <= std::min(DstIdx, SrcParts - 1); ++i) {
6939 MachineInstrBuilder Mul =
6940 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
6941 Factors.push_back(Mul.getReg(0));
6942 }
6943 // Collect high parts of muls from previous DstIdx.
6944 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
6945 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
6946 MachineInstrBuilder Umulh =
6947 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
6948 Factors.push_back(Umulh.getReg(0));
6949 }
6950 // Add CarrySum from additions calculated for previous DstIdx.
6951 if (DstIdx != 1) {
6952 Factors.push_back(CarrySumPrevDstIdx);
6953 }
6954
6955 Register CarrySum;
6956 // Add all factors and accumulate all carries into CarrySum.
6957 if (DstIdx != DstParts - 1) {
6958 MachineInstrBuilder Uaddo =
6959 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
6960 FactorSum = Uaddo.getReg(0);
6961 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
6962 for (unsigned i = 2; i < Factors.size(); ++i) {
6963 MachineInstrBuilder Uaddo =
6964 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
6965 FactorSum = Uaddo.getReg(0);
6966 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
6967 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
6968 }
6969 } else {
6970 // Since value for the next index is not calculated, neither is CarrySum.
6971 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
6972 for (unsigned i = 2; i < Factors.size(); ++i)
6973 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
6974 }
6975
6976 CarrySumPrevDstIdx = CarrySum;
6977 DstRegs[DstIdx] = FactorSum;
6978 Factors.clear();
6979 }
6980}
6981
6982LegalizerHelper::LegalizeResult
6983LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
6984 LLT NarrowTy) {
6985 if (TypeIdx != 0)
6986 return UnableToLegalize;
6987
6988 Register DstReg = MI.getOperand(0).getReg();
6989 LLT DstType = MRI.getType(DstReg);
6990 // FIXME: add support for vector types
6991 if (DstType.isVector())
6992 return UnableToLegalize;
6993
6994 unsigned Opcode = MI.getOpcode();
6995 unsigned OpO, OpE, OpF;
6996 switch (Opcode) {
6997 case TargetOpcode::G_SADDO:
6998 case TargetOpcode::G_SADDE:
6999 case TargetOpcode::G_UADDO:
7000 case TargetOpcode::G_UADDE:
7001 case TargetOpcode::G_ADD:
7002 OpO = TargetOpcode::G_UADDO;
7003 OpE = TargetOpcode::G_UADDE;
7004 OpF = TargetOpcode::G_UADDE;
7005 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7006 OpF = TargetOpcode::G_SADDE;
7007 break;
7008 case TargetOpcode::G_SSUBO:
7009 case TargetOpcode::G_SSUBE:
7010 case TargetOpcode::G_USUBO:
7011 case TargetOpcode::G_USUBE:
7012 case TargetOpcode::G_SUB:
7013 OpO = TargetOpcode::G_USUBO;
7014 OpE = TargetOpcode::G_USUBE;
7015 OpF = TargetOpcode::G_USUBE;
7016 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7017 OpF = TargetOpcode::G_SSUBE;
7018 break;
7019 default:
7020 llvm_unreachable("Unexpected add/sub opcode!");
7021 }
7022
7023 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
7024 unsigned NumDefs = MI.getNumExplicitDefs();
7025 Register Src1 = MI.getOperand(NumDefs).getReg();
7026 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
7027 Register CarryDst, CarryIn;
7028 if (NumDefs == 2)
7029 CarryDst = MI.getOperand(1).getReg();
7030 if (MI.getNumOperands() == NumDefs + 3)
7031 CarryIn = MI.getOperand(NumDefs + 2).getReg();
7032
7033 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7034 LLT LeftoverTy, DummyTy;
7035 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
7036 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7037 MIRBuilder, MRI);
7038 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
7039 MRI);
7040
7041 int NarrowParts = Src1Regs.size();
7042 Src1Regs.append(Src1Left);
7043 Src2Regs.append(Src2Left);
7044 DstRegs.reserve(Src1Regs.size());
7045
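// Chain the narrow pieces from least to most significant: each piece's
// carry-out feeds the next piece's carry-in, and only the final piece uses
// OpF so that a signed overflow flag, when requested, is computed from the
// most significant part.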
7046 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
7047 Register DstReg =
7048 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7049 Register CarryOut;
7050 // Forward the final carry-out to the destination register
7051 if (i == e - 1 && CarryDst)
7052 CarryOut = CarryDst;
7053 else
7054 CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
7055
7056 if (!CarryIn) {
7057 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7058 {Src1Regs[i], Src2Regs[i]});
7059 } else if (i == e - 1) {
7060 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7061 {Src1Regs[i], Src2Regs[i], CarryIn});
7062 } else {
7063 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7064 {Src1Regs[i], Src2Regs[i], CarryIn});
7065 }
7066
7067 DstRegs.push_back(DstReg);
7068 CarryIn = CarryOut;
7069 }
7070 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
7071 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7072 ArrayRef(DstRegs).drop_front(NarrowParts));
7073
7074 MI.eraseFromParent();
7075 return Legalized;
7076}
7077
7078LegalizerHelper::LegalizeResult
7079LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
7080 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
7081
7082 LLT Ty = MRI.getType(DstReg);
7083 if (Ty.isVector())
7084 return UnableToLegalize;
7085
7086 unsigned Size = Ty.getSizeInBits();
7087 unsigned NarrowSize = NarrowTy.getSizeInBits();
7088 if (Size % NarrowSize != 0)
7089 return UnableToLegalize;
7090
7091 unsigned NumParts = Size / NarrowSize;
7092 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
7093 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7094
7095 SmallVector<Register, 2> Src1Parts, Src2Parts;
7096 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
7097 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
7098 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
7099 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7100
7101 // Take only high half of registers if this is high mul.
7102 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
7103 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7104 MI.eraseFromParent();
7105 return Legalized;
7106}
7107
7108LegalizerHelper::LegalizeResult
7109LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
7110 LLT NarrowTy) {
7111 if (TypeIdx != 0)
7112 return UnableToLegalize;
7113
7114 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
7115
7116 Register Src = MI.getOperand(1).getReg();
7117 LLT SrcTy = MRI.getType(Src);
7118
7119 // If all finite floats fit into the narrowed integer type, we can just swap
7120 // out the result type. This is practically only useful for conversions from
7121 // half to at least 16-bits, so just handle the one case.
7122 if (SrcTy.getScalarType() != LLT::scalar(16) ||
7123 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
7124 return UnableToLegalize;
7125
7126 Observer.changingInstr(MI);
7127 narrowScalarDst(MI, NarrowTy, 0,
7128 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7129 Observer.changedInstr(MI);
7130 return Legalized;
7131}
7132
7133LegalizerHelper::LegalizeResult
7134LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
7135 LLT NarrowTy) {
7136 if (TypeIdx != 1)
7137 return UnableToLegalize;
7138
7139 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7140
7141 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7142 // FIXME: add support for when SizeOp1 isn't an exact multiple of
7143 // NarrowSize.
7144 if (SizeOp1 % NarrowSize != 0)
7145 return UnableToLegalize;
7146 int NumParts = SizeOp1 / NarrowSize;
7147
7148 SmallVector<Register, 2> SrcRegs, DstRegs;
7149 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7150 MIRBuilder, MRI);
7151
7152 Register OpReg = MI.getOperand(0).getReg();
7153 uint64_t OpStart = MI.getOperand(2).getImm();
7154 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7155 for (int i = 0; i < NumParts; ++i) {
7156 unsigned SrcStart = i * NarrowSize;
7157
7158 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7159 // No part of the extract uses this subregister, ignore it.
7160 continue;
7161 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7162 // The entire subregister is extracted, forward the value.
7163 DstRegs.push_back(SrcRegs[i]);
7164 continue;
7165 }
7166
7167 // OpSegStart is where this destination segment would start in OpReg if it
7168 // extended infinitely in both directions.
7169 int64_t ExtractOffset;
7170 uint64_t SegSize;
7171 if (OpStart < SrcStart) {
7172 ExtractOffset = 0;
7173 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7174 } else {
7175 ExtractOffset = OpStart - SrcStart;
7176 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7177 }
7178
7179 Register SegReg = SrcRegs[i];
7180 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7181 // A genuine extract is needed.
7182 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7183 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7184 }
7185
7186 DstRegs.push_back(SegReg);
7187 }
7188
7189 Register DstReg = MI.getOperand(0).getReg();
7190 if (MRI.getType(DstReg).isVector())
7191 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7192 else if (DstRegs.size() > 1)
7193 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7194 else
7195 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
7196 MI.eraseFromParent();
7197 return Legalized;
7198}
7199
7200LegalizerHelper::LegalizeResult
7201LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
7202 LLT NarrowTy) {
7203 // FIXME: Don't know how to handle secondary types yet.
7204 if (TypeIdx != 0)
7205 return UnableToLegalize;
7206
7207 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
7208 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7209 LLT LeftoverTy;
7210 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7211 LeftoverRegs, MIRBuilder, MRI);
7212
7213 SrcRegs.append(LeftoverRegs);
7214
7215 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7216 Register OpReg = MI.getOperand(2).getReg();
7217 uint64_t OpStart = MI.getOperand(3).getImm();
7218 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7219 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
7220 unsigned DstStart = I * NarrowSize;
7221
7222 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7223 // The entire subregister is defined by this insert, forward the new
7224 // value.
7225 DstRegs.push_back(OpReg);
7226 continue;
7227 }
7228
7229 Register SrcReg = SrcRegs[I];
7230 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
7231 // The leftover reg is smaller than NarrowTy, so we need to extend it.
7232 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7233 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
7234 }
7235
7236 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7237 // No part of the insert affects this subregister, forward the original.
7238 DstRegs.push_back(SrcReg);
7239 continue;
7240 }
7241
7242 // OpSegStart is where this destination segment would start in OpReg if it
7243 // extended infinitely in both directions.
7244 int64_t ExtractOffset, InsertOffset;
7245 uint64_t SegSize;
7246 if (OpStart < DstStart) {
7247 InsertOffset = 0;
7248 ExtractOffset = DstStart - OpStart;
7249 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7250 } else {
7251 InsertOffset = OpStart - DstStart;
7252 ExtractOffset = 0;
7253 SegSize =
7254 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7255 }
7256
7257 Register SegReg = OpReg;
7258 if (ExtractOffset != 0 || SegSize != OpSize) {
7259 // A genuine extract is needed.
7260 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7261 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7262 }
7263
7264 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7265 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7266 DstRegs.push_back(DstReg);
7267 }
7268
7269 uint64_t WideSize = DstRegs.size() * NarrowSize;
7270 Register DstReg = MI.getOperand(0).getReg();
7271 if (WideSize > RegTy.getSizeInBits()) {
7272 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
7273 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7274 MIRBuilder.buildTrunc(DstReg, MergeReg);
7275 } else
7276 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7277
7278 MI.eraseFromParent();
7279 return Legalized;
7280}
7281
7282LegalizerHelper::LegalizeResult
7283LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
7284 LLT NarrowTy) {
7285 Register DstReg = MI.getOperand(0).getReg();
7286 LLT DstTy = MRI.getType(DstReg);
7287
7288 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
7289
7290 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7291 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
7292 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7293 LLT LeftoverTy;
7294 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7295 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
7296 return UnableToLegalize;
7297
7298 LLT Unused;
7299 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7300 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7301 llvm_unreachable("inconsistent extractParts result");
7302
7303 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7304 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
7305 {Src0Regs[I], Src1Regs[I]});
7306 DstRegs.push_back(Inst.getReg(0));
7307 }
7308
7309 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7310 auto Inst = MIRBuilder.buildInstr(
7311 MI.getOpcode(),
7312 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7313 DstLeftoverRegs.push_back(Inst.getReg(0));
7314 }
7315
7316 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7317 LeftoverTy, DstLeftoverRegs);
7318
7319 MI.eraseFromParent();
7320 return Legalized;
7321}
7322
7323LegalizerHelper::LegalizeResult
7324LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
7325 LLT NarrowTy) {
7326 if (TypeIdx != 0)
7327 return UnableToLegalize;
7328
7329 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7330
7331 LLT DstTy = MRI.getType(DstReg);
7332 if (DstTy.isVector())
7333 return UnableToLegalize;
7334
7335 SmallVector<Register, 8> Parts;
7336 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7337 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
7338 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7339
7340 MI.eraseFromParent();
7341 return Legalized;
7342}
7343
7344LegalizerHelper::LegalizeResult
7345LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
7346 LLT NarrowTy) {
7347 if (TypeIdx != 0)
7348 return UnableToLegalize;
7349
7350 Register CondReg = MI.getOperand(1).getReg();
7351 LLT CondTy = MRI.getType(CondReg);
7352 if (CondTy.isVector()) // TODO: Handle vselect
7353 return UnableToLegalize;
7354
7355 Register DstReg = MI.getOperand(0).getReg();
7356 LLT DstTy = MRI.getType(DstReg);
7357
7358 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7359 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7360 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
7361 LLT LeftoverTy;
7362 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7363 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7364 return UnableToLegalize;
7365
7366 LLT Unused;
7367 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7368 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
7369 llvm_unreachable("inconsistent extractParts result");
7370
7371 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7372 auto Select = MIRBuilder.buildSelect(NarrowTy,
7373 CondReg, Src1Regs[I], Src2Regs[I]);
7374 DstRegs.push_back(Select.getReg(0));
7375 }
7376
7377 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7378 auto Select = MIRBuilder.buildSelect(
7379 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
7380 DstLeftoverRegs.push_back(Select.getReg(0));
7381 }
7382
7383 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7384 LeftoverTy, DstLeftoverRegs);
7385
7386 MI.eraseFromParent();
7387 return Legalized;
7388}
7389
7390LegalizerHelper::LegalizeResult
7391LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
7392 LLT NarrowTy) {
7393 if (TypeIdx != 1)
7394 return UnableToLegalize;
7395
7396 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7397 unsigned NarrowSize = NarrowTy.getSizeInBits();
7398
7399 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7400 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
7401
7402 MachineIRBuilder &B = MIRBuilder;
7403 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7404 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
7405 auto C_0 = B.buildConstant(NarrowTy, 0);
7406 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7407 UnmergeSrc.getReg(1), C_0);
7408 auto LoCTLZ = IsUndef ?
7409 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
7410 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7411 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7412 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7413 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
7414 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7415
7416 MI.eraseFromParent();
7417 return Legalized;
7418 }
7419
7420 return UnableToLegalize;
7421}
7422
7423LegalizerHelper::LegalizeResult
7424LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
7425 LLT NarrowTy) {
7426 if (TypeIdx != 1)
7427 return UnableToLegalize;
7428
7429 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7430 unsigned NarrowSize = NarrowTy.getSizeInBits();
7431
7432 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7433 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
7434
7435 MachineIRBuilder &B = MIRBuilder;
7436 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7437 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
7438 auto C_0 = B.buildConstant(NarrowTy, 0);
7439 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7440 UnmergeSrc.getReg(0), C_0);
7441 auto HiCTTZ = IsUndef ?
7442 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
7443 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7444 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7445 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7446 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
7447 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7448
7449 MI.eraseFromParent();
7450 return Legalized;
7451 }
7452
7453 return UnableToLegalize;
7454}
7455
7456LegalizerHelper::LegalizeResult
7457LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
7458 LLT NarrowTy) {
7459 if (TypeIdx != 1)
7460 return UnableToLegalize;
7461
7462 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7463 unsigned NarrowSize = NarrowTy.getSizeInBits();
7464
7465 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7466 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
7467
7468 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7469 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7470 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7471
7472 MI.eraseFromParent();
7473 return Legalized;
7474 }
7475
7476 return UnableToLegalize;
7477}
7478
7479LegalizerHelper::LegalizeResult
7480LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
7481 LLT NarrowTy) {
7482 if (TypeIdx != 1)
7483 return UnableToLegalize;
7484
7485 MachineIRBuilder &B = MIRBuilder;
7486 Register ExpReg = MI.getOperand(2).getReg();
7487 LLT ExpTy = MRI.getType(ExpReg);
7488
7489 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
7490
7491 // Clamp the exponent to the range of the target type.
7492 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
7493 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
7494 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
7495 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
7496
7497 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
7498 Observer.changingInstr(MI);
7499 MI.getOperand(2).setReg(Trunc.getReg(0));
7500 Observer.changedInstr(MI);
7501 return Legalized;
7502}
7503
7504LegalizerHelper::LegalizeResult
7505LegalizerHelper::lowerBitCount(MachineInstr &MI) {
7506 unsigned Opc = MI.getOpcode();
7507 const auto &TII = MIRBuilder.getTII();
7508 auto isSupported = [this](const LegalityQuery &Q) {
7509 auto QAction = LI.getAction(Q).Action;
7510 return QAction == Legal || QAction == Libcall || QAction == Custom;
7511 };
7512 switch (Opc) {
7513 default:
7514 return UnableToLegalize;
7515 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7516 // This trivially expands to CTLZ.
7517 Observer.changingInstr(MI);
7518 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
7519 Observer.changedInstr(MI);
7520 return Legalized;
7521 }
7522 case TargetOpcode::G_CTLZ: {
7523 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7524 unsigned Len = SrcTy.getSizeInBits();
7525
7526 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7527 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
7528 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7529 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
7530 auto ICmp = MIRBuilder.buildICmp(
7531 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
7532 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7533 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7534 MI.eraseFromParent();
7535 return Legalized;
7536 }
7537 // for now, we do this:
7538 // NewLen = NextPowerOf2(Len);
7539 // x = x | (x >> 1);
7540 // x = x | (x >> 2);
7541 // ...
7542 // x = x | (x >>16);
7543 // x = x | (x >>32); // for 64-bit input
7544 // Up to NewLen/2
7545 // return Len - popcount(x);
7546 //
7547 // Ref: "Hacker's Delight" by Henry Warren
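// For example, with Len = 8 and x = 0b00010000 the OR-smearing steps yield
// 0b00011111, popcount(0b00011111) = 5, and ctlz = 8 - 5 = 3.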
7548 Register Op = SrcReg;
7549 unsigned NewLen = PowerOf2Ceil(Len);
7550 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7551 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7552 auto MIBOp = MIRBuilder.buildOr(
7553 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
7554 Op = MIBOp.getReg(0);
7555 }
7556 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
7557 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
7558 MIBPop);
7559 MI.eraseFromParent();
7560 return Legalized;
7561 }
7562 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7563 // This trivially expands to CTTZ.
7564 Observer.changingInstr(MI);
7565 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
7566 Observer.changedInstr(MI);
7567 return Legalized;
7568 }
7569 case TargetOpcode::G_CTTZ: {
7570 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7571
7572 unsigned Len = SrcTy.getSizeInBits();
7573 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7574 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
7575 // zero.
7576 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7577 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
7578 auto ICmp = MIRBuilder.buildICmp(
7579 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
7580 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7581 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7582 MI.eraseFromParent();
7583 return Legalized;
7584 }
7585 // for now, we use: { return popcount(~x & (x - 1)); }
7586 // unless the target has ctlz but not ctpop, in which case we use:
7587 // { return 32 - nlz(~x & (x-1)); }
7588 // Ref: "Hacker's Delight" by Henry Warren
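// For example, x = 0b01101000: ~x & (x - 1) = 0b10010111 & 0b01100111 =
// 0b00000111, and popcount(0b00000111) = 3 = cttz(x).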
7589 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
7590 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7591 auto MIBTmp = MIRBuilder.buildAnd(
7592 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7593 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7594 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7595 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
7596 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
7597 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
7598 MI.eraseFromParent();
7599 return Legalized;
7600 }
7601 Observer.changingInstr(MI);
7602 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
7603 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7604 Observer.changedInstr(MI);
7605 return Legalized;
7606 }
7607 case TargetOpcode::G_CTPOP: {
7608 Register SrcReg = MI.getOperand(1).getReg();
7609 LLT Ty = MRI.getType(SrcReg);
7610 unsigned Size = Ty.getSizeInBits();
7611 MachineIRBuilder &B = MIRBuilder;
7612
7613 // Count set bits in blocks of 2 bits. The default approach would be
7614 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
7615 // We use the following formula instead:
7616 // B2Count = val - { (val >> 1) & 0x55555555 }
7617 // since it gives the same result in blocks of 2 with one instruction fewer.
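// For a single 2-bit block with bits b1,b0: val = 2*b1 + b0 and
// (val >> 1) & 1 = b1, so val - ((val >> 1) & 1) = b1 + b0, the block's
// population count.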
7618 auto C_1 = B.buildConstant(Ty, 1);
7619 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
7620 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
7621 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
7622 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7623 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
7624
7625 // To get the count in blocks of 4, add the values from adjacent blocks of 2.
7626 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
7627 auto C_2 = B.buildConstant(Ty, 2);
7628 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
7629 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
7630 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
7631 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7632 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7633 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7634
7635 // For the count in blocks of 8 bits we don't have to mask the high 4 bits
7636 // before the addition, since each count sits in the range {0,...,8} and 4 bits
7637 // are enough to hold such values. After the addition the high 4 bits still hold
7638 // the count of the high 4-bit block; clear them to get the 8-bit result.
7639 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
7640 auto C_4 = B.buildConstant(Ty, 4);
7641 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
7642 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
7643 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
7644 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
7645 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7646
7647 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
7648 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
7649 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
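// E.g. for a 32-bit value with byte counts b3,b2,b1,b0, multiplying by
// 0x01010101 places b3+b2+b1+b0 in the most significant byte, which the
// shift by Size - 8 below moves down to bit 0.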
7650 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
7651
7652 // Shift count result from 8 high bits to low bits.
7653 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
7654
7655 auto IsMulSupported = [this](const LLT Ty) {
7656 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7657 return Action == Legal || Action == WidenScalar || Action == Custom;
7658 };
7659 if (IsMulSupported(Ty)) {
7660 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
7661 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7662 } else {
7663 auto ResTmp = B8Count;
7664 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7665 auto ShiftC = B.buildConstant(Ty, Shift);
7666 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
7667 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
7668 }
7669 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7670 }
7671 MI.eraseFromParent();
7672 return Legalized;
7673 }
7674 }
7675}
7676
7677// Check that (every element of) Reg is undef or not an exact multiple of BW.
7678static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
7679 Register Reg, unsigned BW) {
7680 return matchUnaryPredicate(
7681 MRI, Reg,
7682 [=](const Constant *C) {
7683 // Null constant here means an undef.
7684 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
7685 return !CI || CI->getValue().urem(BW) != 0;
7686 },
7687 /*AllowUndefs*/ true);
7688}
7689
7690LegalizerHelper::LegalizeResult
7691LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
7692 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7693 LLT Ty = MRI.getType(Dst);
7694 LLT ShTy = MRI.getType(Z);
7695
7696 unsigned BW = Ty.getScalarSizeInBits();
7697
7698 if (!isPowerOf2_32(BW))
7699 return UnableToLegalize;
7700
7701 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7702 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7703
7704 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7705 // fshl X, Y, Z -> fshr X, Y, -Z
7706 // fshr X, Y, Z -> fshl X, Y, -Z
7707 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
7708 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7709 } else {
7710 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7711 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
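// Since BW is a power of two here, ~Z modulo BW equals BW - 1 - (Z % BW), so
// pre-shifting one operand by 1 compensates for the off-by-one and avoids
// forming BW - Z when Z may be a multiple of BW.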
7712 auto One = MIRBuilder.buildConstant(ShTy, 1);
7713 if (IsFSHL) {
7714 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7715 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
7716 } else {
7717 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7718 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
7719 }
7720
7721 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
7722 }
7723
7724 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
7725 MI.eraseFromParent();
7726 return Legalized;
7727}
7728
7729LegalizerHelper::LegalizeResult
7730LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
7731 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7732 LLT Ty = MRI.getType(Dst);
7733 LLT ShTy = MRI.getType(Z);
7734
7735 const unsigned BW = Ty.getScalarSizeInBits();
7736 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7737
7738 Register ShX, ShY;
7739 Register ShAmt, InvShAmt;
7740
7741 // FIXME: Emit optimized urem by constant instead of letting it expand later.
7742 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7743 // fshl: X << C | Y >> (BW - C)
7744 // fshr: X << (BW - C) | Y >> C
7745 // where C = Z % BW is not zero
7746 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7747 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7748 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
7749 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
7750 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
7751 } else {
7752 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7753 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7754 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
7755 if (isPowerOf2_32(BW)) {
7756 // Z % BW -> Z & (BW - 1)
7757 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
7758 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7759 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
7760 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
7761 } else {
7762 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7763 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7764 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
7765 }
7766
7767 auto One = MIRBuilder.buildConstant(ShTy, 1);
7768 if (IsFSHL) {
7769 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
7770 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
7771 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
7772 } else {
7773 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
7774 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
7775 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
7776 }
7777 }
7778
7779 MIRBuilder.buildOr(Dst, ShX, ShY, MachineInstr::Disjoint);
7780 MI.eraseFromParent();
7781 return Legalized;
7782}
7783
7784LegalizerHelper::LegalizeResult
7785LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
7786 // These operations approximately do the following (while avoiding undefined
7787 // shifts by BW):
7788 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
7789 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
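// E.g. with BW = 8 and Z = 3, G_FSHL computes (X << 3) | (Y >> 5) and
// G_FSHR computes (X << 5) | (Y >> 3).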
7790 Register Dst = MI.getOperand(0).getReg();
7791 LLT Ty = MRI.getType(Dst);
7792 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
7793
7794 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7795 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7796
7797 // TODO: Use smarter heuristic that accounts for vector legalization.
7798 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
7799 return lowerFunnelShiftAsShifts(MI);
7800
7801 // This only works for powers of 2, fallback to shifts if it fails.
7802 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
7803 if (Result == UnableToLegalize)
7804 return lowerFunnelShiftAsShifts(MI);
7805 return Result;
7806}
7807
7808LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
7809 auto [Dst, Src] = MI.getFirst2Regs();
7810 LLT DstTy = MRI.getType(Dst);
7811 LLT SrcTy = MRI.getType(Src);
7812
7813 uint32_t DstTySize = DstTy.getSizeInBits();
7814 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
7815 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
7816
7817 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
7818 !isPowerOf2_32(SrcTyScalarSize))
7819 return UnableToLegalize;
7820
7821 // The step between the source and destination element sizes is too large;
7822 // split it by creating an intermediate extend instruction.
7823 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
7824 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
7825 // If the destination type is illegal, split it into multiple statements
7826 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
7827 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
7828 // Unmerge the vector
7829 LLT EltTy = MidTy.changeElementCount(
7830 MidTy.getElementCount().divideCoefficientBy(2));
7831 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
7832
7833 // ZExt the vectors
7834 LLT ZExtResTy = DstTy.changeElementCount(
7835 DstTy.getElementCount().divideCoefficientBy(2));
7836 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7837 {UnmergeSrc.getReg(0)});
7838 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7839 {UnmergeSrc.getReg(1)});
7840
7841 // Merge the ending vectors
7842 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
7843
7844 MI.eraseFromParent();
7845 return Legalized;
7846 }
7847 return UnableToLegalize;
7848}
7849
7850LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
7851 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
7852 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
7853 // Similar to how operand splitting is done in SelectionDAG, we can handle
7854 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
7855 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
7856 // %lo16(<4 x s16>) = G_TRUNC %inlo
7857 // %hi16(<4 x s16>) = G_TRUNC %inhi
7858 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
7859 // %res(<8 x s8>) = G_TRUNC %in16
7860
7861 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
7862
7863 Register DstReg = MI.getOperand(0).getReg();
7864 Register SrcReg = MI.getOperand(1).getReg();
7865 LLT DstTy = MRI.getType(DstReg);
7866 LLT SrcTy = MRI.getType(SrcReg);
7867
7868 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
7869 isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
7870 isPowerOf2_32(SrcTy.getNumElements()) &&
7871 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
7872 // Split input type.
7873 LLT SplitSrcTy = SrcTy.changeElementCount(
7874 SrcTy.getElementCount().divideCoefficientBy(2));
7875
7876 // First, split the source into two smaller vectors.
7877 SmallVector<Register, 2> SplitSrcs;
7878 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
7879
7880 // Truncate the splits into intermediate narrower elements.
7881 LLT InterTy;
7882 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7883 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
7884 else
7885 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
7886 for (Register &Src : SplitSrcs)
7887 Src = MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
7888
7889 // Combine the new truncates into one vector
7890 auto Merge = MIRBuilder.buildMergeLikeInstr(
7891 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
7892
7893 // Truncate the new vector to the final result type
7894 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7895 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
7896 else
7897 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
7898
7899 MI.eraseFromParent();
7900
7901 return Legalized;
7902 }
7903 return UnableToLegalize;
7904}
7905
7906LegalizerHelper::LegalizeResult
7907LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
7908 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7909 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
7910 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7911 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7912 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
7913 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
7914 MI.eraseFromParent();
7915 return Legalized;
7916}
7917
7918LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
7919 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7920
7921 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
7922 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7923
7924 MIRBuilder.setInstrAndDebugLoc(MI);
7925
7926 // If a rotate in the other direction is supported, use it.
7927 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7928 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
7929 isPowerOf2_32(EltSizeInBits))
7930 return lowerRotateWithReverseRotate(MI);
7931
7932 // If a funnel shift is supported, use it.
7933 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
7934 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
7935 bool IsFShLegal = false;
7936 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
7937 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
7938 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
7939 Register R3) {
7940 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
7941 MI.eraseFromParent();
7942 return Legalized;
7943 };
7944 // If a funnel shift in the other direction is supported, use it.
7945 if (IsFShLegal) {
7946 return buildFunnelShift(FShOpc, Dst, Src, Amt);
7947 } else if (isPowerOf2_32(EltSizeInBits)) {
7948 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
7949 return buildFunnelShift(RevFsh, Dst, Src, Amt);
7950 }
7951 }
7952
7953 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
7954 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
7955 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
7956 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
7957 Register ShVal;
7958 Register RevShiftVal;
7959 if (isPowerOf2_32(EltSizeInBits)) {
7960 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
7961 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
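// E.g. w = 8, c = 3: rotl becomes (x << 3) | (x >> 5), since -3 & 7 == 5.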
7962 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
7963 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
7964 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
7965 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
7966 RevShiftVal =
7967 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
7968 } else {
7969 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
7970 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
7971 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
7972 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
7973 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
7974 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
7975 auto One = MIRBuilder.buildConstant(AmtTy, 1);
7976 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
7977 RevShiftVal =
7978 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
7979 }
7980 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
7981 MI.eraseFromParent();
7982 return Legalized;
7983}
7984
7985// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
7986// representation.
7987LegalizerHelper::LegalizeResult
7988LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
7989 auto [Dst, Src] = MI.getFirst2Regs();
7990 const LLT S64 = LLT::scalar(64);
7991 const LLT S32 = LLT::scalar(32);
7992 const LLT S1 = LLT::scalar(1);
7993
7994 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
7995
7996 // unsigned cul2f(ulong u) {
7997 // uint lz = clz(u);
7998 // uint e = (u != 0) ? 127U + 63U - lz : 0;
7999 // u = (u << lz) & 0x7fffffffffffffffUL;
8000 // ulong t = u & 0xffffffffffUL;
8001 // uint v = (e << 23) | (uint)(u >> 40);
8002 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
8003 // return as_float(v + r);
8004 // }
8005
8006 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
8007 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
8008
8009 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
8010
8011 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
8012 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
8013
8014 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
8015 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
8016
8017 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
8018 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
8019
8020 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
8021
8022 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
8023 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
8024
8025 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
8026 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
8027 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
8028
8029 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
8030 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
8031 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
8032 auto One = MIRBuilder.buildConstant(S32, 1);
8033
8034 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
8035 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
8036 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
8037 MIRBuilder.buildAdd(Dst, V, R);
8038
8039 MI.eraseFromParent();
8040 return Legalized;
8041}
8042
8043// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
8044// operations and G_SITOFP
8045LegalizerHelper::LegalizeResult
8046LegalizerHelper::lowerU64ToF32WithSITOFP(MachineInstr &MI) {
8047 auto [Dst, Src] = MI.getFirst2Regs();
8048 const LLT S64 = LLT::scalar(64);
8049 const LLT S32 = LLT::scalar(32);
8050 const LLT S1 = LLT::scalar(1);
8051
8052 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8053
8054 // For i64 < INT_MAX we simply reuse SITOFP.
8055 // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
8056 // saved before division, convert to float by SITOFP, multiply the result
8057 // by 2.
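// ORing the shifted-out bit back into the halved value acts as a sticky bit,
// so rounding the halved value in SITOFP (and then doubling) produces the
// same result as rounding the original 64-bit value directly.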
8058 auto One = MIRBuilder.buildConstant(S64, 1);
8059 auto Zero = MIRBuilder.buildConstant(S64, 0);
8060 // Result if Src < INT_MAX
8061 auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
8062 // Result if Src >= INT_MAX
8063 auto Halved = MIRBuilder.buildLShr(S64, Src, One);
8064 auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
8065 auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
8066 auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
8067 auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
8068 // Check if the original value is larger than INT_MAX by comparing with
8069 // zero to pick one of the two conversions.
8070 auto IsLarge =
8071 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
8072 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8073
8074 MI.eraseFromParent();
8075 return Legalized;
8076}
8077
8078// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
8079// IEEE double representation.
8080LegalizerHelper::LegalizeResult
8081LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) {
8082 auto [Dst, Src] = MI.getFirst2Regs();
8083 const LLT S64 = LLT::scalar(64);
8084 const LLT S32 = LLT::scalar(32);
8085
8086 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
8087
8088 // We build the double value from two 32-bit parts whose exponents differ by 32.
8089 // Note that + and - are float operations that adjust the implicit leading
8090 // one, the bases 2^52 and 2^84 are for illustrative purposes.
8091 //
8092 // X = 2^52 * 1.0...LowBits
8093 // Y = 2^84 * 1.0...HighBits
8094 // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
8095 // = - 2^52 * 1.0...HighBits
8096 // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
8097 auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
8098 auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
8099 auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
8100 auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
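// 0x4330000000000000 is the bit pattern of 2^52, 0x4530000000000000 is 2^84,
// and 0x4530000000100000 is 2^84 + 2^52, the constant subtracted from the
// high-half value below.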
8101 auto HalfWidth = MIRBuilder.buildConstant(S64, 32);
8102
8103 auto LowBits = MIRBuilder.buildTrunc(S32, Src);
8104 LowBits = MIRBuilder.buildZExt(S64, LowBits);
8105 auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
8106 auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
8107 auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
8108 auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
8109 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
8110
8111 MI.eraseFromParent();
8112 return Legalized;
8113}
8114
8115/// i64->fp16 itofp can be lowered to i64->f64, f64->f32, f32->f16. We cannot
8116/// in general lower an fpround of f64->f16 that way without double-rounding,
8117/// so we manually perform the lowering here, where we know it is valid.
8118static LegalizerHelper::LegalizeResult
8119loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src,
8120 LLT SrcTy, MachineIRBuilder &MIRBuilder) {
8121 auto M1 = MI.getOpcode() == TargetOpcode::G_UITOFP
8122 ? MIRBuilder.buildUITOFP(SrcTy, Src)
8123 : MIRBuilder.buildSITOFP(SrcTy, Src);
8124 LLT S32Ty = SrcTy.changeElementSize(32);
8125 auto M2 = MIRBuilder.buildFPTrunc(S32Ty, M1);
8126 MIRBuilder.buildFPTrunc(Dst, M2);
8127 MI.eraseFromParent();
8128 return LegalizerHelper::Legalized;
8129}
8130
8132 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8133
8134 if (SrcTy == LLT::scalar(1)) {
8135 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
8136 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8137 MIRBuilder.buildSelect(Dst, Src, True, False);
8138 MI.eraseFromParent();
8139 return Legalized;
8140 }
8141
8142 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8143 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8144
8145 if (SrcTy != LLT::scalar(64))
8146 return UnableToLegalize;
8147
8148 if (DstTy == LLT::scalar(32))
8149 // TODO: SelectionDAG has several alternative expansions to port which may
8150 // be more reasonable depending on the available instructions. We also need
8151 // a more advanced mechanism to choose an optimal version depending on
8152 // target features such as sitofp or CTLZ availability.
8154
8155 if (DstTy == LLT::scalar(64))
8157
8158 return UnableToLegalize;
8159}
8160
8162 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8163
8164 const LLT S64 = LLT::scalar(64);
8165 const LLT S32 = LLT::scalar(32);
8166 const LLT S1 = LLT::scalar(1);
8167
8168 if (SrcTy == S1) {
8169 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
8170 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8171 MIRBuilder.buildSelect(Dst, Src, True, False);
8172 MI.eraseFromParent();
8173 return Legalized;
8174 }
8175
8176 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8177 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8178
8179 if (SrcTy != S64)
8180 return UnableToLegalize;
8181
8182 if (DstTy == S32) {
8183 // signed cl2f(long l) {
8184 // long s = l >> 63;
8185 // float r = cul2f((l + s) ^ s);
8186 // return s ? -r : r;
8187 // }
8188 Register L = Src;
8189 auto SignBit = MIRBuilder.buildConstant(S64, 63);
8190 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
8191
8192 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
8193 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
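// S is 0 or -1 (the sign of L), so (L + S) ^ S is the branch-free absolute
// value of L; the sign is reapplied with the select below.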
8194 auto R = MIRBuilder.buildUITOFP(S32, Xor);
8195
8196 auto RNeg = MIRBuilder.buildFNeg(S32, R);
8197 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
8198 MIRBuilder.buildConstant(S64, 0));
8199 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8200 MI.eraseFromParent();
8201 return Legalized;
8202 }
8203
8204 return UnableToLegalize;
8205}
8206
8208 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8209 const LLT S64 = LLT::scalar(64);
8210 const LLT S32 = LLT::scalar(32);
8211
8212 if (SrcTy != S64 && SrcTy != S32)
8213 return UnableToLegalize;
8214 if (DstTy != S32 && DstTy != S64)
8215 return UnableToLegalize;
8216
8217 // FPTOSI gives same result as FPTOUI for positive signed integers.
8218 // FPTOUI needs to deal with fp values that convert to unsigned integers
8219 // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
8220
8221 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
8222 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
8223 : APFloat::IEEEdouble(),
8224 APInt::getZero(SrcTy.getSizeInBits()));
8225 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
8226
8227 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
8228
8229 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
8230 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
8231 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
8232 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
8233 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
8234 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
8235 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
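// When Src >= Threshold, FSub lies in [0, 2^Exp), so ResLowBits has a clear
// top bit and XORing in ResHighBit simply sets it, i.e. adds 2^Exp back.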
8236
8237 const LLT S1 = LLT::scalar(1);
8238
8239 MachineInstrBuilder FCMP =
8240 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
8241 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8242
8243 MI.eraseFromParent();
8244 return Legalized;
8245}
8246
8248 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8249 const LLT S64 = LLT::scalar(64);
8250 const LLT S32 = LLT::scalar(32);
8251
8252 // FIXME: Only f32 to i64 conversions are supported.
8253 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
8254 return UnableToLegalize;
8255
8256 // Expand f32 -> i64 conversion
8257 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8258 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8259
8260 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8261
8262 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
8263 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
8264
8265 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
8266 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
8267
8268 auto SignMask = MIRBuilder.buildConstant(SrcTy,
8269 APInt::getSignMask(SrcEltBits));
8270 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
8271 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
8272 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
8273 Sign = MIRBuilder.buildSExt(DstTy, Sign);
8274
8275 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
8276 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
8277 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
8278
8279 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
8280 R = MIRBuilder.buildZExt(DstTy, R);
8281
8282 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
8283 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
8284 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
8285 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
8286
8287 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
8288 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
8289
8290 const LLT S1 = LLT::scalar(1);
8291 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
8292 S1, Exponent, ExponentLoBit);
8293
8294 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
8295
8296 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
8297 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
8298
8299 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
8300
8301 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
8302 S1, Exponent, ZeroSrcTy);
8303
8304 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
8305 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
8306
8307 MI.eraseFromParent();
8308 return Legalized;
8309}
8310
8313 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8314
8315 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8316 unsigned SatWidth = DstTy.getScalarSizeInBits();
8317
8318 // Determine minimum and maximum integer values and their corresponding
8319 // floating-point values.
8320 APInt MinInt, MaxInt;
8321 if (IsSigned) {
8322 MinInt = APInt::getSignedMinValue(SatWidth);
8323 MaxInt = APInt::getSignedMaxValue(SatWidth);
8324 } else {
8325 MinInt = APInt::getMinValue(SatWidth);
8326 MaxInt = APInt::getMaxValue(SatWidth);
8327 }
8328
8329 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8330 APFloat MinFloat(Semantics);
8331 APFloat MaxFloat(Semantics);
8332
8333 APFloat::opStatus MinStatus =
8334 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
8335 APFloat::opStatus MaxStatus =
8336 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
8337 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
8338 !(MaxStatus & APFloat::opStatus::opInexact);
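// E.g. for f32 -> u32 the unsigned maximum 0xFFFFFFFF is not exactly
// representable in f32, so MaxStatus is inexact and the compare+select
// expansion below is used instead of the clamp.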
8339
8340 // If the integer bounds are exactly representable as floats, emit a
8341 // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
8342 // and selects.
8343 if (AreExactFloatBounds) {
8344 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
8345 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
8346 auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
8347 SrcTy.changeElementSize(1), Src, MaxC);
8348 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8349 // Clamp by MaxFloat from above. NaN cannot occur.
8350 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8351 auto MinP =
8352 MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max,
8353 MinC, MachineInstr::FmNoNans);
8354 auto Min =
8355 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
8356 // Convert clamped value to integer. In the unsigned case we're done,
8357 // because we mapped NaN to MinFloat, which will cast to zero.
8358 if (!IsSigned) {
8359 MIRBuilder.buildFPTOUI(Dst, Min);
8360 MI.eraseFromParent();
8361 return Legalized;
8362 }
8363
8364 // Otherwise, select 0 if Src is NaN.
8365 auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
8366 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8367 DstTy.changeElementSize(1), Src, Src);
8368 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
8369 FpToInt);
8370 MI.eraseFromParent();
8371 return Legalized;
8372 }
8373
8374 // Result of direct conversion. The assumption here is that the operation is
8375 // non-trapping and it's fine to apply it to an out-of-range value if we
8376 // select it away later.
8377 auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
8378 : MIRBuilder.buildFPTOUI(DstTy, Src);
8379
8380 // If Src ULT MinFloat, select MinInt. In particular, this also selects
8381 // MinInt if Src is NaN.
8382 auto ULT =
8383 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
8384 MIRBuilder.buildFConstant(SrcTy, MinFloat));
8385 auto Max = MIRBuilder.buildSelect(
8386 DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8387 // If Src OGT MaxFloat, select MaxInt.
8388 auto OGT =
8389 MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
8390 MIRBuilder.buildFConstant(SrcTy, MaxFloat));
8391
8392 // In the unsigned case we are done, because we mapped NaN to MinInt, which
8393 // is already zero.
8394 if (!IsSigned) {
8395 MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
8396 Max);
8397 MI.eraseFromParent();
8398 return Legalized;
8399 }
8400
8401 // Otherwise, select 0 if Src is NaN.
8402 auto Min = MIRBuilder.buildSelect(
8403 DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8404 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8405 DstTy.changeElementSize(1), Src, Src);
8406 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
8407 MI.eraseFromParent();
8408 return Legalized;
8409}
8410
8411// f64 -> f16 conversion using round-to-nearest-even rounding mode.
8414 const LLT S1 = LLT::scalar(1);
8415 const LLT S32 = LLT::scalar(32);
8416
8417 auto [Dst, Src] = MI.getFirst2Regs();
8418 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
8419 MRI.getType(Src).getScalarType() == LLT::scalar(64));
8420
8421 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
8422 return UnableToLegalize;
8423
8424 if (MI.getFlag(MachineInstr::FmAfn)) {
8425 unsigned Flags = MI.getFlags();
8426 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
8427 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
8428 MI.eraseFromParent();
8429 return Legalized;
8430 }
8431
8432 const unsigned ExpMask = 0x7ff;
8433 const unsigned ExpBiasf64 = 1023;
8434 const unsigned ExpBiasf16 = 15;
8435
8436 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
8437 Register U = Unmerge.getReg(0);
8438 Register UH = Unmerge.getReg(1);
8439
8440 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
8441 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
8442
8443 // Subtract the fp64 exponent bias (1023) to get the real exponent and
8444 // add the f16 bias (15) to get the biased exponent for the f16 format.
8445 E = MIRBuilder.buildAdd(
8446 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
8447
8448 auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
8449 M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
8450
8451 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
8452 MIRBuilder.buildConstant(S32, 0x1ff));
8453 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
8454
8455 auto Zero = MIRBuilder.buildConstant(S32, 0);
8456 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
8457 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
8458 M = MIRBuilder.buildOr(S32, M, Lo40Set);
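// Fold every mantissa bit not captured in M (the low 9 bits of the high word
// and the entire low word) into a single sticky bit, so the rounding steps
// below can tell whether anything non-zero was discarded.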
8459
8460 // (M != 0 ? 0x0200 : 0) | 0x7c00;
8461 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
8462 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
8463 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
8464
8465 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
8466 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
8467
8468 // N = M | (E << 12);
8469 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
8470 auto N = MIRBuilder.buildOr(S32, M, EShl12);
8471
8472 // B = clamp(1-E, 0, 13);
8473 auto One = MIRBuilder.buildConstant(S32, 1);
8474 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
8475 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
8476 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
8477
8478 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
8479 MIRBuilder.buildConstant(S32, 0x1000));
8480
8481 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
8482 auto D0 = MIRBuilder.buildShl(S32, D, B);
8483
8484 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
8485 D0, SigSetHigh);
8486 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
8487 D = MIRBuilder.buildOr(S32, D, D1);
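// If shifting back left does not reproduce SigSetHigh, the right shift by B
// dropped set bits; record that as a sticky bit so the subnormal result still
// rounds correctly.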
8488
8489 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
8490 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
8491
8492 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
8493 V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
8494
8495 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
8496 MIRBuilder.buildConstant(S32, 3));
8497 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
8498
8499 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
8500 MIRBuilder.buildConstant(S32, 5));
8501 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
8502
8503 V1 = MIRBuilder.buildOr(S32, V0, V1);
8504 V = MIRBuilder.buildAdd(S32, V, V1);
8505
8506 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
8507 E, MIRBuilder.buildConstant(S32, 30));
8508 V = MIRBuilder.buildSelect(S32, CmpEGt30,
8509 MIRBuilder.buildConstant(S32, 0x7c00), V);
8510
8511 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
8512 E, MIRBuilder.buildConstant(S32, 1039));
8513 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
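// E > 30 overflows the largest finite f16 exponent, so the result is forced
// to infinity (0x7c00; the sign bit is OR'd in below). E == 1039 corresponds
// to an f64 exponent field of 0x7ff, i.e. Inf or NaN, in which case I
// (0x7c00, with the quiet bit 0x0200 set for a non-zero mantissa) is used.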
8514
8515 // Extract the sign bit.
8516 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
8517 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
8518
8519 // Insert the sign bit
8520 V = MIRBuilder.buildOr(S32, Sign, V);
8521
8522 MIRBuilder.buildTrunc(Dst, V);
8523 MI.eraseFromParent();
8524 return Legalized;
8525}
8526
8529 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
8530 const LLT S64 = LLT::scalar(64);
8531 const LLT S16 = LLT::scalar(16);
8532
8533 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
8534 return lowerFPTRUNC_F64_TO_F16(MI);
8535
8536 return UnableToLegalize;
8537}
8538
8540 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8541 LLT Ty = MRI.getType(Dst);
8542
8543 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
8544 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
8545 MI.eraseFromParent();
8546 return Legalized;
8547}
8548
8549static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
8550 switch (Opc) {
8551 case TargetOpcode::G_SMIN:
8552 return CmpInst::ICMP_SLT;
8553 case TargetOpcode::G_SMAX:
8554 return CmpInst::ICMP_SGT;
8555 case TargetOpcode::G_UMIN:
8556 return CmpInst::ICMP_ULT;
8557 case TargetOpcode::G_UMAX:
8558 return CmpInst::ICMP_UGT;
8559 default:
8560 llvm_unreachable("not in integer min/max");
8561 }
8562}
8563
8565 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8566
8567 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
8568 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
8569
8570 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8571 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8572
8573 MI.eraseFromParent();
8574 return Legalized;
8575}
8576
8579 GSUCmp *Cmp = cast<GSUCmp>(&MI);
8580
8581 Register Dst = Cmp->getReg(0);
8582 LLT DstTy = MRI.getType(Dst);
8583 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8584 LLT CmpTy = DstTy.changeElementSize(1);
8585
8586 CmpInst::Predicate LTPredicate = Cmp->isSigned()
8587 ? CmpInst::ICMP_SLT
8588 : CmpInst::ICMP_ULT;
8589 CmpInst::Predicate GTPredicate = Cmp->isSigned()
8590 ? CmpInst::ICMP_SGT
8591 : CmpInst::ICMP_UGT;
8592
8593 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
8594 auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8595 Cmp->getRHSReg());
8596 auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8597 Cmp->getRHSReg());
8598
8599 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
8600 auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
8601 if (TLI.shouldExpandCmpUsingSelects(getApproximateEVTForLLT(SrcTy, Ctx)) ||
8603 auto One = MIRBuilder.buildConstant(DstTy, 1);
8604 auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8605
8606 auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
8607 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8608 } else {
8609 if (BC == TargetLowering::ZeroOrNegativeOneBooleanContent)
8610 std::swap(IsGT, IsLT);
8611 // Extend boolean results to DstTy, which is at least i2, before subtracting
8612 // them.
8613 unsigned BoolExtOp =
8614 MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
8615 IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8616 IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8617 MIRBuilder.buildSub(Dst, IsGT, IsLT);
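// With zero-or-one booleans IsGT - IsLT directly yields +1/0/-1; when the
// target sign-extends booleans to 0/-1 the two compares were swapped above,
// so the subtraction still produces +1 for greater and -1 for less.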
8618 }
8619
8620 MI.eraseFromParent();
8621 return Legalized;
8622}
8623
8626 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
8627 const int Src0Size = Src0Ty.getScalarSizeInBits();
8628 const int Src1Size = Src1Ty.getScalarSizeInBits();
8629
8630 auto SignBitMask = MIRBuilder.buildConstant(
8631 Src0Ty, APInt::getSignMask(Src0Size));
8632
8633 auto NotSignBitMask = MIRBuilder.buildConstant(
8634 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
8635
8636 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
8637 Register And1;
8638 if (Src0Ty == Src1Ty) {
8639 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
8640 } else if (Src0Size > Src1Size) {
8641 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
8642 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
8643 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
8644 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
8645 } else {
8646 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
8647 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
8648 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
8649 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
8650 }
8651
8652 // Be careful about setting nsz/nnan/ninf on every instruction, since the
8653 // constants are a nan and -0.0, but the final result should preserve
8654 // everything.
8655 unsigned Flags = MI.getFlags();
8656
8657 // We masked the sign bit and the not-sign bit, so these are disjoint.
8658 Flags |= MachineInstr::Disjoint;
8659
8660 MIRBuilder.buildOr(Dst, And0, And1, Flags);
8661
8662 MI.eraseFromParent();
8663 return Legalized;
8664}
8665
8668 // FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have
8669 // identical handling. fminimumnum/fmaximumnum also need a path that does not
8670 // depend on fminnum/fmaxnum.
8671
8672 unsigned NewOp;
8673 switch (MI.getOpcode()) {
8674 case TargetOpcode::G_FMINNUM:
8675 NewOp = TargetOpcode::G_FMINNUM_IEEE;
8676 break;
8677 case TargetOpcode::G_FMINIMUMNUM:
8678 NewOp = TargetOpcode::G_FMINNUM;
8679 break;
8680 case TargetOpcode::G_FMAXNUM:
8681 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
8682 break;
8683 case TargetOpcode::G_FMAXIMUMNUM:
8684 NewOp = TargetOpcode::G_FMAXNUM;
8685 break;
8686 default:
8687 llvm_unreachable("unexpected min/max opcode");
8688 }
8689
8690 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8691 LLT Ty = MRI.getType(Dst);
8692
8693 if (!MI.getFlag(MachineInstr::FmNoNans)) {
8694 // Insert canonicalizes if it's possible we need to quiet to get correct
8695 // sNaN behavior.
8696
8697 // Note this must be done here, and not as an optimization combine in the
8698 // absence of a dedicated quiet-snan instruction, as we're using an
8699 // omni-purpose G_FCANONICALIZE.
8700 if (!isKnownNeverSNaN(Src0, MRI))
8701 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
8702
8703 if (!isKnownNeverSNaN(Src1, MRI))
8704 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
8705 }
8706
8707 // If there are no nans, it's safe to simply replace this with the non-IEEE
8708 // version.
8709 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
8710 MI.eraseFromParent();
8711 return Legalized;
8712}
8713
8715 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
8716 Register DstReg = MI.getOperand(0).getReg();
8717 LLT Ty = MRI.getType(DstReg);
8718 unsigned Flags = MI.getFlags();
8719
8720 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
8721 Flags);
8722 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
8723 MI.eraseFromParent();
8724 return Legalized;
8725}
8726
8729 auto [DstReg, X] = MI.getFirst2Regs();
8730 const unsigned Flags = MI.getFlags();
8731 const LLT Ty = MRI.getType(DstReg);
8732 const LLT CondTy = Ty.changeElementSize(1);
8733
8734 // round(x) =>
8735 // t = trunc(x);
8736 // d = fabs(x - t);
8737 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
8738 // return t + o;
8739
8740 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
8741
8742 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
8743 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
8744
8745 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
8746 auto Cmp =
8747 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
8748
8749 // Could emit G_UITOFP instead
8750 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
8751 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8752 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
8753 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
8754
8755 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
8756
8757 MI.eraseFromParent();
8758 return Legalized;
8759}
8760
8762 auto [DstReg, SrcReg] = MI.getFirst2Regs();
8763 unsigned Flags = MI.getFlags();
8764 LLT Ty = MRI.getType(DstReg);
8765 const LLT CondTy = Ty.changeElementSize(1);
8766
8767 // result = trunc(src);
8768 // if (src < 0.0 && src != result)
8769 // result += -1.0.
8770
8771 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
8772 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8773
8774 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
8775 SrcReg, Zero, Flags);
8776 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
8777 SrcReg, Trunc, Flags);
8778 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
8779 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
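// The boolean And sign-extends through G_SITOFP to -1.0 when the adjustment
// is needed and 0.0 otherwise, so a single fadd completes the floor.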
8780
8781 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
8782 MI.eraseFromParent();
8783 return Legalized;
8784}
8785
8788 const unsigned NumOps = MI.getNumOperands();
8789 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
8790 unsigned PartSize = Src0Ty.getSizeInBits();
8791
8792 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
8793 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
8794
8795 for (unsigned I = 2; I != NumOps; ++I) {
8796 const unsigned Offset = (I - 1) * PartSize;
8797
8798 Register SrcReg = MI.getOperand(I).getReg();
8799 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
8800
8801 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
8802 MRI.createGenericVirtualRegister(WideTy);
8803
8804 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
8805 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
8806 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
8807 ResultReg = NextResult;
8808 }
8809
8810 if (DstTy.isPointer()) {
8811 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
8812 DstTy.getAddressSpace())) {
8813 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
8814 return UnableToLegalize;
8815 }
8816
8817 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
8818 }
8819
8820 MI.eraseFromParent();
8821 return Legalized;
8822}
8823
8826 const unsigned NumDst = MI.getNumOperands() - 1;
8827 Register SrcReg = MI.getOperand(NumDst).getReg();
8828 Register Dst0Reg = MI.getOperand(0).getReg();
8829 LLT DstTy = MRI.getType(Dst0Reg);
8830 if (DstTy.isPointer())
8831 return UnableToLegalize; // TODO
8832
8833 SrcReg = coerceToScalar(SrcReg);
8834 if (!SrcReg)
8835 return UnableToLegalize;
8836
8837 // Expand scalarizing unmerge as bitcast to integer and shift.
8838 LLT IntTy = MRI.getType(SrcReg);
8839
8840 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
8841
8842 const unsigned DstSize = DstTy.getSizeInBits();
8843 unsigned Offset = DstSize;
8844 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
8845 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
8846 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
8847 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
8848 }
8849
8850 MI.eraseFromParent();
8851 return Legalized;
8852}
8853
8854/// Lower a vector extract or insert by writing the vector to a stack temporary
8855/// and reloading the element or vector.
8856///
8857/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
8858/// =>
8859/// %stack_temp = G_FRAME_INDEX
8860/// G_STORE %vec, %stack_temp
8861/// %idx = clamp(%idx, %vec.getNumElements())
8862/// %element_ptr = G_PTR_ADD %stack_temp, %idx
8863/// %dst = G_LOAD %element_ptr
8866 Register DstReg = MI.getOperand(0).getReg();
8867 Register SrcVec = MI.getOperand(1).getReg();
8868 Register InsertVal;
8869 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
8870 InsertVal = MI.getOperand(2).getReg();
8871
8872 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
8873
8874 LLT VecTy = MRI.getType(SrcVec);
8875 LLT EltTy = VecTy.getElementType();
8876 unsigned NumElts = VecTy.getNumElements();
8877
8878 int64_t IdxVal;
8879 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
8880 SmallVector<Register, 8> SrcRegs;
8881 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
8882
8883 if (InsertVal) {
8884 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
8885 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
8886 } else {
8887 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
8888 }
8889
8890 MI.eraseFromParent();
8891 return Legalized;
8892 }
8893
8894 if (!EltTy.isByteSized()) { // Not implemented.
8895 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
8896 return UnableToLegalize;
8897 }
8898
8899 unsigned EltBytes = EltTy.getSizeInBytes();
8900 Align VecAlign = getStackTemporaryAlignment(VecTy);
8901 Align EltAlign;
8902
8903 MachinePointerInfo PtrInfo;
8904 auto StackTemp = createStackTemporary(
8905 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
8906 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
8907
8908 // Get the pointer to the element, and be sure not to hit undefined behavior
8909 // if the index is out of bounds.
8910 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
8911
8912 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
8913 int64_t Offset = IdxVal * EltBytes;
8914 PtrInfo = PtrInfo.getWithOffset(Offset);
8915 EltAlign = commonAlignment(VecAlign, Offset);
8916 } else {
8917 // We lose information with a variable offset.
8918 EltAlign = getStackTemporaryAlignment(EltTy);
8919 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
8920 }
8921
8922 if (InsertVal) {
8923 // Write the inserted element
8924 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
8925
8926 // Reload the whole vector.
8927 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
8928 } else {
8929 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
8930 }
8931
8932 MI.eraseFromParent();
8933 return Legalized;
8934}
8935
8938 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
8939 MI.getFirst3RegLLTs();
8940 LLT IdxTy = LLT::scalar(32);
8941
8942 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
8943 Register Undef;
8944 SmallVector<Register, 32> BuildVec;
8945 LLT EltTy = DstTy.getScalarType();
8946
8947 for (int Idx : Mask) {
8948 if (Idx < 0) {
8949 if (!Undef.isValid())
8950 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
8951 BuildVec.push_back(Undef);
8952 continue;
8953 }
8954
8955 if (Src0Ty.isScalar()) {
8956 BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
8957 } else {
8958 int NumElts = Src0Ty.getNumElements();
8959 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
8960 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
8961 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
8962 auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
8963 BuildVec.push_back(Extract.getReg(0));
8964 }
8965 }
8966
8967 if (DstTy.isVector())
8968 MIRBuilder.buildBuildVector(DstReg, BuildVec);
8969 else
8970 MIRBuilder.buildCopy(DstReg, BuildVec[0]);
8971 MI.eraseFromParent();
8972 return Legalized;
8973}
8974
8977 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
8978 MI.getFirst4RegLLTs();
8979
8980 if (VecTy.isScalableVector())
8981 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
8982
8983 Align VecAlign = getStackTemporaryAlignment(VecTy);
8984 MachinePointerInfo PtrInfo;
8985 Register StackPtr =
8986 createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
8987 PtrInfo)
8988 .getReg(0);
8989 MachinePointerInfo ValPtrInfo =
8990 MachinePointerInfo::getUnknownStack(*MI.getMF());
8991
8992 LLT IdxTy = LLT::scalar(32);
8993 LLT ValTy = VecTy.getElementType();
8994 Align ValAlign = getStackTemporaryAlignment(ValTy);
8995
8996 auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
8997
8998 bool HasPassthru =
8999 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
9000
9001 if (HasPassthru)
9002 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
9003
9004 Register LastWriteVal;
9005 std::optional<APInt> PassthruSplatVal =
9006 isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
9007
9008 if (PassthruSplatVal.has_value()) {
9009 LastWriteVal =
9010 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9011 } else if (HasPassthru) {
9012 auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9013 Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9014 {LLT::scalar(32)}, {Popcount});
9015
9016 Register LastElmtPtr =
9017 getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
9018 LastWriteVal =
9019 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9020 .getReg(0);
9021 }
9022
9023 unsigned NumElmts = VecTy.getNumElements();
9024 for (unsigned I = 0; I < NumElmts; ++I) {
9025 auto Idx = MIRBuilder.buildConstant(IdxTy, I);
9026 auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
9027 Register ElmtPtr =
9028 getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9029 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9030
9031 LLT MaskITy = MaskTy.getElementType();
9032 auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9033 if (MaskITy.getSizeInBits() > 1)
9034 MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
9035
9036 MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
9037 OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
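// OutPos only advances for lanes whose mask bit is set; a write from an
// unselected lane lands in a slot that is either overwritten by the next
// selected element, fixed up by the passthru handling below, or left
// undefined in the result.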
9038
9039 if (HasPassthru && I == NumElmts - 1) {
9040 auto EndOfVector =
9041 MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
9042 auto AllLanesSelected = MIRBuilder.buildICmp(
9043 CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
9044 OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9045 {OutPos, EndOfVector});
9046 ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9047
9048 LastWriteVal =
9049 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9050 .getReg(0);
9051 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9052 }
9053 }
9054
9055 // TODO: Use StackPtr's FrameIndex alignment.
9056 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9057
9058 MI.eraseFromParent();
9059 return Legalized;
9060}
9061
9062Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
9063 Register AllocSize,
9064 Align Alignment,
9065 LLT PtrTy) {
9066 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
9067
9068 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
9069 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
9070
9071 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
9072 // have to generate an extra instruction to negate the alloc and then use
9073 // G_PTR_ADD to add the negative offset.
9074 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
9075 if (Alignment > Align(1)) {
9076 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
9077 AlignMask.negate();
9078 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
9079 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
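// ANDing with -Alignment rounds the new stack pointer down to the requested
// alignment, which is the safe direction since the stack grows towards lower
// addresses.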
9080 }
9081
9082 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
9083}
9084
9087 const auto &MF = *MI.getMF();
9088 const auto &TFI = *MF.getSubtarget().getFrameLowering();
9089 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
9090 return UnableToLegalize;
9091
9092 Register Dst = MI.getOperand(0).getReg();
9093 Register AllocSize = MI.getOperand(1).getReg();
9094 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
9095
9096 LLT PtrTy = MRI.getType(Dst);
9097 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9098 Register SPTmp =
9099 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
9100
9101 MIRBuilder.buildCopy(SPReg, SPTmp);
9102 MIRBuilder.buildCopy(Dst, SPTmp);
9103
9104 MI.eraseFromParent();
9105 return Legalized;
9106}
9107
9110 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9111 if (!StackPtr)
9112 return UnableToLegalize;
9113
9114 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
9115 MI.eraseFromParent();
9116 return Legalized;
9117}
9118
9121 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9122 if (!StackPtr)
9123 return UnableToLegalize;
9124
9125 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
9126 MI.eraseFromParent();
9127 return Legalized;
9128}
9129
9132 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9133 unsigned Offset = MI.getOperand(2).getImm();
9134
9135 // Extract sub-vector or one element
9136 if (SrcTy.isVector()) {
9137 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9138 unsigned DstSize = DstTy.getSizeInBits();
9139
9140 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9141 (Offset + DstSize <= SrcTy.getSizeInBits())) {
9142 // Unmerge and allow access to each Src element for the artifact combiner.
9143 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9144
9145 // Take element(s) we need to extract and copy it (merge them).
9146 SmallVector<Register, 8> SubVectorElts;
9147 for (unsigned Idx = Offset / SrcEltSize;
9148 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
9149 SubVectorElts.push_back(Unmerge.getReg(Idx));
9150 }
9151 if (SubVectorElts.size() == 1)
9152 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9153 else
9154 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9155
9156 MI.eraseFromParent();
9157 return Legalized;
9158 }
9159 }
9160
9161 if (DstTy.isScalar() &&
9162 (SrcTy.isScalar() ||
9163 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9164 LLT SrcIntTy = SrcTy;
9165 if (!SrcTy.isScalar()) {
9166 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
9167 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
9168 }
9169
9170 if (Offset == 0)
9171 MIRBuilder.buildTrunc(DstReg, SrcReg);
9172 else {
9173 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
9174 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9175 MIRBuilder.buildTrunc(DstReg, Shr);
9176 }
9177
9178 MI.eraseFromParent();
9179 return Legalized;
9180 }
9181
9182 return UnableToLegalize;
9183}
9184
9186 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
9187 uint64_t Offset = MI.getOperand(3).getImm();
9188
9189 LLT DstTy = MRI.getType(Src);
9190 LLT InsertTy = MRI.getType(InsertSrc);
9191
9192 // Insert sub-vector or one element
9193 if (DstTy.isVector() && !InsertTy.isPointer()) {
9194 LLT EltTy = DstTy.getElementType();
9195 unsigned EltSize = EltTy.getSizeInBits();
9196 unsigned InsertSize = InsertTy.getSizeInBits();
9197
9198 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9199 (Offset + InsertSize <= DstTy.getSizeInBits())) {
9200 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
9201 SmallVector<Register, 8> DstElts;
9202 unsigned Idx = 0;
9203 // Elements from Src before insert start Offset
9204 for (; Idx < Offset / EltSize; ++Idx) {
9205 DstElts.push_back(UnmergeSrc.getReg(Idx));
9206 }
9207
9208 // Replace elements in Src with elements from InsertSrc
9209 if (InsertTy.getSizeInBits() > EltSize) {
9210 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9211 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
9212 ++Idx, ++i) {
9213 DstElts.push_back(UnmergeInsertSrc.getReg(i));
9214 }
9215 } else {
9216 DstElts.push_back(InsertSrc);
9217 ++Idx;
9218 }
9219
9220 // Remaining elements from Src after insert
9221 for (; Idx < DstTy.getNumElements(); ++Idx) {
9222 DstElts.push_back(UnmergeSrc.getReg(Idx));
9223 }
9224
9225 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9226 MI.eraseFromParent();
9227 return Legalized;
9228 }
9229 }
9230
9231 if (InsertTy.isVector() ||
9232 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
9233 return UnableToLegalize;
9234
9235 const DataLayout &DL = MIRBuilder.getDataLayout();
9236 if ((DstTy.isPointer() &&
9237 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
9238 (InsertTy.isPointer() &&
9239 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
9240 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9241 return UnableToLegalize;
9242 }
9243
9244 LLT IntDstTy = DstTy;
9245
9246 if (!DstTy.isScalar()) {
9247 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
9248 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9249 }
9250
9251 if (!InsertTy.isScalar()) {
9252 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
9253 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9254 }
9255
9256 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
9257 if (Offset != 0) {
9258 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
9259 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9260 }
9261
9262 APInt MaskVal = APInt::getBitsSetWithWrap(
9263 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
9264
9265 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
9266 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9267 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9268
9269 MIRBuilder.buildCast(Dst, Or);
9270 MI.eraseFromParent();
9271 return Legalized;
9272}
9273
9276 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9277 MI.getFirst4RegLLTs();
9278 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
9279
9280 LLT Ty = Dst0Ty;
9281 LLT BoolTy = Dst1Ty;
9282
9283 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9284
9285 if (IsAdd)
9286 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
9287 else
9288 MIRBuilder.buildSub(NewDst0, LHS, RHS);
9289
9290 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
9291
9292 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9293
9294 // For an addition, the result should be less than one of the operands (LHS)
9295 // if and only if the other operand (RHS) is negative, otherwise there will
9296 // be overflow.
9297 // For a subtraction, the result should be less than one of the operands
9298 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
9299 // otherwise there will be overflow.
9300 auto ResultLowerThanLHS =
9301 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
9302 auto ConditionRHS = MIRBuilder.buildICmp(
9303 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
9304
9305 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
9306
9307 MIRBuilder.buildCopy(Dst0, NewDst0);
9308 MI.eraseFromParent();
9309
9310 return Legalized;
9311}
9312
9314 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9315 const LLT Ty = MRI.getType(Res);
9316
9317 // sum = LHS + RHS + zext(CarryIn)
9318 auto Tmp = MIRBuilder.buildAdd(Ty, LHS, RHS);
9319 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9320 auto Sum = MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
9321 MIRBuilder.buildCopy(Res, Sum);
9322
9323 // OvOut = icmp slt ((sum ^ lhs) & (sum ^ rhs)), 0
9324 auto AX = MIRBuilder.buildXor(Ty, Sum, LHS);
9325 auto BX = MIRBuilder.buildXor(Ty, Sum, RHS);
9326 auto T = MIRBuilder.buildAnd(Ty, AX, BX);
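// The sign bit of (Sum ^ LHS) & (Sum ^ RHS) is set exactly when both inputs
// share a sign and the sum's sign differs, i.e. on signed overflow.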
9327
9328 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9329 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9330
9331 MI.eraseFromParent();
9332 return Legalized;
9333}
9334
9336 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9337 const LLT Ty = MRI.getType(Res);
9338
9339 // Diff = LHS - (RHS + zext(CarryIn))
9340 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9341 auto RHSPlusCI = MIRBuilder.buildAdd(Ty, RHS, CarryZ);
9342 auto Diff = MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
9343 MIRBuilder.buildCopy(Res, Diff);
9344
9345 // ov = msb((LHS ^ RHS) & (LHS ^ Diff))
9346 auto X1 = MIRBuilder.buildXor(Ty, LHS, RHS);
9347 auto X2 = MIRBuilder.buildXor(Ty, LHS, Diff);
9348 auto T = MIRBuilder.buildAnd(Ty, X1, X2);
9349 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9350 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9351
9352 MI.eraseFromParent();
9353 return Legalized;
9354}
9355
9358 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9359 LLT Ty = MRI.getType(Res);
9360 bool IsSigned;
9361 bool IsAdd;
9362 unsigned BaseOp;
9363 switch (MI.getOpcode()) {
9364 default:
9365 llvm_unreachable("unexpected addsat/subsat opcode");
9366 case TargetOpcode::G_UADDSAT:
9367 IsSigned = false;
9368 IsAdd = true;
9369 BaseOp = TargetOpcode::G_ADD;
9370 break;
9371 case TargetOpcode::G_SADDSAT:
9372 IsSigned = true;
9373 IsAdd = true;
9374 BaseOp = TargetOpcode::G_ADD;
9375 break;
9376 case TargetOpcode::G_USUBSAT:
9377 IsSigned = false;
9378 IsAdd = false;
9379 BaseOp = TargetOpcode::G_SUB;
9380 break;
9381 case TargetOpcode::G_SSUBSAT:
9382 IsSigned = true;
9383 IsAdd = false;
9384 BaseOp = TargetOpcode::G_SUB;
9385 break;
9386 }
9387
9388 if (IsSigned) {
9389 // sadd.sat(a, b) ->
9390 // hi = 0x7fffffff - smax(a, 0)
9391 // lo = 0x80000000 - smin(a, 0)
9392 // a + smin(smax(lo, b), hi)
9393 // ssub.sat(a, b) ->
9394 // lo = smax(a, -1) - 0x7fffffff
9395 // hi = smin(a, -1) - 0x80000000
9396 // a - smin(smax(lo, b), hi)
9397 // TODO: AMDGPU can use a "median of 3" instruction here:
9398 // a +/- med3(lo, b, hi)
9399 uint64_t NumBits = Ty.getScalarSizeInBits();
9400 auto MaxVal =
9401 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
9402 auto MinVal =
9403 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9404 MachineInstrBuilder Hi, Lo;
9405 if (IsAdd) {
9406 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9407 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
9408 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
9409 } else {
9410 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
9411 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
9412 MaxVal);
9413 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
9414 MinVal);
9415 }
9416 auto RHSClamped =
9417 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
9418 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
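// E.g. 8-bit sadd.sat with LHS = 100: Hi = 127 - 100 = 27 and
// Lo = -128 - 0 = -128, so RHS is clamped to [-128, 27] and the final add
// can never leave [-28, 127].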
9419 } else {
9420 // uadd.sat(a, b) -> a + umin(~a, b)
9421 // usub.sat(a, b) -> a - umin(a, b)
9422 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
9423 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
9424 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9425 }
9426
9427 MI.eraseFromParent();
9428 return Legalized;
9429}
9430
9433 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9434 LLT Ty = MRI.getType(Res);
9435 LLT BoolTy = Ty.changeElementSize(1);
9436 bool IsSigned;
9437 bool IsAdd;
9438 unsigned OverflowOp;
9439 switch (MI.getOpcode()) {
9440 default:
9441 llvm_unreachable("unexpected addsat/subsat opcode");
9442 case TargetOpcode::G_UADDSAT:
9443 IsSigned = false;
9444 IsAdd = true;
9445 OverflowOp = TargetOpcode::G_UADDO;
9446 break;
9447 case TargetOpcode::G_SADDSAT:
9448 IsSigned = true;
9449 IsAdd = true;
9450 OverflowOp = TargetOpcode::G_SADDO;
9451 break;
9452 case TargetOpcode::G_USUBSAT:
9453 IsSigned = false;
9454 IsAdd = false;
9455 OverflowOp = TargetOpcode::G_USUBO;
9456 break;
9457 case TargetOpcode::G_SSUBSAT:
9458 IsSigned = true;
9459 IsAdd = false;
9460 OverflowOp = TargetOpcode::G_SSUBO;
9461 break;
9462 }
9463
9464 auto OverflowRes =
9465 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9466 Register Tmp = OverflowRes.getReg(0);
9467 Register Ov = OverflowRes.getReg(1);
9468 MachineInstrBuilder Clamp;
9469 if (IsSigned) {
9470 // sadd.sat(a, b) ->
9471 // {tmp, ov} = saddo(a, b)
9472 // ov ? (tmp >>s 31) + 0x80000000 : r
9473 // ssub.sat(a, b) ->
9474 // {tmp, ov} = ssubo(a, b)
9475 // ov ? (tmp >>s 31) + 0x80000000 : r
9476 uint64_t NumBits = Ty.getScalarSizeInBits();
9477 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
9478 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9479 auto MinVal =
9480 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9481 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
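// Sign is 0 or -1 depending on the sign of the wrapped result, so
// Sign + MinVal yields the signed minimum for negative overflow and, via
// wraparound, the signed maximum for positive overflow.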
9482 } else {
9483 // uadd.sat(a, b) ->
9484 // {tmp, ov} = uaddo(a, b)
9485 // ov ? 0xffffffff : tmp
9486 // usub.sat(a, b) ->
9487 // {tmp, ov} = usubo(a, b)
9488 // ov ? 0 : tmp
9489 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9490 }
9491 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
9492
9493 MI.eraseFromParent();
9494 return Legalized;
9495}
9496
9499 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
9500 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
9501 "Expected shlsat opcode!");
9502 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
9503 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9504 LLT Ty = MRI.getType(Res);
9505 LLT BoolTy = Ty.changeElementSize(1);
9506
9507 unsigned BW = Ty.getScalarSizeInBits();
9508 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
9509 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
9510 : MIRBuilder.buildLShr(Ty, Result, RHS);
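// Shifting the result back recovers LHS only if no significant bits were
// shifted out; any mismatch (checked below) means the shift saturates.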
9511
9512 MachineInstrBuilder SatVal;
9513 if (IsSigned) {
9514 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
9515 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
9516 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
9517 MIRBuilder.buildConstant(Ty, 0));
9518 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
9519 } else {
9520 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
9521 }
9522 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
9523 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
9524
9525 MI.eraseFromParent();
9526 return Legalized;
9527}
9528
9530 auto [Dst, Src] = MI.getFirst2Regs();
9531 const LLT Ty = MRI.getType(Src);
9532 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
9533 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
9534
9535 // Swap most and least significant byte, set remaining bytes in Res to zero.
9536 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
9537 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
9538 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9539 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
9540
9541 // Set i-th high/low byte in Res to i-th low/high byte from Src.
9542 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
9543 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
9544 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
9545 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
9546 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
9547 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
9548 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
9549 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
9550 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
9551 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
9552 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9553 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
9554 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
9555 }
9556 Res.getInstr()->getOperand(0).setReg(Dst);
9557
9558 MI.eraseFromParent();
9559 return Legalized;
9560}
9561
9562//{ (Src & Mask) >> N } | { (Src << N) & Mask }
9564 MachineInstrBuilder Src, const APInt &Mask) {
9565 const LLT Ty = Dst.getLLTTy(*B.getMRI());
9566 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
9567 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
9568 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
9569 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
9570 return B.buildOr(Dst, LHS, RHS);
9571}
9572
9575 auto [Dst, Src] = MI.getFirst2Regs();
9576 const LLT SrcTy = MRI.getType(Src);
9577 unsigned Size = SrcTy.getScalarSizeInBits();
9578 unsigned VSize = SrcTy.getSizeInBits();
9579
9580 if (Size >= 8) {
9581 if (SrcTy.isVector() && (VSize % 8 == 0) &&
9582 (LI.isLegal({TargetOpcode::G_BITREVERSE,
9583 {LLT::fixed_vector(VSize / 8, 8),
9584 LLT::fixed_vector(VSize / 8, 8)}}))) {
9585 // If bitreverse is legal for i8 vector of the same size, then cast
9586 // to i8 vector type.
9587 // e.g. v4s32 -> v16s8
9588 LLT VTy = LLT::fixed_vector(VSize / 8, 8);
9589 auto BSWAP = MIRBuilder.buildBSwap(SrcTy, Src);
9590 auto Cast = MIRBuilder.buildBitcast(VTy, BSWAP);
9591 auto RBIT = MIRBuilder.buildBitReverse(VTy, Cast);
9592 MIRBuilder.buildBitcast(Dst, RBIT);
9593 } else {
9594 MachineInstrBuilder BSWAP =
9595 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
9596
9597 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
9598 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
9599 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
9600 MachineInstrBuilder Swap4 = SwapN(4, SrcTy, MIRBuilder, BSWAP,
9601 APInt::getSplat(Size, APInt(8, 0xF0)));
9602
9603 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
9604 // [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
9605 // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
9606 MachineInstrBuilder Swap2 = SwapN(2, SrcTy, MIRBuilder, Swap4,
9607 APInt::getSplat(Size, APInt(8, 0xCC)));
9608
9609 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
9610 // 6|7
9611 // [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
9612 // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
9613 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
9614 }
9615 } else {
9616 // Expand bitreverse for types smaller than 8 bits.
9617 MachineInstrBuilder Tmp;
9618 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
9619 MachineInstrBuilder Tmp2;
9620 if (I < J) {
9621 auto ShAmt = MIRBuilder.buildConstant(SrcTy, J - I);
9622 Tmp2 = MIRBuilder.buildShl(SrcTy, Src, ShAmt);
9623 } else {
9624 auto ShAmt = MIRBuilder.buildConstant(SrcTy, I - J);
9625 Tmp2 = MIRBuilder.buildLShr(SrcTy, Src, ShAmt);
9626 }
9627
9628 auto Mask = MIRBuilder.buildConstant(SrcTy, 1ULL << J);
9629 Tmp2 = MIRBuilder.buildAnd(SrcTy, Tmp2, Mask);
9630 if (I == 0)
9631 Tmp = Tmp2;
9632 else
9633 Tmp = MIRBuilder.buildOr(SrcTy, Tmp, Tmp2);
9634 }
9635 MIRBuilder.buildCopy(Dst, Tmp);
9636 }
9637
9638 MI.eraseFromParent();
9639 return Legalized;
9640}
9641
9644 MachineFunction &MF = MIRBuilder.getMF();
9645
9646 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
9647 int NameOpIdx = IsRead ? 1 : 0;
9648 int ValRegIndex = IsRead ? 0 : 1;
9649
9650 Register ValReg = MI.getOperand(ValRegIndex).getReg();
9651 const LLT Ty = MRI.getType(ValReg);
9652 const MDString *RegStr = cast<MDString>(
9653 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
9654
9655 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
9656 if (!PhysReg) {
9657 const Function &Fn = MF.getFunction();
9658 Fn.getContext().diagnose(DiagnosticInfoGenericWithLoc(
9659 "invalid register \"" + Twine(RegStr->getString().data()) + "\" for " +
9660 (IsRead ? "llvm.read_register" : "llvm.write_register"),
9661 Fn, MI.getDebugLoc()));
9662 if (IsRead)
9663 MIRBuilder.buildUndef(ValReg);
9664
9665 MI.eraseFromParent();
9666 return Legalized;
9667 }
9668
9669 if (IsRead)
9670 MIRBuilder.buildCopy(ValReg, PhysReg);
9671 else
9672 MIRBuilder.buildCopy(PhysReg, ValReg);
9673
9674 MI.eraseFromParent();
9675 return Legalized;
9676}
9677
9680 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
9681 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
9682 Register Result = MI.getOperand(0).getReg();
9683 LLT OrigTy = MRI.getType(Result);
9684 auto SizeInBits = OrigTy.getScalarSizeInBits();
9685 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
9686
9687 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
9688 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
9689 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
9690 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
9691
9692 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
9693 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
9694 MIRBuilder.buildTrunc(Result, Shifted);
9695
9696 MI.eraseFromParent();
9697 return Legalized;
9698}
9699
9702 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9703 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
9704
9705 if (Mask == fcNone) {
9706 MIRBuilder.buildConstant(DstReg, 0);
9707 MI.eraseFromParent();
9708 return Legalized;
9709 }
9710 if (Mask == fcAllFlags) {
9711 MIRBuilder.buildConstant(DstReg, 1);
9712 MI.eraseFromParent();
9713 return Legalized;
9714 }
9715
9716 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
9717 // version
9718
9719 unsigned BitSize = SrcTy.getScalarSizeInBits();
9720 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
9721
9722 LLT IntTy = LLT::scalar(BitSize);
9723 if (SrcTy.isVector())
9724 IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
9725 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
9726
9727 // Various masks.
9728 APInt SignBit = APInt::getSignMask(BitSize);
9729 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9730 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9731 APInt ExpMask = Inf;
9732 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9733 APInt QNaNBitMask =
9734 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9735 APInt InversionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
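// For example, with f32 (IEEE single precision) sources these masks are:
//   SignBit = 0x80000000, ValueMask = 0x7fffffff, Inf == ExpMask = 0x7f800000,
//   AllOneMantissa = 0x007fffff, QNaNBitMask = 0x00400000.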
9736
9737 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
9738 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
9739 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
9740 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
9741 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
9742
9743 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
9744 auto Sign =
9745 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
9746
9747 auto Res = MIRBuilder.buildConstant(DstTy, 0);
9748 // Clang doesn't support capture of structured bindings:
9749 LLT DstTyCopy = DstTy;
9750 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
9751 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
9752 };
9753
9754 // Tests that involve more than one class should be processed first.
9755 if ((Mask & fcFinite) == fcFinite) {
9756 // finite(V) ==> abs(V) u< exp_mask
9757 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9758 ExpMaskC));
9759 Mask &= ~fcFinite;
9760 } else if ((Mask & fcFinite) == fcPosFinite) {
9761 // finite(V) && V > 0 ==> V u< exp_mask
9762 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
9763 ExpMaskC));
9764 Mask &= ~fcPosFinite;
9765 } else if ((Mask & fcFinite) == fcNegFinite) {
9766 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
9767 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9768 ExpMaskC);
9769 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
9770 appendToRes(And);
9771 Mask &= ~fcNegFinite;
9772 }
9773
9774 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
9775 // fcZero | fcSubnormal => test all exponent bits are 0
9776 // TODO: Handle sign bit specific cases
9777 // TODO: Handle inverted case
9778 if (PartialCheck == (fcZero | fcSubnormal)) {
9779 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
9780 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9781 ExpBits, ZeroC));
9782 Mask &= ~PartialCheck;
9783 }
9784 }
9785
9786 // Check for individual classes.
9787 if (FPClassTest PartialCheck = Mask & fcZero) {
9788 if (PartialCheck == fcPosZero)
9789 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9790 AsInt, ZeroC));
9791 else if (PartialCheck == fcZero)
9792 appendToRes(
9793 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
9794 else // fcNegZero
9795 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9796 AsInt, SignBitC));
9797 }
9798
9799 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
9800 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
9801 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
9802 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
9803 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
9804 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
9805 auto SubnormalRes =
9806 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
9807 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
9808 if (PartialCheck == fcNegSubnormal)
9809 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
9810 appendToRes(SubnormalRes);
9811 }
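// E.g. for f32: a subnormal has abs(V) in [0x00000001, 0x007fffff], so
// abs(V) - 1 is u< 0x007fffff, while zero wraps around to 0xffffffff and
// fails the unsigned compare.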
9812
9813 if (FPClassTest PartialCheck = Mask & fcInf) {
9814 if (PartialCheck == fcPosInf)
9815 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9816 AsInt, InfC));
9817 else if (PartialCheck == fcInf)
9818 appendToRes(
9819 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
9820 else { // fcNegInf
9821 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9822 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
9823 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9824 AsInt, NegInfC));
9825 }
9826 }
9827
9828 if (FPClassTest PartialCheck = Mask & fcNan) {
9829 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
9830 if (PartialCheck == fcNan) {
9831 // isnan(V) ==> abs(V) u> int(inf)
9832 appendToRes(
9833 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
9834 } else if (PartialCheck == fcQNan) {
9835 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
9836 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
9837 InfWithQnanBitC));
9838 } else { // fcSNan
9839 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
9840 // abs(V) u< (unsigned(Inf) | quiet_bit)
9841 auto IsNan =
9842 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
9843 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
9844 Abs, InfWithQnanBitC);
9845 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
9846 }
9847 }
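// E.g. for f32: any NaN has abs(V) u> 0x7f800000; a quiet NaN also has the
// 0x00400000 bit set, so abs(V) u>= 0x7fc00000, and a signaling NaN falls
// strictly between the two.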
9848
9849 if (FPClassTest PartialCheck = Mask & fcNormal) {
9850 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
9851 // (max_exp-1))
9852 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9853 auto ExpMinusOne = MIRBuilder.buildSub(
9854 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
9855 APInt MaxExpMinusOne = ExpMask - ExpLSB;
9856 auto NormalRes =
9857 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
9858 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
9859 if (PartialCheck == fcNegNormal)
9860 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
9861 else if (PartialCheck == fcPosNormal) {
9862 auto PosSign = MIRBuilder.buildXor(
9863 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InversionMask));
9864 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
9865 }
9866 appendToRes(NormalRes);
9867 }
9868
9869 MIRBuilder.buildCopy(DstReg, Res);
9870 MI.eraseFromParent();
9871 return Legalized;
9872}
9873
9874LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
9875 // Implement G_SELECT in terms of XOR, AND, OR.
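// Roughly: %res = (%tval & %mask) | (%fval & ~%mask), where %mask is the
// condition sign-extended to all-ones or all-zeros per element.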
9876 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
9877 MI.getFirst4RegLLTs();
9878
9879 bool IsEltPtr = DstTy.isPointerOrPointerVector();
9880 if (IsEltPtr) {
9881 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
9882 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
9883 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
9884 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
9885 DstTy = NewTy;
9886 }
9887
9888 if (MaskTy.isScalar()) {
9889 // Turn the scalar condition into a vector condition mask if needed.
9890
9891 Register MaskElt = MaskReg;
9892
9893 // The condition was potentially zero extended before, but we want a sign
9894 // extended boolean.
9895 if (MaskTy != LLT::scalar(1))
9896 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
9897
9898 // Continue the sign extension (or truncate) to match the data type.
9899 MaskElt =
9900 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
9901
9902 if (DstTy.isVector()) {
9903 // Generate a vector splat idiom.
9904 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
9905 MaskReg = ShufSplat.getReg(0);
9906 } else {
9907 MaskReg = MaskElt;
9908 }
9909 MaskTy = DstTy;
9910 } else if (!DstTy.isVector()) {
9911 // Cannot handle the case that mask is a vector and dst is a scalar.
9912 return UnableToLegalize;
9913 }
9914
9915 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
9916 return UnableToLegalize;
9917 }
9918
9919 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
9920 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
9921 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
9922 if (IsEltPtr) {
9923 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
9924 MIRBuilder.buildIntToPtr(DstReg, Or);
9925 } else {
9926 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
9927 }
9928 MI.eraseFromParent();
9929 return Legalized;
9930}
9931
9932LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
9933 // Split DIVREM into individual instructions.
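// For example, %q, %r = G_SDIVREM %a, %b becomes %q = G_SDIV %a, %b and
// %r = G_SREM %a, %b.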
9934 unsigned Opcode = MI.getOpcode();
9935
9936 MIRBuilder.buildInstr(
9937 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
9938 : TargetOpcode::G_UDIV,
9939 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
9940 MIRBuilder.buildInstr(
9941 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
9942 : TargetOpcode::G_UREM,
9943 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
9944 MI.eraseFromParent();
9945 return Legalized;
9946}
9947
9948LegalizerHelper::LegalizeResult
9949LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
9950 // Expand %res = G_ABS %a into:
9951 // %v1 = G_ASHR %a, scalar_size-1
9952 // %v2 = G_ADD %a, %v1
9953 // %res = G_XOR %v2, %v1
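// E.g. for %a = -5 (s32): %v1 = -1, %v2 = -6, and %res = -6 ^ -1 = 5; for a
// non-negative %a the mask %v1 is 0 and the value passes through unchanged.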
9954 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
9955 Register OpReg = MI.getOperand(1).getReg();
9956 auto ShiftAmt =
9957 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
9958 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
9959 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
9960 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
9961 MI.eraseFromParent();
9962 return Legalized;
9963}
9964
9965LegalizerHelper::LegalizeResult
9966LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
9967 // Expand %res = G_ABS %a into:
9968 // %v1 = G_CONSTANT 0
9969 // %v2 = G_SUB %v1, %a
9970 // %res = G_SMAX %a, %v2
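// E.g. for %a = -7: %v2 = 7 and smax(-7, 7) = 7.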
9971 Register SrcReg = MI.getOperand(1).getReg();
9972 LLT Ty = MRI.getType(SrcReg);
9973 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9974 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
9975 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
9976 MI.eraseFromParent();
9977 return Legalized;
9978}
9979
9980LegalizerHelper::LegalizeResult
9981LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
9982 Register SrcReg = MI.getOperand(1).getReg();
9983 Register DestReg = MI.getOperand(0).getReg();
9984 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
9985 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
9986 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
9987 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
9988 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
9989 MI.eraseFromParent();
9990 return Legalized;
9991}
9992
9993LegalizerHelper::LegalizeResult
9994LegalizerHelper::lowerAbsDiffToSelect(MachineInstr &MI) {
9995 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
9996 MI.getOpcode() == TargetOpcode::G_ABDU) &&
9997 "Expected G_ABDS or G_ABDU instruction");
9998
9999 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10000 LLT Ty = MRI.getType(LHS);
10001
10002 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10003 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
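// E.g. abdu(2, 7): ugt(2, 7) is false, so the select picks sub(7, 2) = 5.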
10004 Register LHSSub = MIRBuilder.buildSub(Ty, LHS, RHS).getReg(0);
10005 Register RHSSub = MIRBuilder.buildSub(Ty, RHS, LHS).getReg(0);
10006 CmpInst::Predicate Pred = (MI.getOpcode() == TargetOpcode::G_ABDS)
10007 ? CmpInst::ICMP_SGT
10008 : CmpInst::ICMP_UGT;
10009 auto ICmp = MIRBuilder.buildICmp(Pred, LLT::scalar(1), LHS, RHS);
10010 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
10011
10012 MI.eraseFromParent();
10013 return Legalized;
10014}
10015
10016LegalizerHelper::LegalizeResult
10017LegalizerHelper::lowerAbsDiffToMinMax(MachineInstr &MI) {
10018 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10019 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10020 "Expected G_ABDS or G_ABDU instruction");
10021
10022 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10023 LLT Ty = MRI.getType(LHS);
10024
10025 // abds(lhs, rhs) -> sub(smax(lhs, rhs), smin(lhs, rhs))
10026 // abdu(lhs, rhs) -> sub(umax(lhs, rhs), umin(lhs, rhs))
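// E.g. abds(-3, 5) = smax(-3, 5) - smin(-3, 5) = 5 - (-3) = 8.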
10027 Register MaxReg, MinReg;
10028 if (MI.getOpcode() == TargetOpcode::G_ABDS) {
10029 MaxReg = MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10030 MinReg = MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10031 } else {
10032 MaxReg = MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10033 MinReg = MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10034 }
10035 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10036
10037 MI.eraseFromParent();
10038 return Legalized;
10039}
10040
10041LegalizerHelper::LegalizeResult LegalizerHelper::lowerFAbs(MachineInstr &MI) {
10042 Register SrcReg = MI.getOperand(1).getReg();
10043 Register DstReg = MI.getOperand(0).getReg();
10044
10045 LLT Ty = MRI.getType(DstReg);
10046
10047 // Reset sign bit
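// (For a 32-bit value this is an AND with 0x7fffffff.)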
10048 MIRBuilder.buildAnd(
10049 DstReg, SrcReg,
10050 MIRBuilder.buildConstant(
10051 Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits())));
10052
10053 MI.eraseFromParent();
10054 return Legalized;
10055}
10056
10057LegalizerHelper::LegalizeResult
10058LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
10059 Register SrcReg = MI.getOperand(1).getReg();
10060 LLT SrcTy = MRI.getType(SrcReg);
10061 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
10062
10063 // The source could be a scalar if the IR type was <1 x sN>.
10064 if (SrcTy.isScalar()) {
10065 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
10066 return UnableToLegalize; // FIXME: handle extension.
10067 // This can be just a plain copy.
10068 Observer.changingInstr(MI);
10069 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
10070 Observer.changedInstr(MI);
10071 return Legalized;
10072 }
10073 return UnableToLegalize;
10074}
10075
10076LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
10077 MachineFunction &MF = *MI.getMF();
10078 const DataLayout &DL = MIRBuilder.getDataLayout();
10079 LLVMContext &Ctx = MF.getFunction().getContext();
10080 Register ListPtr = MI.getOperand(1).getReg();
10081 LLT PtrTy = MRI.getType(ListPtr);
10082
10083 // ListPtr is a pointer to the head of the list. Get the address
10084 // of the head of the list.
10085 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
10086 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
10087 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
10088 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10089
10090 const Align A(MI.getOperand(2).getImm());
10091 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
10092 if (A > TLI.getMinStackArgumentAlignment()) {
10093 Register AlignAmt =
10094 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
10095 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10096 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
10097 VAList = AndDst.getReg(0);
10098 }
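// The block above rounds VAList up to the requested alignment, roughly
// (VAList + A - 1) & ~(A - 1); e.g. with A = 8 a pointer ending in ...3 is
// bumped to the next address ending in ...8.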
10099
10100 // Increment the pointer, VAList, to the next vaarg
10101 // The list should be bumped by the size of the element in the current head
10102 // of the list.
10103 Register Dst = MI.getOperand(0).getReg();
10104 LLT LLTTy = MRI.getType(Dst);
10105 Type *Ty = getTypeForLLT(LLTTy, Ctx);
10106 auto IncAmt =
10107 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
10108 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10109
10110 // Store the incremented VAList to the legalized pointer
10111 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
10112 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
10113 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10114 // Load the actual argument out of the pointer VAList
10115 Align EltAlignment = DL.getABITypeAlign(Ty);
10116 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
10117 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
10118 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10119
10120 MI.eraseFromParent();
10121 return Legalized;
10122}
10123
10124static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
10125 // On Darwin, -Os means optimize for size without hurting performance, so
10126 // only really optimize for size when -Oz (MinSize) is used.
10127 if (MF.getTarget().getTargetTriple().isOSDarwin())
10128 return MF.getFunction().hasMinSize();
10129 return MF.getFunction().hasOptSize();
10130}
10131
10132// Returns a list of types to use for memory op lowering in MemOps. A partial
10133// port of findOptimalMemOpLowering in TargetLowering.
10134static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
10135 unsigned Limit, const MemOp &Op,
10136 unsigned DstAS, unsigned SrcAS,
10137 const AttributeList &FuncAttributes,
10138 const TargetLowering &TLI) {
10139 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
10140 return false;
10141
10142 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
10143
10144 if (Ty == LLT()) {
10145 // Use the largest scalar type whose alignment constraints are satisfied.
10146 // We only need to check DstAlign here as SrcAlign is always greater or
10147 // equal to DstAlign (or zero).
10148 Ty = LLT::scalar(64);
10149 if (Op.isFixedDstAlign())
10150 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
10151 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
10152 Ty = LLT::scalar(Ty.getSizeInBytes());
10153 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
10154 // FIXME: check for the largest legal type we can load/store to.
10155 }
10156
10157 unsigned NumMemOps = 0;
10158 uint64_t Size = Op.size();
10159 while (Size) {
10160 unsigned TySize = Ty.getSizeInBytes();
10161 while (TySize > Size) {
10162 // For now, only use non-vector load / store's for the left-over pieces.
10163 LLT NewTy = Ty;
10164 // FIXME: check for mem op safety and legality of the types. Not all of
10165 // SDAGisms map cleanly to GISel concepts.
10166 if (NewTy.isVector())
10167 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
10168 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
10169 unsigned NewTySize = NewTy.getSizeInBytes();
10170 assert(NewTySize > 0 && "Could not find appropriate type");
10171
10172 // If the new LLT cannot cover all of the remaining bits, then consider
10173 // issuing a (or a pair of) unaligned and overlapping load / store.
10174 unsigned Fast;
10175 // Need to get a VT equivalent for allowsMisalignedMemoryAccesses().
10176 MVT VT = getMVTForLLT(Ty);
10177 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
10179 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
10181 Fast)
10182 TySize = Size;
10183 else {
10184 Ty = NewTy;
10185 TySize = NewTySize;
10186 }
10187 }
10188
10189 if (++NumMemOps > Limit)
10190 return false;
10191
10192 MemOps.push_back(Ty);
10193 Size -= TySize;
10194 }
10195
10196 return true;
10197}
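// For illustration: a 13-byte operation where the target prefers s64 yields
// MemOps = {s64, s32, s8}; if overlapping, misaligned accesses are allowed
// and fast, it becomes {s64, s64}, with the second access overlapping the
// first by three bytes.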
10198
10199// Get a vectorized representation of the memset value operand, GISel edition.
10200static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
10201 MachineRegisterInfo &MRI = *MIB.getMRI();
10202 unsigned NumBits = Ty.getScalarSizeInBits();
10203 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10204 if (!Ty.isVector() && ValVRegAndVal) {
10205 APInt Scalar = ValVRegAndVal->Value.trunc(8);
10206 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
10207 return MIB.buildConstant(Ty, SplatVal).getReg(0);
10208 }
10209
10210 // Extend the byte value to the larger type, and then multiply by a magic
10211 // value 0x010101... in order to replicate it across every byte.
10212 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
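// E.g. a memset value of 0xAB widened to s64 is 0xAB * 0x0101010101010101
// = 0xabababababababab.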
10213 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10214 return MIB.buildConstant(Ty, 0).getReg(0);
10215 }
10216
10217 LLT ExtType = Ty.getScalarType();
10218 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
10219 if (NumBits > 8) {
10220 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
10221 auto MagicMI = MIB.buildConstant(ExtType, Magic);
10222 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
10223 }
10224
10225 // For vector types create a G_BUILD_VECTOR.
10226 if (Ty.isVector())
10227 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
10228
10229 return Val;
10230}
10231
10233LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
10234 uint64_t KnownLen, Align Alignment,
10235 bool IsVolatile) {
10236 auto &MF = *MI.getParent()->getParent();
10237 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10238 auto &DL = MF.getDataLayout();
10239 LLVMContext &C = MF.getFunction().getContext();
10240
10241 assert(KnownLen != 0 && "Have a zero length memset length!");
10242
10243 bool DstAlignCanChange = false;
10244 MachineFrameInfo &MFI = MF.getFrameInfo();
10245 bool OptSize = shouldLowerMemFuncForSize(MF);
10246
10247 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10248 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10249 DstAlignCanChange = true;
10250
10251 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
10252 std::vector<LLT> MemOps;
10253
10254 const auto &DstMMO = **MI.memoperands_begin();
10255 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10256
10257 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10258 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
10259
10260 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
10261 MemOp::Set(KnownLen, DstAlignCanChange,
10262 Alignment,
10263 /*IsZeroMemset=*/IsZeroVal,
10264 /*IsVolatile=*/IsVolatile),
10265 DstPtrInfo.getAddrSpace(), ~0u,
10266 MF.getFunction().getAttributes(), TLI))
10267 return UnableToLegalize;
10268
10269 if (DstAlignCanChange) {
10270 // Get an estimate of the type from the LLT.
10271 Type *IRTy = getTypeForLLT(MemOps[0], C);
10272 Align NewAlign = DL.getABITypeAlign(IRTy);
10273 if (NewAlign > Alignment) {
10274 Alignment = NewAlign;
10275 unsigned FI = FIDef->getOperand(1).getIndex();
10276 // Give the stack frame object a larger alignment if needed.
10277 if (MFI.getObjectAlign(FI) < Alignment)
10278 MFI.setObjectAlignment(FI, Alignment);
10279 }
10280 }
10281
10282 MachineIRBuilder MIB(MI);
10283 // Find the largest store and generate the bit pattern for it.
10284 LLT LargestTy = MemOps[0];
10285 for (unsigned i = 1; i < MemOps.size(); i++)
10286 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
10287 LargestTy = MemOps[i];
10288
10289 // The memset stored value is always defined as an s8, so in order to make it
10290 // work with larger store types we need to repeat the bit pattern across the
10291 // wider type.
10292 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
10293
10294 if (!MemSetValue)
10295 return UnableToLegalize;
10296
10297 // Generate the stores. For each store type in the list, we generate the
10298 // matching store of that type to the destination address.
10299 LLT PtrTy = MRI.getType(Dst);
10300 unsigned DstOff = 0;
10301 unsigned Size = KnownLen;
10302 for (unsigned I = 0; I < MemOps.size(); I++) {
10303 LLT Ty = MemOps[I];
10304 unsigned TySize = Ty.getSizeInBytes();
10305 if (TySize > Size) {
10306 // Issuing an unaligned load / store pair that overlaps with the previous
10307 // pair. Adjust the offset accordingly.
10308 assert(I == MemOps.size() - 1 && I != 0);
10309 DstOff -= TySize - Size;
10310 }
10311
10312 // If this store is smaller than the largest store see whether we can get
10313 // the smaller value for free with a truncate.
10314 Register Value = MemSetValue;
10315 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
10316 MVT VT = getMVTForLLT(Ty);
10317 MVT LargestVT = getMVTForLLT(LargestTy);
10318 if (!LargestTy.isVector() && !Ty.isVector() &&
10319 TLI.isTruncateFree(LargestVT, VT))
10320 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10321 else
10322 Value = getMemsetValue(Val, Ty, MIB);
10323 if (!Value)
10324 return UnableToLegalize;
10325 }
10326
10327 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
10328
10329 Register Ptr = Dst;
10330 if (DstOff != 0) {
10331 auto Offset =
10332 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
10333 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst, Offset).getReg(0);
10334 }
10335
10336 MIB.buildStore(Value, Ptr, *StoreMMO);
10337 DstOff += Ty.getSizeInBytes();
10338 Size -= TySize;
10339 }
10340
10341 MI.eraseFromParent();
10342 return Legalized;
10343}
10344
10346LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
10347 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10348
10349 auto [Dst, Src, Len] = MI.getFirst3Regs();
10350
10351 const auto *MMOIt = MI.memoperands_begin();
10352 const MachineMemOperand *MemOp = *MMOIt;
10353 bool IsVolatile = MemOp->isVolatile();
10354
10355 // See if this is a constant length copy
10356 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10357 // FIXME: support dynamically sized G_MEMCPY_INLINE
10358 assert(LenVRegAndVal &&
10359 "inline memcpy with dynamic size is not yet supported");
10360 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10361 if (KnownLen == 0) {
10362 MI.eraseFromParent();
10363 return Legalized;
10364 }
10365
10366 const auto &DstMMO = **MI.memoperands_begin();
10367 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10368 Align DstAlign = DstMMO.getBaseAlign();
10369 Align SrcAlign = SrcMMO.getBaseAlign();
10370
10371 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
10372 IsVolatile);
10373}
10374
10376LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
10377 uint64_t KnownLen, Align DstAlign,
10378 Align SrcAlign, bool IsVolatile) {
10379 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10380 return lowerMemcpy(MI, Dst, Src, KnownLen,
10381 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
10382 IsVolatile);
10383}
10384
10386LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
10387 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
10388 Align SrcAlign, bool IsVolatile) {
10389 auto &MF = *MI.getParent()->getParent();
10390 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10391 auto &DL = MF.getDataLayout();
10392 LLVMContext &C = MF.getFunction().getContext();
10393
10394 assert(KnownLen != 0 && "Have a zero length memcpy length!");
10395
10396 bool DstAlignCanChange = false;
10397 MachineFrameInfo &MFI = MF.getFrameInfo();
10398 Align Alignment = std::min(DstAlign, SrcAlign);
10399
10400 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10401 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10402 DstAlignCanChange = true;
10403
10404 // FIXME: infer better src pointer alignment like SelectionDAG does here.
10405 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
10406 // if the memcpy is in a tail call position.
10407
10408 std::vector<LLT> MemOps;
10409
10410 const auto &DstMMO = **MI.memoperands_begin();
10411 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10412 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10413 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10414
10415 if (!findGISelOptimalMemOpLowering(
10416 MemOps, Limit,
10417 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10418 IsVolatile),
10419 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10420 MF.getFunction().getAttributes(), TLI))
10421 return UnableToLegalize;
10422
10423 if (DstAlignCanChange) {
10424 // Get an estimate of the type from the LLT.
10425 Type *IRTy = getTypeForLLT(MemOps[0], C);
10426 Align NewAlign = DL.getABITypeAlign(IRTy);
10427
10428 // Don't promote to an alignment that would require dynamic stack
10429 // realignment.
10430 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10431 if (!TRI->hasStackRealignment(MF))
10432 if (MaybeAlign StackAlign = DL.getStackAlignment())
10433 NewAlign = std::min(NewAlign, *StackAlign);
10434
10435 if (NewAlign > Alignment) {
10436 Alignment = NewAlign;
10437 unsigned FI = FIDef->getOperand(1).getIndex();
10438 // Give the stack frame object a larger alignment if needed.
10439 if (MFI.getObjectAlign(FI) < Alignment)
10440 MFI.setObjectAlignment(FI, Alignment);
10441 }
10442 }
10443
10444 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
10445
10446 MachineIRBuilder MIB(MI);
10447 // Now we need to emit a pair of load and stores for each of the types we've
10448 // collected. I.e. for each type, generate a load from the source pointer of
10449 // that type width, and then generate a corresponding store to the dest buffer
10450 // of that value loaded. This can result in a sequence of loads and stores
10451 // of mixed types, depending on what the target specifies as good types to use.
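// For example, a 7-byte copy with MemOps = {s32, s32} emits an s32 load/store
// pair at offset 0 and a second, overlapping s32 pair at offset 3.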
10452 unsigned CurrOffset = 0;
10453 unsigned Size = KnownLen;
10454 for (auto CopyTy : MemOps) {
10455 // Issuing an unaligned load / store pair that overlaps with the previous
10456 // pair. Adjust the offset accordingly.
10457 if (CopyTy.getSizeInBytes() > Size)
10458 CurrOffset -= CopyTy.getSizeInBytes() - Size;
10459
10460 // Construct MMOs for the accesses.
10461 auto *LoadMMO =
10462 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10463 auto *StoreMMO =
10464 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10465
10466 // Create the load.
10467 Register LoadPtr = Src;
10468 Register Offset;
10469 if (CurrOffset != 0) {
10470 LLT SrcTy = MRI.getType(Src);
10471 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
10472 .getReg(0);
10473 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10474 }
10475 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
10476
10477 // Create the store.
10478 Register StorePtr = Dst;
10479 if (CurrOffset != 0) {
10480 LLT DstTy = MRI.getType(Dst);
10481 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10482 }
10483 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
10484 CurrOffset += CopyTy.getSizeInBytes();
10485 Size -= CopyTy.getSizeInBytes();
10486 }
10487
10488 MI.eraseFromParent();
10489 return Legalized;
10490}
10491
10493LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
10494 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
10495 bool IsVolatile) {
10496 auto &MF = *MI.getParent()->getParent();
10497 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10498 auto &DL = MF.getDataLayout();
10499 LLVMContext &C = MF.getFunction().getContext();
10500
10501 assert(KnownLen != 0 && "Have a zero length memmove length!");
10502
10503 bool DstAlignCanChange = false;
10504 MachineFrameInfo &MFI = MF.getFrameInfo();
10505 bool OptSize = shouldLowerMemFuncForSize(MF);
10506 Align Alignment = std::min(DstAlign, SrcAlign);
10507
10508 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10509 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10510 DstAlignCanChange = true;
10511
10512 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
10513 std::vector<LLT> MemOps;
10514
10515 const auto &DstMMO = **MI.memoperands_begin();
10516 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10517 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10518 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10519
10520 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
10521 // to a bug in its findOptimalMemOpLowering implementation. For now do the
10522 // same thing here.
10523 if (!findGISelOptimalMemOpLowering(
10524 MemOps, Limit,
10525 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10526 /*IsVolatile*/ true),
10527 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10528 MF.getFunction().getAttributes(), TLI))
10529 return UnableToLegalize;
10530
10531 if (DstAlignCanChange) {
10532 // Get an estimate of the type from the LLT.
10533 Type *IRTy = getTypeForLLT(MemOps[0], C);
10534 Align NewAlign = DL.getABITypeAlign(IRTy);
10535
10536 // Don't promote to an alignment that would require dynamic stack
10537 // realignment.
10538 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10539 if (!TRI->hasStackRealignment(MF))
10540 if (MaybeAlign StackAlign = DL.getStackAlignment())
10541 NewAlign = std::min(NewAlign, *StackAlign);
10542
10543 if (NewAlign > Alignment) {
10544 Alignment = NewAlign;
10545 unsigned FI = FIDef->getOperand(1).getIndex();
10546 // Give the stack frame object a larger alignment if needed.
10547 if (MFI.getObjectAlign(FI) < Alignment)
10548 MFI.setObjectAlignment(FI, Alignment);
10549 }
10550 }
10551
10552 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
10553
10554 MachineIRBuilder MIB(MI);
10555 // Memmove requires that we perform the loads first before issuing the stores.
10556 // Apart from that, this loop is pretty much doing the same thing as the
10557 // memcpy codegen function.
10558 unsigned CurrOffset = 0;
10559 SmallVector<Register, 16> LoadVals;
10560 for (auto CopyTy : MemOps) {
10561 // Construct MMO for the load.
10562 auto *LoadMMO =
10563 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10564
10565 // Create the load.
10566 Register LoadPtr = Src;
10567 if (CurrOffset != 0) {
10568 LLT SrcTy = MRI.getType(Src);
10569 auto Offset =
10570 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
10571 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10572 }
10573 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
10574 CurrOffset += CopyTy.getSizeInBytes();
10575 }
10576
10577 CurrOffset = 0;
10578 for (unsigned I = 0; I < MemOps.size(); ++I) {
10579 LLT CopyTy = MemOps[I];
10580 // Now store the values loaded.
10581 auto *StoreMMO =
10582 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10583
10584 Register StorePtr = Dst;
10585 if (CurrOffset != 0) {
10586 LLT DstTy = MRI.getType(Dst);
10587 auto Offset =
10588 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
10589 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10590 }
10591 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
10592 CurrOffset += CopyTy.getSizeInBytes();
10593 }
10594 MI.eraseFromParent();
10595 return Legalized;
10596}
10597
10598LegalizerHelper::LegalizeResult
10599LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
10600 const unsigned Opc = MI.getOpcode();
10601 // This combine is fairly complex so it's not written with a separate
10602 // matcher function.
10603 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
10604 Opc == TargetOpcode::G_MEMSET) &&
10605 "Expected memcpy like instruction");
10606
10607 auto MMOIt = MI.memoperands_begin();
10608 const MachineMemOperand *MemOp = *MMOIt;
10609
10610 Align DstAlign = MemOp->getBaseAlign();
10611 Align SrcAlign;
10612 auto [Dst, Src, Len] = MI.getFirst3Regs();
10613
10614 if (Opc != TargetOpcode::G_MEMSET) {
10615 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
10616 MemOp = *(++MMOIt);
10617 SrcAlign = MemOp->getBaseAlign();
10618 }
10619
10620 // See if this is a constant length copy
10621 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10622 if (!LenVRegAndVal)
10623 return UnableToLegalize;
10624 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10625
10626 if (KnownLen == 0) {
10627 MI.eraseFromParent();
10628 return Legalized;
10629 }
10630
10631 if (MaxLen && KnownLen > MaxLen)
10632 return UnableToLegalize;
10633
10634 bool IsVolatile = MemOp->isVolatile();
10635 if (Opc == TargetOpcode::G_MEMCPY) {
10636 auto &MF = *MI.getParent()->getParent();
10637 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10638 bool OptSize = shouldLowerMemFuncForSize(MF);
10639 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
10640 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
10641 IsVolatile);
10642 }
10643 if (Opc == TargetOpcode::G_MEMMOVE)
10644 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
10645 if (Opc == TargetOpcode::G_MEMSET)
10646 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
10647 return UnableToLegalize;
10648}
unsigned const MachineRegisterInfo * MRI
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S16
constexpr LLT S1
constexpr LLT S32
constexpr LLT S64
AMDGPU Register Bank Select
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition Utils.h:75
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, const TargetLowering &TLI, bool IsSigned=false)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t High
R600 Clause Merge
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1347
APInt bitcastToAPInt() const
Definition APFloat.h:1353
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1138
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1098
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:206
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:216
void negate()
Negate this APInt in place.
Definition APInt.h:1468
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:873
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition APInt.h:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
iterator begin() const
Definition ArrayRef.h:135
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:142
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:681
@ ICMP_SLT
signed less than
Definition InstrTypes.h:707
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:708
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:684
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:693
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:682
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:683
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:702
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:701
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:705
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:692
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:686
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:689
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:703
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:690
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:685
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:687
@ ICMP_NE
not equal
Definition InstrTypes.h:700
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:706
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:694
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:691
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:688
bool isSigned() const
Definition InstrTypes.h:932
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:791
const APFloat & getValueAPF() const
Definition Constants.h:320
This is the shared class of boolean and integer constants.
Definition Constants.h:87
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isBigEndian() const
Definition DataLayout.h:199
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a threeway compare.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSSUBE(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so that clients can re-use it.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar type as the original but with fewer elements.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from CastTy.
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addition can be safely performed at full-width with only the result truncated down to 16 bits).
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to CastTy.
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base address of VecPtr.
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's type to WideTy and truncating it back with the TruncOpcode, and replacing the vreg of the operand in place.
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
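As a rough illustration of how the entry points above fit together, the following hedged sketch drives one instruction through legalizeInstrStep and inspects the tri-state result; MF, Observer, MIRBuilder and MI are assumed to exist, and real clients normally go through the Legalizer machine-function pass instead:
LegalizerHelper Helper(MF, Observer, MIRBuilder);
LostDebugLocObserver LocObserver("legalizer");
switch (Helper.legalizeInstrStep(MI, LocObserver)) {
case LegalizerHelper::AlreadyLegal:
  break;                                  // nothing changed
case LegalizerHelper::Legalized:
  LocObserver.checkpoint();               // MI was rewritten; audit debug locs
  break;
case LegalizerHelper::UnableToLegalize:
  report_fatal_error("unable to legalize instruction");
}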
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:64
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition MCInstrInfo.h:71
A single uniqued string.
Definition Metadata.h:720
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:617
Machine Value Type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
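Load/store legalizations such as reduceLoadStoreWidth and lowerStore need a fresh memory operand for every narrowed access. A hedged sketch that builds one for a 32-bit slice of a wider load (offset handling simplified; MF and MI are assumed to be in scope):
MachineMemOperand &WideMMO = **MI.memoperands_begin();
MachineMemOperand *NarrowMMO = MF.getMachineMemOperand(
    WideMMO.getPointerInfo().getWithOffset(4),  // second 32-bit piece
    MachineMemOperand::MOLoad, LLT::scalar(32),
    commonAlignment(WideMMO.getAlign(), 4));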
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation: Zero = G_CONSTANT 0; Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of Res and Op.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
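The build* helpers above are how every lowering in this file emits its replacement code. Purely as an illustration (not the exact sequence lowerRotate produces), a left rotate of an s32 value Src by Amt into Dst could be stitched together like this, with all registers assumed to already exist:
LLT S32 = LLT::scalar(32);
auto C31    = MIRBuilder.buildConstant(S32, 31);
auto AmtLo  = MIRBuilder.buildAnd(S32, Amt, C31);     // amt & 31
auto NegAmt = MIRBuilder.buildNeg(S32, Amt);
auto AmtHi  = MIRBuilder.buildAnd(S32, NegAmt, C31);  // (-amt) & 31
auto HiPart = MIRBuilder.buildShl(S32, Src, AmtLo);
auto LoPart = MIRBuilder.buildLShr(S32, Src, AmtHi);
MIRBuilder.buildOr(Dst, HiPart, LoPart);              // rotl(Src, Amt)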
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isValid() const
Definition Register.h:107
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:148
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:414
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Definition Triple.h:611
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:297
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Definition Type.cpp:290
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:281
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:286
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
Definition Type.cpp:289
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:285
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:283
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is legal.
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
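These actions are what a target's LegalizerInfo hands back to the helper for each opcode/type combination. As a hedged sketch of how a target might request them from inside its LegalizerInfo constructor (rule-builder method names taken from LegalizeRuleSet; the concrete rules are entirely target specific):
getActionDefinitionsBuilder(TargetOpcode::G_ADD)
    .legalFor({LLT::scalar(32), LLT::scalar(64)})        // Legal
    .widenScalarToNextPow2(0)                            // WidenScalar
    .clampScalar(0, LLT::scalar(32), LLT::scalar(64));   // Narrow/WidenScalar
getActionDefinitionsBuilder(TargetOpcode::G_FREM)
    .libcallFor({LLT::scalar(32), LLT::scalar(64)});     // Libcall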
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
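A short hedged example of these matcher combinators, assuming Reg and MRI are in scope and the MIPatternMatch namespace is in use:
APInt ShiftAmt;
if (mi_match(Reg, MRI, m_ICst(ShiftAmt))) {
  // Reg resolves to a G_CONSTANT; ShiftAmt now holds its value.
}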
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
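These selectors are how the libcall path maps an LLT pair onto a runtime routine. A hedged sketch for an f64-to-f32 truncation, assuming it runs inside a LegalizerHelper lowering that returns LegalizeResult:
LLT SrcTy = LLT::scalar(64), DstTy = LLT::scalar(32);
RTLIB::Libcall LC =
    RTLIB::getFPROUND(getMVTForLLT(SrcTy), getMVTForLLT(DstTy));
if (LC == RTLIB::UNKNOWN_LIBCALL)
  return UnableToLegalize;  // no runtime routine for this conversion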
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:318
@ Offset
Definition DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:831
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:2033
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:651
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1657
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:294
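For example, the shift-narrowing code needs to know whether the amount operand is a compile-time constant before it can pick the constant-indexed path; a hedged sketch of that check, assuming MI is a G_SHL-style instruction:
Register AmtReg = MI.getOperand(2).getReg();
if (std::optional<APInt> Amt = getIConstantVRegVal(AmtReg, MRI)) {
  // Constant amount: a narrowScalarShiftByConstant-style path can index the
  // source parts statically instead of emitting selects on the amount.
}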
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for an N-bit signed integer.
Definition MathExtras.h:232
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2116
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:293
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1565
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_BUILD_VECTOR.
Definition Utils.cpp:1622
LLVM_ABI LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1152
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:396
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition Utils.cpp:1189
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:368
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
LLVM_ABI LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
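A hedged sketch of calling this helper directly for a unary double-precision routine; DstReg, SrcReg, LocObserver and MI are assumed to exist, the symbol name is purely illustrative, and the code is presumed to run inside a LegalizerHelper lowering:
MachineFunction &MF = MIRBuilder.getMF();
LLVMContext &Ctx = MF.getFunction().getContext();
Type *F64 = Type::getDoubleTy(Ctx);
CallLowering::ArgInfo Result({DstReg}, F64, 0);
CallLowering::ArgInfo Arg({SrcReg}, F64, 0);
auto Status = createLibcall(MIRBuilder, "sin", Result, {Arg},
                            CallingConv::C, LocObserver, &MI);
if (Status != LegalizerHelper::Legalized)
  return UnableToLegalize;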
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:548
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition Utils.cpp:506
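As an illustration of the narrowing flow that narrowScalarBasic follows, a hedged sketch that splits two s128 operands into s32 pieces, combines them piece-wise, and rebuilds the wide result (register names assumed; only bitwise operations narrow this simply):
LLT S32 = LLT::scalar(32);
SmallVector<Register, 4> LHSParts, RHSParts, ResParts;
extractParts(LHSReg, S32, 4, LHSParts, MIRBuilder, MRI);
extractParts(RHSReg, S32, 4, RHSParts, MIRBuilder, MRI);
for (unsigned I = 0; I != 4; ++I)
  ResParts.push_back(
      MIRBuilder.buildAnd(S32, LHSParts[I], RHSParts[I]).getReg(0));
MIRBuilder.buildMergeLikeInstr(DstReg, ResParts);  // reassemble the s128 result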
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:155
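A few worked values for the arithmetic helpers listed above, as a quick sanity check of what they compute:
uint64_t Padded = alignTo(10, Align(8));  // 16: next multiple of 8
uint64_t Pow2   = PowerOf2Ceil(5);        // 8
unsigned LogVal = Log2_32(32);            // 5
bool     IsPow2 = isPowerOf2_64(64);      // true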
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1815
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for an N-bit signed integer.
Definition MathExtras.h:241
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its APInt value and def register.
Definition Utils.cpp:433
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition Utils.h:352
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1877
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:212
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:111
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:208
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition Utils.cpp:1277
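getLCMType and getGCDType pick the working types when a value has to be split or padded between two sizes; for plain scalars they reduce to the LCM/GCD of the bit widths (vector behaviour is more involved, so this hedged example sticks to scalars):
LLT Wide   = getLCMType(LLT::scalar(64), LLT::scalar(32));  // s64
LLT Narrow = getGCDType(LLT::scalar(64), LLT::scalar(32));  // s32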
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:299
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition Utils.cpp:609
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:384
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
Definition APFloat.cpp:266
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static constexpr roundingMode rmTowardZero
Definition APFloat.h:308
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition APFloat.cpp:267
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:320
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:85
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
The LegalityQuery object bundles together all the information that's needed to decide whether a given operation is legal or not.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:117
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)