LegalizerHelper.cpp
1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
36#include "llvm/Support/Debug.h"
40#include <numeric>
41#include <optional>
42
43#define DEBUG_TYPE "legalizer"
44
45using namespace llvm;
46using namespace LegalizeActions;
47using namespace MIPatternMatch;
48
49/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
50///
51/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
52/// with any leftover piece as type \p LeftoverTy
53///
54/// Returns -1 in the first element of the pair if the breakdown is not
55/// satisfiable.
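/// For example, breaking an s96 value into s32 pieces yields {3, 0}; breaking
/// it into s64 pieces yields {1, 1} with LeftoverTy set to s32.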
56static std::pair<int, int>
57getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
58 assert(!LeftoverTy.isValid() && "this is an out argument");
59
60 unsigned Size = OrigTy.getSizeInBits();
61 unsigned NarrowSize = NarrowTy.getSizeInBits();
62 unsigned NumParts = Size / NarrowSize;
63 unsigned LeftoverSize = Size - NumParts * NarrowSize;
64 assert(Size > NarrowSize);
65
66 if (LeftoverSize == 0)
67 return {NumParts, 0};
68
69 if (NarrowTy.isVector()) {
70 unsigned EltSize = OrigTy.getScalarSizeInBits();
71 if (LeftoverSize % EltSize != 0)
72 return {-1, -1};
73 LeftoverTy = OrigTy.changeElementCount(
74 ElementCount::getFixed(LeftoverSize / EltSize));
75 } else {
76 LeftoverTy = LLT::scalar(LeftoverSize);
77 }
78
79 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
80 return std::make_pair(NumParts, NumLeftover);
81}
82
83Type *llvm::getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
84
85 if (!Ty.isScalar())
86 return nullptr;
87
88 switch (Ty.getSizeInBits()) {
89 case 16:
90 return Type::getHalfTy(Ctx);
91 case 32:
92 return Type::getFloatTy(Ctx);
93 case 64:
94 return Type::getDoubleTy(Ctx);
95 case 80:
96 return Type::getX86_FP80Ty(Ctx);
97 case 128:
98 return Type::getFP128Ty(Ctx);
99 default:
100 return nullptr;
101 }
102}
103
104LegalizerHelper::LegalizerHelper(MachineFunction &MF,
105 GISelChangeObserver &Observer,
106 MachineIRBuilder &Builder,
107 const LibcallLoweringInfo *Libcalls)
108 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
109 LI(*MF.getSubtarget().getLegalizerInfo()),
110 TLI(*MF.getSubtarget().getTargetLowering()), Libcalls(Libcalls) {}
111
112LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
113 GISelChangeObserver &Observer,
114 MachineIRBuilder &B,
115 const LibcallLoweringInfo *Libcalls,
116 GISelValueTracking *VT)
117 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
118 TLI(*MF.getSubtarget().getTargetLowering()), Libcalls(Libcalls), VT(VT) {}
119
120LegalizerHelper::LegalizeResult
121LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
122 LostDebugLocObserver &LocObserver) {
123 LLVM_DEBUG(dbgs() << "\nLegalizing: " << MI);
124
125 MIRBuilder.setInstrAndDebugLoc(MI);
126
127 if (isa<GIntrinsic>(MI))
128 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
129 auto Step = LI.getAction(MI, MRI);
130 switch (Step.Action) {
131 case Legal:
132 LLVM_DEBUG(dbgs() << ".. Already legal\n");
133 return AlreadyLegal;
134 case Libcall:
135 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
136 return libcall(MI, LocObserver);
137 case NarrowScalar:
138 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
139 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
140 case WidenScalar:
141 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
142 return widenScalar(MI, Step.TypeIdx, Step.NewType);
143 case Bitcast:
144 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
145 return bitcast(MI, Step.TypeIdx, Step.NewType);
146 case Lower:
147 LLVM_DEBUG(dbgs() << ".. Lower\n");
148 return lower(MI, Step.TypeIdx, Step.NewType);
149 case FewerElements:
150 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
151 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
152 case MoreElements:
153 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
154 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
155 case Custom:
156 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
157 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
158 : UnableToLegalize;
159 default:
160 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
161 return UnableToLegalize;
162 }
163}
164
165void LegalizerHelper::insertParts(Register DstReg,
166 LLT ResultTy, LLT PartTy,
167 ArrayRef<Register> PartRegs,
168 LLT LeftoverTy,
169 ArrayRef<Register> LeftoverRegs) {
170 if (!LeftoverTy.isValid()) {
171 assert(LeftoverRegs.empty());
172
173 if (!ResultTy.isVector()) {
174 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
175 return;
176 }
177
178 if (PartTy.isVector())
179 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
180 else
181 MIRBuilder.buildBuildVector(DstReg, PartRegs);
182 return;
183 }
184
185 // Merge sub-vectors with different numbers of elements and insert into DstReg.
186 if (ResultTy.isVector()) {
187 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
188 SmallVector<Register, 8> AllRegs(PartRegs);
189 AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
190 return mergeMixedSubvectors(DstReg, AllRegs);
191 }
192
193 SmallVector<Register> GCDRegs;
194 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
195 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
196 extractGCDType(GCDRegs, GCDTy, PartReg);
197 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
198 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
199}
200
201void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
202 Register Reg) {
203 LLT Ty = MRI.getType(Reg);
204 SmallVector<Register, 0> RegElts;
205 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
206 MIRBuilder, MRI);
207 Elts.append(RegElts);
208}
209
210/// Merge \p PartRegs with different types into \p DstReg.
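/// For example (illustrative), two <4 x s32> parts plus a <2 x s32> leftover
/// are flattened to ten s32 elements and remerged into a <10 x s32> DstReg; a
/// trailing scalar leftover is appended as a single element instead.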
211void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
212 ArrayRef<Register> PartRegs) {
213 SmallVector<Register, 32> AllElts;
214 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
215 appendVectorElts(AllElts, PartRegs[i]);
216
217 Register Leftover = PartRegs[PartRegs.size() - 1];
218 if (!MRI.getType(Leftover).isVector())
219 AllElts.push_back(Leftover);
220 else
221 appendVectorElts(AllElts, Leftover);
222
223 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
224}
225
226/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
227static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
228 const MachineInstr &MI) {
229 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
230
231 const int StartIdx = Regs.size();
232 const int NumResults = MI.getNumOperands() - 1;
233 Regs.resize(Regs.size() + NumResults);
234 for (int I = 0; I != NumResults; ++I)
235 Regs[StartIdx + I] = MI.getOperand(I).getReg();
236}
237
238void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
239 LLT GCDTy, Register SrcReg) {
240 LLT SrcTy = MRI.getType(SrcReg);
241 if (SrcTy == GCDTy) {
242 // If the source already evenly divides the result type, we don't need to do
243 // anything.
244 Parts.push_back(SrcReg);
245 } else {
246 // Need to split into common type sized pieces.
247 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
248 getUnmergeResults(Parts, *Unmerge);
249 }
250}
251
252LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
253 LLT NarrowTy, Register SrcReg) {
254 LLT SrcTy = MRI.getType(SrcReg);
255 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
256 extractGCDType(Parts, GCDTy, SrcReg);
257 return GCDTy;
258}
259
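// buildLCMMergePieces merges the GCDTy-sized pieces in VRegs into NarrowTy-
// sized registers that cover getLCMType(DstTy, NarrowTy), padding any missing
// high pieces according to PadStrategy. For example, with DstTy = s96 and
// NarrowTy = s64, LCMTy is s192 and, if GCDTy is s32, the three s32 source
// pieces are padded out to six and merged into three s64 values; a caller such
// as insertParts then recovers the s96 result via buildWidenedRemergeToDst.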
260LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
261 SmallVectorImpl<Register> &VRegs,
262 unsigned PadStrategy) {
263 LLT LCMTy = getLCMType(DstTy, NarrowTy);
264
265 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
266 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
267 int NumOrigSrc = VRegs.size();
268
269 Register PadReg;
270
271 // Get a value we can use to pad the source value if the sources won't evenly
272 // cover the result type.
273 if (NumOrigSrc < NumParts * NumSubParts) {
274 if (PadStrategy == TargetOpcode::G_ZEXT)
275 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
276 else if (PadStrategy == TargetOpcode::G_ANYEXT)
277 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
278 else {
279 assert(PadStrategy == TargetOpcode::G_SEXT);
280
281 // Shift the sign bit of the low register through the high register.
282 auto ShiftAmt =
283 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
284 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
285 }
286 }
287
288 // Registers for the final merge to be produced.
289 SmallVector<Register, 4> Remerge(NumParts);
290
291 // Registers needed for intermediate merges, which will be merged into a
292 // source for Remerge.
293 SmallVector<Register, 4> SubMerge(NumSubParts);
294
295 // Once we've fully read off the end of the original source bits, we can reuse
296 // the same high bits for remaining padding elements.
297 Register AllPadReg;
298
299 // Build merges to the LCM type to cover the original result type.
300 for (int I = 0; I != NumParts; ++I) {
301 bool AllMergePartsArePadding = true;
302
303 // Build the requested merges to the requested type.
304 for (int J = 0; J != NumSubParts; ++J) {
305 int Idx = I * NumSubParts + J;
306 if (Idx >= NumOrigSrc) {
307 SubMerge[J] = PadReg;
308 continue;
309 }
310
311 SubMerge[J] = VRegs[Idx];
312
313 // There are meaningful bits here we can't reuse later.
314 AllMergePartsArePadding = false;
315 }
316
317 // If we've filled up a complete piece with padding bits, we can directly
318 // emit the natural sized constant if applicable, rather than a merge of
319 // smaller constants.
320 if (AllMergePartsArePadding && !AllPadReg) {
321 if (PadStrategy == TargetOpcode::G_ANYEXT)
322 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
323 else if (PadStrategy == TargetOpcode::G_ZEXT)
324 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
325
326 // If this is a sign extension, we can't materialize a trivial constant
327 // with the right type and have to produce a merge.
328 }
329
330 if (AllPadReg) {
331 // Avoid creating additional instructions if we're just adding additional
332 // copies of padding bits.
333 Remerge[I] = AllPadReg;
334 continue;
335 }
336
337 if (NumSubParts == 1)
338 Remerge[I] = SubMerge[0];
339 else
340 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
341
342 // In the sign extend padding case, re-use the first all-signbit merge.
343 if (AllMergePartsArePadding && !AllPadReg)
344 AllPadReg = Remerge[I];
345 }
346
347 VRegs = std::move(Remerge);
348 return LCMTy;
349}
350
351void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
352 ArrayRef<Register> RemergeRegs) {
353 LLT DstTy = MRI.getType(DstReg);
354
355 // Create the merge to the widened source, and extract the relevant bits into
356 // the result.
357
358 if (DstTy == LCMTy) {
359 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
360 return;
361 }
362
363 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
364 if (DstTy.isScalar() && LCMTy.isScalar()) {
365 MIRBuilder.buildTrunc(DstReg, Remerge);
366 return;
367 }
368
369 if (LCMTy.isVector()) {
370 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
371 SmallVector<Register, 8> UnmergeDefs(NumDefs);
372 UnmergeDefs[0] = DstReg;
373 for (unsigned I = 1; I != NumDefs; ++I)
374 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
375
376 MIRBuilder.buildUnmerge(UnmergeDefs,
377 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
378 return;
379 }
380
381 llvm_unreachable("unhandled case");
382}
383
384static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
385#define RTLIBCASE_INT(LibcallPrefix) \
386 do { \
387 switch (Size) { \
388 case 32: \
389 return RTLIB::LibcallPrefix##32; \
390 case 64: \
391 return RTLIB::LibcallPrefix##64; \
392 case 128: \
393 return RTLIB::LibcallPrefix##128; \
394 default: \
395 llvm_unreachable("unexpected size"); \
396 } \
397 } while (0)
398
399#define RTLIBCASE(LibcallPrefix) \
400 do { \
401 switch (Size) { \
402 case 32: \
403 return RTLIB::LibcallPrefix##32; \
404 case 64: \
405 return RTLIB::LibcallPrefix##64; \
406 case 80: \
407 return RTLIB::LibcallPrefix##80; \
408 case 128: \
409 return RTLIB::LibcallPrefix##128; \
410 default: \
411 llvm_unreachable("unexpected size"); \
412 } \
413 } while (0)
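// Each RTLIBCASE(Prefix) expands to a switch on Size that returns the matching
// fixed-width enumerator; e.g. RTLIBCASE(ADD_F) with Size == 64 yields
// RTLIB::ADD_F64.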
414
415 switch (Opcode) {
416 case TargetOpcode::G_LROUND:
417 RTLIBCASE(LROUND_F);
418 case TargetOpcode::G_LLROUND:
419 RTLIBCASE(LLROUND_F);
420 case TargetOpcode::G_MUL:
421 RTLIBCASE_INT(MUL_I);
422 case TargetOpcode::G_SDIV:
423 RTLIBCASE_INT(SDIV_I);
424 case TargetOpcode::G_UDIV:
425 RTLIBCASE_INT(UDIV_I);
426 case TargetOpcode::G_SREM:
427 RTLIBCASE_INT(SREM_I);
428 case TargetOpcode::G_UREM:
429 RTLIBCASE_INT(UREM_I);
430 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
431 RTLIBCASE_INT(CTLZ_I);
432 case TargetOpcode::G_FADD:
433 RTLIBCASE(ADD_F);
434 case TargetOpcode::G_FSUB:
435 RTLIBCASE(SUB_F);
436 case TargetOpcode::G_FMUL:
437 RTLIBCASE(MUL_F);
438 case TargetOpcode::G_FDIV:
439 RTLIBCASE(DIV_F);
440 case TargetOpcode::G_FEXP:
441 RTLIBCASE(EXP_F);
442 case TargetOpcode::G_FEXP2:
443 RTLIBCASE(EXP2_F);
444 case TargetOpcode::G_FEXP10:
445 RTLIBCASE(EXP10_F);
446 case TargetOpcode::G_FREM:
447 RTLIBCASE(REM_F);
448 case TargetOpcode::G_FPOW:
449 RTLIBCASE(POW_F);
450 case TargetOpcode::G_FPOWI:
451 RTLIBCASE(POWI_F);
452 case TargetOpcode::G_FMA:
453 RTLIBCASE(FMA_F);
454 case TargetOpcode::G_FSIN:
455 RTLIBCASE(SIN_F);
456 case TargetOpcode::G_FCOS:
457 RTLIBCASE(COS_F);
458 case TargetOpcode::G_FTAN:
459 RTLIBCASE(TAN_F);
460 case TargetOpcode::G_FASIN:
461 RTLIBCASE(ASIN_F);
462 case TargetOpcode::G_FACOS:
463 RTLIBCASE(ACOS_F);
464 case TargetOpcode::G_FATAN:
465 RTLIBCASE(ATAN_F);
466 case TargetOpcode::G_FATAN2:
467 RTLIBCASE(ATAN2_F);
468 case TargetOpcode::G_FSINH:
469 RTLIBCASE(SINH_F);
470 case TargetOpcode::G_FCOSH:
471 RTLIBCASE(COSH_F);
472 case TargetOpcode::G_FTANH:
473 RTLIBCASE(TANH_F);
474 case TargetOpcode::G_FSINCOS:
475 RTLIBCASE(SINCOS_F);
476 case TargetOpcode::G_FMODF:
477 RTLIBCASE(MODF_F);
478 case TargetOpcode::G_FLOG10:
479 RTLIBCASE(LOG10_F);
480 case TargetOpcode::G_FLOG:
481 RTLIBCASE(LOG_F);
482 case TargetOpcode::G_FLOG2:
483 RTLIBCASE(LOG2_F);
484 case TargetOpcode::G_FLDEXP:
485 RTLIBCASE(LDEXP_F);
486 case TargetOpcode::G_FCEIL:
487 RTLIBCASE(CEIL_F);
488 case TargetOpcode::G_FFLOOR:
489 RTLIBCASE(FLOOR_F);
490 case TargetOpcode::G_FMINNUM:
491 RTLIBCASE(FMIN_F);
492 case TargetOpcode::G_FMAXNUM:
493 RTLIBCASE(FMAX_F);
494 case TargetOpcode::G_FMINIMUMNUM:
495 RTLIBCASE(FMINIMUM_NUM_F);
496 case TargetOpcode::G_FMAXIMUMNUM:
497 RTLIBCASE(FMAXIMUM_NUM_F);
498 case TargetOpcode::G_FSQRT:
499 RTLIBCASE(SQRT_F);
500 case TargetOpcode::G_FRINT:
501 RTLIBCASE(RINT_F);
502 case TargetOpcode::G_FNEARBYINT:
503 RTLIBCASE(NEARBYINT_F);
504 case TargetOpcode::G_INTRINSIC_TRUNC:
505 RTLIBCASE(TRUNC_F);
506 case TargetOpcode::G_INTRINSIC_ROUND:
507 RTLIBCASE(ROUND_F);
508 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
509 RTLIBCASE(ROUNDEVEN_F);
510 case TargetOpcode::G_INTRINSIC_LRINT:
511 RTLIBCASE(LRINT_F);
512 case TargetOpcode::G_INTRINSIC_LLRINT:
513 RTLIBCASE(LLRINT_F);
514 }
515 llvm_unreachable("Unknown libcall function");
516#undef RTLIBCASE_INT
517#undef RTLIBCASE
518}
519
520/// True if an instruction is in tail position in its caller. Intended for
521/// legalizing libcalls as tail calls when possible.
522static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
523 MachineInstr &MI,
524 const TargetInstrInfo &TII,
525 MachineRegisterInfo &MRI) {
526 MachineBasicBlock &MBB = *MI.getParent();
527 const Function &F = MBB.getParent()->getFunction();
528
529 // Conservatively require the attributes of the call to match those of
530 // the return. Ignore NoAlias and NonNull because they don't affect the
531 // call sequence.
532 AttributeList CallerAttrs = F.getAttributes();
533 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
534 .removeAttribute(Attribute::NoAlias)
535 .removeAttribute(Attribute::NonNull)
536 .hasAttributes())
537 return false;
538
539 // It's not safe to eliminate the sign / zero extension of the return value.
540 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
541 CallerAttrs.hasRetAttr(Attribute::SExt))
542 return false;
543
544 // Only tail call if the following instruction is a standard return or if we
545 // have a `thisreturn` callee, and a sequence like:
546 //
547 // G_MEMCPY %0, %1, %2
548 // $x0 = COPY %0
549 // RET_ReallyLR implicit $x0
550 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
551 if (Next != MBB.instr_end() && Next->isCopy()) {
552 if (MI.getOpcode() == TargetOpcode::G_BZERO)
553 return false;
554
555 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
556 // memcpy/etc. routines return the same parameter. For others it will be the
557 // returned value.
558 Register VReg = MI.getOperand(0).getReg();
559 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
560 return false;
561
562 Register PReg = Next->getOperand(0).getReg();
563 if (!PReg.isPhysical())
564 return false;
565
566 auto Ret = next_nodbg(Next, MBB.instr_end());
567 if (Ret == MBB.instr_end() || !Ret->isReturn())
568 return false;
569
570 if (Ret->getNumImplicitOperands() != 1)
571 return false;
572
573 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
574 return false;
575
576 // Skip over the COPY that we just validated.
577 Next = Ret;
578 }
579
580 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
581 return false;
582
583 return true;
584}
585
586LegalizerHelper::LegalizeResult LegalizerHelper::createLibcall(
587 const char *Name, const CallLowering::ArgInfo &Result,
588 ArrayRef<CallLowering::ArgInfo> Args, CallingConv::ID CC,
589 LostDebugLocObserver &LocObserver, MachineInstr *MI) const {
590 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
591
592 CallLowering::CallLoweringInfo Info;
593 Info.CallConv = CC;
594 Info.Callee = MachineOperand::CreateES(Name);
595 Info.OrigRet = Result;
596 if (MI)
597 Info.IsTailCall =
598 (Result.Ty->isVoidTy() ||
599 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
600 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
601 *MIRBuilder.getMRI());
602
603 llvm::append_range(Info.OrigArgs, Args);
604 if (!CLI.lowerCall(MIRBuilder, Info))
605 return UnableToLegalize;
606
607 if (MI && Info.LoweredTailCall) {
608 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
609
610 // Check debug locations before removing the return.
611 LocObserver.checkpoint(true);
612
613 // We must have a return following the call (or debug insts) to get past
614 // isLibCallInTailPosition.
615 do {
616 MachineInstr *Next = MI->getNextNode();
617 assert(Next &&
618 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
619 "Expected instr following MI to be return or debug inst?");
620 // We lowered a tail call, so the call is now the return from the block.
621 // Delete the old return.
622 Next->eraseFromParent();
623 } while (MI->getNextNode());
624
625 // We expect to lose the debug location from the return.
626 LocObserver.checkpoint(false);
627 }
628 return Legalized;
629}
630
631LegalizerHelper::LegalizeResult LegalizerHelper::createLibcall(
632 RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result,
633 ArrayRef<CallLowering::ArgInfo> Args, LostDebugLocObserver &LocObserver,
634 MachineInstr *MI) const {
635 if (!Libcalls)
636 return UnableToLegalize;
637
638 RTLIB::LibcallImpl LibcallImpl = Libcalls->getLibcallImpl(Libcall);
639 if (LibcallImpl == RTLIB::Unsupported)
640 return UnableToLegalize;
641
643 const CallingConv::ID CC = Libcalls->getLibcallImplCallingConv(LibcallImpl);
644 return createLibcall(Name.data(), Result, Args, CC, LocObserver, MI);
645}
646
647// Useful for libcalls where all operands have the same type.
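// For example, an s64 G_FADD becomes a call to the RTLIB::ADD_F64 libcall
// (typically __adddf3), with both operands and the result passed as double.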
648LegalizerHelper::LegalizeResult
649LegalizerHelper::simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
650 unsigned Size, Type *OpType,
651 LostDebugLocObserver &LocObserver) const {
652 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
653
654 // FIXME: What does the original arg index mean here?
655 SmallVector<CallLowering::ArgInfo, 3> Args;
656 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
657 Args.push_back({MO.getReg(), OpType, 0});
658 return createLibcall(Libcall, {MI.getOperand(0).getReg(), OpType, 0}, Args,
659 LocObserver, &MI);
660}
661
662LegalizerHelper::LegalizeResult LegalizerHelper::emitSincosLibcall(
663 MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType,
664 LostDebugLocObserver &LocObserver) {
665 MachineFunction &MF = *MI.getMF();
666 MachineRegisterInfo &MRI = MF.getRegInfo();
667
668 Register DstSin = MI.getOperand(0).getReg();
669 Register DstCos = MI.getOperand(1).getReg();
670 Register Src = MI.getOperand(2).getReg();
671 LLT DstTy = MRI.getType(DstSin);
672
673 int MemSize = DstTy.getSizeInBytes();
674 Align Alignment = getStackTemporaryAlignment(DstTy);
675 const DataLayout &DL = MIRBuilder.getDataLayout();
676 unsigned AddrSpace = DL.getAllocaAddrSpace();
677 MachinePointerInfo PtrInfo;
678
679 Register StackPtrSin =
680 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
681 .getReg(0);
682 Register StackPtrCos =
683 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
684 .getReg(0);
685
686 auto &Ctx = MF.getFunction().getContext();
687 auto LibcallResult = createLibcall(
688 getRTLibDesc(MI.getOpcode(), Size), {{0}, Type::getVoidTy(Ctx), 0},
689 {{Src, OpType, 0},
690 {StackPtrSin, PointerType::get(Ctx, AddrSpace), 1},
691 {StackPtrCos, PointerType::get(Ctx, AddrSpace), 2}},
692 LocObserver, &MI);
693
694 if (LibcallResult != LegalizeResult::Legalized)
695 return UnableToLegalize;
696
697 MachineMemOperand *LoadMMOSin = MF.getMachineMemOperand(
698 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
699 MachineMemOperand *LoadMMOCos = MF.getMachineMemOperand(
700 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
701
702 MIRBuilder.buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
703 MIRBuilder.buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
704 MI.eraseFromParent();
705
706 return Legalized;
707}
708
709LegalizerHelper::LegalizeResult
710LegalizerHelper::emitModfLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
711 unsigned Size, Type *OpType,
712 LostDebugLocObserver &LocObserver) {
713 MachineFunction &MF = MIRBuilder.getMF();
714 MachineRegisterInfo &MRI = MF.getRegInfo();
715
716 Register DstFrac = MI.getOperand(0).getReg();
717 Register DstInt = MI.getOperand(1).getReg();
718 Register Src = MI.getOperand(2).getReg();
719 LLT DstTy = MRI.getType(DstFrac);
720
721 int MemSize = DstTy.getSizeInBytes();
722 Align Alignment = getStackTemporaryAlignment(DstTy);
723 const DataLayout &DL = MIRBuilder.getDataLayout();
724 unsigned AddrSpace = DL.getAllocaAddrSpace();
725 MachinePointerInfo PtrInfo;
726
727 Register StackPtrInt =
728 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
729 .getReg(0);
730
731 auto &Ctx = MF.getFunction().getContext();
732 auto LibcallResult = createLibcall(
733 getRTLibDesc(MI.getOpcode(), Size), {DstFrac, OpType, 0},
734 {{Src, OpType, 0}, {StackPtrInt, PointerType::get(Ctx, AddrSpace), 1}},
735 LocObserver, &MI);
736
737 if (LibcallResult != LegalizeResult::Legalized)
738 return UnableToLegalize;
739
740 MachineMemOperand *LoadMMOInt = MF.getMachineMemOperand(
741 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
742
743 MIRBuilder.buildLoad(DstInt, StackPtrInt, *LoadMMOInt);
744 MI.eraseFromParent();
745
746 return Legalized;
747}
748
749static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
750 Type *FromType) {
751 auto ToMVT = MVT::getVT(ToType);
752 auto FromMVT = MVT::getVT(FromType);
753
754 switch (Opcode) {
755 case TargetOpcode::G_FPEXT:
756 return RTLIB::getFPEXT(FromMVT, ToMVT);
757 case TargetOpcode::G_FPTRUNC:
758 return RTLIB::getFPROUND(FromMVT, ToMVT);
759 case TargetOpcode::G_FPTOSI:
760 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
761 case TargetOpcode::G_FPTOUI:
762 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
763 case TargetOpcode::G_SITOFP:
764 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
765 case TargetOpcode::G_UITOFP:
766 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
767 }
768 llvm_unreachable("Unsupported libcall function");
769}
770
771LegalizerHelper::LegalizeResult LegalizerHelper::conversionLibcall(
772 MachineInstr &MI, Type *ToType, Type *FromType,
773 LostDebugLocObserver &LocObserver, bool IsSigned) const {
774 CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
775 if (FromType->isIntegerTy()) {
776 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
777 Arg.Flags[0].setSExt();
778 else
779 Arg.Flags[0].setZExt();
780 }
781
782 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
783 return createLibcall(Libcall, {MI.getOperand(0).getReg(), ToType, 0}, Arg,
784 LocObserver, &MI);
785}
786
787LegalizerHelper::LegalizeResult
788LegalizerHelper::createMemLibcall(MachineRegisterInfo &MRI, MachineInstr &MI,
789 LostDebugLocObserver &LocObserver) const {
790 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
791
792 SmallVector<CallLowering::ArgInfo, 3> Args;
793 // Add all the args, except for the last which is an imm denoting 'tail'.
794 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
795 Register Reg = MI.getOperand(i).getReg();
796
797 // Need to derive an IR type for call lowering.
798 LLT OpLLT = MRI.getType(Reg);
799 Type *OpTy = nullptr;
800 if (OpLLT.isPointer())
801 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
802 else
803 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
804 Args.push_back({Reg, OpTy, 0});
805 }
806
807 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
808 RTLIB::Libcall RTLibcall;
809 unsigned Opc = MI.getOpcode();
810 switch (Opc) {
811 case TargetOpcode::G_BZERO:
812 RTLibcall = RTLIB::BZERO;
813 break;
814 case TargetOpcode::G_MEMCPY:
815 RTLibcall = RTLIB::MEMCPY;
816 Args[0].Flags[0].setReturned();
817 break;
818 case TargetOpcode::G_MEMMOVE:
819 RTLibcall = RTLIB::MEMMOVE;
820 Args[0].Flags[0].setReturned();
821 break;
822 case TargetOpcode::G_MEMSET:
823 RTLibcall = RTLIB::MEMSET;
824 Args[0].Flags[0].setReturned();
825 break;
826 default:
827 llvm_unreachable("unsupported opcode");
828 }
829
830 if (!Libcalls) // FIXME: Should be mandatory
831 return UnableToLegalize;
832
833 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
834
835 // Unsupported libcall on the target.
836 if (RTLibcallImpl == RTLIB::Unsupported) {
837 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
838 << MIRBuilder.getTII().getName(Opc) << "\n");
839 return UnableToLegalize;
840 }
841
842 CallLowering::CallLoweringInfo Info;
843 Info.CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
844
845 StringRef LibcallName =
847 Info.Callee = MachineOperand::CreateES(LibcallName.data());
848 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
849 Info.IsTailCall =
850 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
851 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
852
853 llvm::append_range(Info.OrigArgs, Args);
854 if (!CLI.lowerCall(MIRBuilder, Info))
855 return UnableToLegalize;
856
857 if (Info.LoweredTailCall) {
858 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
859
860 // Check debug locations before removing the return.
861 LocObserver.checkpoint(true);
862
863 // We must have a return following the call (or debug insts) to get past
864 // isLibCallInTailPosition.
865 do {
866 MachineInstr *Next = MI.getNextNode();
867 assert(Next &&
868 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
869 "Expected instr following MI to be return or debug inst?");
870 // We lowered a tail call, so the call is now the return from the block.
871 // Delete the old return.
872 Next->eraseFromParent();
873 } while (MI.getNextNode());
874
875 // We expect to lose the debug location from the return.
876 LocObserver.checkpoint(false);
877 }
878
879 return Legalized;
880}
881
882static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
883 unsigned Opc = MI.getOpcode();
884 auto &AtomicMI = cast<GMemOperation>(MI);
885 auto &MMO = AtomicMI.getMMO();
886 auto Ordering = MMO.getMergedOrdering();
887 LLT MemType = MMO.getMemoryType();
888 uint64_t MemSize = MemType.getSizeInBytes();
889 if (MemType.isVector())
890 return RTLIB::UNKNOWN_LIBCALL;
891
892#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
893#define LCALL5(A) \
894 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
895 switch (Opc) {
896 case TargetOpcode::G_ATOMIC_CMPXCHG:
897 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
898 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
899 return getOutlineAtomicHelper(LC, Ordering, MemSize);
900 }
901 case TargetOpcode::G_ATOMICRMW_XCHG: {
902 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
903 return getOutlineAtomicHelper(LC, Ordering, MemSize);
904 }
905 case TargetOpcode::G_ATOMICRMW_ADD:
906 case TargetOpcode::G_ATOMICRMW_SUB: {
907 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
908 return getOutlineAtomicHelper(LC, Ordering, MemSize);
909 }
910 case TargetOpcode::G_ATOMICRMW_AND: {
911 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
912 return getOutlineAtomicHelper(LC, Ordering, MemSize);
913 }
914 case TargetOpcode::G_ATOMICRMW_OR: {
915 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
916 return getOutlineAtomicHelper(LC, Ordering, MemSize);
917 }
918 case TargetOpcode::G_ATOMICRMW_XOR: {
919 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
920 return getOutlineAtomicHelper(LC, Ordering, MemSize);
921 }
922 default:
923 return RTLIB::UNKNOWN_LIBCALL;
924 }
925#undef LCALLS
926#undef LCALL5
927}
928
929LegalizerHelper::LegalizeResult
930LegalizerHelper::createAtomicLibcall(MachineInstr &MI) {
931 auto &Ctx = MIRBuilder.getContext();
932
933 Type *RetTy;
934 SmallVector<Register> RetRegs;
935 SmallVector<CallLowering::ArgInfo, 3> Args;
936 unsigned Opc = MI.getOpcode();
937 switch (Opc) {
938 case TargetOpcode::G_ATOMIC_CMPXCHG:
939 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
940 Register Success;
941 LLT SuccessLLT;
942 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
943 MI.getFirst4RegLLTs();
944 RetRegs.push_back(Ret);
945 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
946 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
947 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
948 NewLLT) = MI.getFirst5RegLLTs();
949 RetRegs.push_back(Success);
950 RetTy = StructType::get(
951 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
952 }
953 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
954 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
955 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
956 break;
957 }
958 case TargetOpcode::G_ATOMICRMW_XCHG:
959 case TargetOpcode::G_ATOMICRMW_ADD:
960 case TargetOpcode::G_ATOMICRMW_SUB:
961 case TargetOpcode::G_ATOMICRMW_AND:
962 case TargetOpcode::G_ATOMICRMW_OR:
963 case TargetOpcode::G_ATOMICRMW_XOR: {
964 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
965 RetRegs.push_back(Ret);
966 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
967 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
968 Val =
969 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
970 .getReg(0);
971 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
972 Val =
973 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
974 .getReg(0);
975 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
976 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
977 break;
978 }
979 default:
980 llvm_unreachable("unsupported opcode");
981 }
982
983 if (!Libcalls) // FIXME: Should be mandatory
984 return UnableToLegalize;
985
986 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
987 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
988 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
989
990 // Unsupported libcall on the target.
991 if (RTLibcallImpl == RTLIB::Unsupported) {
992 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
993 << MIRBuilder.getTII().getName(Opc) << "\n");
994 return UnableToLegalize;
995 }
996
997 CallLowering::CallLoweringInfo Info;
998 Info.CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
999
1000 StringRef LibcallName =
1002 Info.Callee = MachineOperand::CreateES(LibcallName.data());
1003 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
1004
1005 llvm::append_range(Info.OrigArgs, Args);
1006 if (!CLI.lowerCall(MIRBuilder, Info))
1007 return UnableToLegalize;
1008
1009 return Legalized;
1010}
1011
1012static RTLIB::Libcall
1013getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
1014 RTLIB::Libcall RTLibcall;
1015 switch (MI.getOpcode()) {
1016 case TargetOpcode::G_GET_FPENV:
1017 RTLibcall = RTLIB::FEGETENV;
1018 break;
1019 case TargetOpcode::G_SET_FPENV:
1020 case TargetOpcode::G_RESET_FPENV:
1021 RTLibcall = RTLIB::FESETENV;
1022 break;
1023 case TargetOpcode::G_GET_FPMODE:
1024 RTLibcall = RTLIB::FEGETMODE;
1025 break;
1026 case TargetOpcode::G_SET_FPMODE:
1027 case TargetOpcode::G_RESET_FPMODE:
1028 RTLibcall = RTLIB::FESETMODE;
1029 break;
1030 default:
1031 llvm_unreachable("Unexpected opcode");
1032 }
1033 return RTLibcall;
1034}
1035
1036 // Some library functions that read FP state (fegetmode, fegetenv) write the
1037 // state into a region in memory. IR intrinsics that do the same operations
1038 // (get_fpmode, get_fpenv) return the state as an integer value. To implement
1039 // these intrinsics via the library functions, we need to use a temporary
1040 // variable, for example:
1041//
1042// %0:_(s32) = G_GET_FPMODE
1043//
1044// is transformed to:
1045//
1046// %1:_(p0) = G_FRAME_INDEX %stack.0
1047// BL &fegetmode
1048 // %0:_(s32) = G_LOAD %1
1049//
1050LegalizerHelper::LegalizeResult
1051LegalizerHelper::createGetStateLibcall(MachineInstr &MI,
1052 LostDebugLocObserver &LocObserver) {
1053 const DataLayout &DL = MIRBuilder.getDataLayout();
1054 auto &MF = MIRBuilder.getMF();
1055 auto &MRI = *MIRBuilder.getMRI();
1056 auto &Ctx = MF.getFunction().getContext();
1057
1058 // Create temporary, where library function will put the read state.
1059 Register Dst = MI.getOperand(0).getReg();
1060 LLT StateTy = MRI.getType(Dst);
1061 TypeSize StateSize = StateTy.getSizeInBytes();
1062 Align TempAlign = getStackTemporaryAlignment(StateTy);
1063 MachinePointerInfo TempPtrInfo;
1064 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1065
1066 // Create a call to library function, with the temporary as an argument.
1067 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1068 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1069 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1070 auto Res = createLibcall(
1071 RTLibcall, CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1072 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}), LocObserver,
1073 nullptr);
1074 if (Res != LegalizerHelper::Legalized)
1075 return Res;
1076
1077 // Create a load from the temporary.
1078 MachineMemOperand *MMO = MF.getMachineMemOperand(
1079 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
1080 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1081
1082 return Legalized;
1083}
1084
1085 // Similar to `createGetStateLibcall`, the function calls a library function
1086 // using transient space on the stack. In this case the library function reads
1087 // the content of the memory region.
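// For example, roughly:
//
//   G_SET_FPENV %0:_(s32)
//
// is transformed to:
//
//   %1:_(p0) = G_FRAME_INDEX %stack.0
//   G_STORE %0:_(s32), %1:_(p0)
//   BL &fesetenv
//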
1088LegalizerHelper::LegalizeResult
1089LegalizerHelper::createSetStateLibcall(MachineInstr &MI,
1090 LostDebugLocObserver &LocObserver) {
1091 const DataLayout &DL = MIRBuilder.getDataLayout();
1092 auto &MF = MIRBuilder.getMF();
1093 auto &MRI = *MIRBuilder.getMRI();
1094 auto &Ctx = MF.getFunction().getContext();
1095
1096 // Create temporary, where library function will get the new state.
1097 Register Src = MI.getOperand(0).getReg();
1098 LLT StateTy = MRI.getType(Src);
1099 TypeSize StateSize = StateTy.getSizeInBytes();
1100 Align TempAlign = getStackTemporaryAlignment(StateTy);
1101 MachinePointerInfo TempPtrInfo;
1102 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1103
1104 // Put the new state into the temporary.
1105 MachineMemOperand *MMO = MF.getMachineMemOperand(
1106 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
1107 MIRBuilder.buildStore(Src, Temp, *MMO);
1108
1109 // Create a call to library function, with the temporary as an argument.
1110 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1111 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1112 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1113 return createLibcall(RTLibcall,
1114 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1115 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1116 LocObserver, nullptr);
1117}
1118
1119/// Returns the corresponding libcall for the given Pred and
1120/// the ICMP predicate that should be generated to compare with #0
1121/// after the libcall.
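/// For example, at 32 bits FCMP_OGT maps to the OGT_F32 libcall (__gtsf2 on
/// most targets) paired with ICMP_SGT, i.e. the libcall result is compared
/// signed-greater-than against zero.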
1122static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1123getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {
1124#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1125 do { \
1126 switch (Size) { \
1127 case 32: \
1128 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1129 case 64: \
1130 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1131 case 128: \
1132 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1133 default: \
1134 llvm_unreachable("unexpected size"); \
1135 } \
1136 } while (0)
1137
1138 switch (Pred) {
1139 case CmpInst::FCMP_OEQ:
1140 RTLIBCASE_CMP(OEQ_F, CmpInst::ICMP_EQ);
1141 case CmpInst::FCMP_UNE:
1142 RTLIBCASE_CMP(UNE_F, CmpInst::ICMP_NE);
1143 case CmpInst::FCMP_OGE:
1144 RTLIBCASE_CMP(OGE_F, CmpInst::ICMP_SGE);
1145 case CmpInst::FCMP_OLT:
1146 RTLIBCASE_CMP(OLT_F, CmpInst::ICMP_SLT);
1147 case CmpInst::FCMP_OLE:
1148 RTLIBCASE_CMP(OLE_F, CmpInst::ICMP_SLE);
1149 case CmpInst::FCMP_OGT:
1150 RTLIBCASE_CMP(OGT_F, CmpInst::ICMP_SGT);
1151 case CmpInst::FCMP_UNO:
1152 RTLIBCASE_CMP(UNO_F, CmpInst::ICMP_NE);
1153 default:
1154 return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
1155 }
1156}
1157
1158LegalizerHelper::LegalizeResult
1159LegalizerHelper::createFCMPLibcall(MachineInstr &MI,
1160 LostDebugLocObserver &LocObserver) {
1161 auto &MF = MIRBuilder.getMF();
1162 auto &Ctx = MF.getFunction().getContext();
1163 const GFCmp *Cmp = cast<GFCmp>(&MI);
1164
1165 LLT OpLLT = MRI.getType(Cmp->getLHSReg());
1166 unsigned Size = OpLLT.getSizeInBits();
1167 if ((Size != 32 && Size != 64 && Size != 128) ||
1168 OpLLT != MRI.getType(Cmp->getRHSReg()))
1169 return UnableToLegalize;
1170
1171 Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
1172
1173 // DstReg type is s32
1174 const Register DstReg = Cmp->getReg(0);
1175 LLT DstTy = MRI.getType(DstReg);
1176 const auto Cond = Cmp->getCond();
1177
1178 // Reference:
1179 // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
1180 // Generates a libcall followed by ICMP.
1181 const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
1182 const CmpInst::Predicate ICmpPred,
1183 const DstOp &Res) -> Register {
1184 // FCMP libcall always returns an i32, and needs an ICMP with #0.
1185 constexpr LLT TempLLT = LLT::scalar(32);
1186 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1187 // Generate libcall, holding result in Temp
1188 const auto Status = createLibcall(
1189 Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
1190 {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
1191 LocObserver, &MI);
1192 if (!Status)
1193 return {};
1194
1195 // Compare temp with #0 to get the final result.
1196 return MIRBuilder
1197 .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
1198 .getReg(0);
1199 };
1200
1201 // Simple case if we have a direct mapping from predicate to libcall
1202 if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
1203 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1204 ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
1205 if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
1206 return Legalized;
1207 }
1208 return UnableToLegalize;
1209 }
1210
1211 // No direct mapping found, should be generated as combination of libcalls.
1212
1213 switch (Cond) {
1214 case CmpInst::FCMP_UEQ: {
1215 // FCMP_UEQ: unordered or equal
1216 // Convert into (FCMP_OEQ || FCMP_UNO).
1217
1218 const auto [OeqLibcall, OeqPred] =
1219 getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1220 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1221
1222 const auto [UnoLibcall, UnoPred] =
1223 getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1224 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1225 if (Oeq && Uno)
1226 MIRBuilder.buildOr(DstReg, Oeq, Uno);
1227 else
1228 return UnableToLegalize;
1229
1230 break;
1231 }
1232 case CmpInst::FCMP_ONE: {
1233 // FCMP_ONE: ordered and operands are unequal
1234 // Convert into (!FCMP_OEQ && !FCMP_UNO).
1235
1236 // We invert the predicate instead of generating a NOT
1237 // to save one instruction.
1238 // On AArch64, isel can even select the two compares into a single ccmp.
1239 const auto [OeqLibcall, OeqPred] =
1240 getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1241 const auto NotOeq =
1242 BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
1243
1244 const auto [UnoLibcall, UnoPred] =
1245 getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1246 const auto NotUno =
1247 BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
1248
1249 if (NotOeq && NotUno)
1250 MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
1251 else
1252 return UnableToLegalize;
1253
1254 break;
1255 }
1256 case CmpInst::FCMP_ULT:
1257 case CmpInst::FCMP_UGE:
1258 case CmpInst::FCMP_UGT:
1259 case CmpInst::FCMP_ULE:
1260 case CmpInst::FCMP_ORD: {
1261 // Convert into: !(inverse(Pred))
1262 // E.g. FCMP_ULT becomes !FCMP_OGE
1263 // This is equivalent to the following, but saves some instructions.
1264 // MIRBuilder.buildNot(
1265 // PredTy,
1266 // MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
1267 // Op1, Op2));
1268 const auto [InversedLibcall, InversedPred] =
1269 getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond), Size);
1270 if (!BuildLibcall(InversedLibcall,
1271 CmpInst::getInversePredicate(InversedPred), DstReg))
1272 return UnableToLegalize;
1273 break;
1274 }
1275 default:
1276 return UnableToLegalize;
1277 }
1278
1279 return Legalized;
1280}
1281
1282 // The function is used to legalize operations that set the default environment
1283 // state. In the C library a call like `fesetmode(FE_DFL_MODE)` is used for that.
1284 // On most targets supported in glibc, FE_DFL_MODE is defined as
1285 // `((const femode_t *) -1)`. That assumption is used here. If it does not hold
1286 // for some target, the target must provide custom lowering.
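// For example, on a target with 64-bit pointers G_RESET_FPMODE becomes
// roughly:
//
//   %0:_(s64) = G_CONSTANT i64 -1
//   %1:_(p0) = G_INTTOPTR %0
//   BL &fesetmode
//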
1287LegalizerHelper::LegalizeResult
1288LegalizerHelper::createResetStateLibcall(MachineInstr &MI,
1289 LostDebugLocObserver &LocObserver) {
1290 const DataLayout &DL = MIRBuilder.getDataLayout();
1291 auto &MF = MIRBuilder.getMF();
1292 auto &Ctx = MF.getFunction().getContext();
1293
1294 // Create an argument for the library function.
1295 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1296 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1297 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1298 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1299 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1300 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1301 MIRBuilder.buildIntToPtr(Dest, DefValue);
1302
1303 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1304 return createLibcall(
1305 RTLibcall, CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1306 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}), LocObserver, &MI);
1307}
1308
1309LegalizerHelper::LegalizeResult
1310LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1311 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1312
1313 switch (MI.getOpcode()) {
1314 default:
1315 return UnableToLegalize;
1316 case TargetOpcode::G_MUL:
1317 case TargetOpcode::G_SDIV:
1318 case TargetOpcode::G_UDIV:
1319 case TargetOpcode::G_SREM:
1320 case TargetOpcode::G_UREM:
1321 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1322 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1323 unsigned Size = LLTy.getSizeInBits();
1324 Type *HLTy = IntegerType::get(Ctx, Size);
1325 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1326 if (Status != Legalized)
1327 return Status;
1328 break;
1329 }
1330 case TargetOpcode::G_FADD:
1331 case TargetOpcode::G_FSUB:
1332 case TargetOpcode::G_FMUL:
1333 case TargetOpcode::G_FDIV:
1334 case TargetOpcode::G_FMA:
1335 case TargetOpcode::G_FPOW:
1336 case TargetOpcode::G_FREM:
1337 case TargetOpcode::G_FCOS:
1338 case TargetOpcode::G_FSIN:
1339 case TargetOpcode::G_FTAN:
1340 case TargetOpcode::G_FACOS:
1341 case TargetOpcode::G_FASIN:
1342 case TargetOpcode::G_FATAN:
1343 case TargetOpcode::G_FATAN2:
1344 case TargetOpcode::G_FCOSH:
1345 case TargetOpcode::G_FSINH:
1346 case TargetOpcode::G_FTANH:
1347 case TargetOpcode::G_FLOG10:
1348 case TargetOpcode::G_FLOG:
1349 case TargetOpcode::G_FLOG2:
1350 case TargetOpcode::G_FEXP:
1351 case TargetOpcode::G_FEXP2:
1352 case TargetOpcode::G_FEXP10:
1353 case TargetOpcode::G_FCEIL:
1354 case TargetOpcode::G_FFLOOR:
1355 case TargetOpcode::G_FMINNUM:
1356 case TargetOpcode::G_FMAXNUM:
1357 case TargetOpcode::G_FMINIMUMNUM:
1358 case TargetOpcode::G_FMAXIMUMNUM:
1359 case TargetOpcode::G_FSQRT:
1360 case TargetOpcode::G_FRINT:
1361 case TargetOpcode::G_FNEARBYINT:
1362 case TargetOpcode::G_INTRINSIC_TRUNC:
1363 case TargetOpcode::G_INTRINSIC_ROUND:
1364 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1365 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1366 unsigned Size = LLTy.getSizeInBits();
1367 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1368 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1369 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1370 return UnableToLegalize;
1371 }
1372 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1373 if (Status != Legalized)
1374 return Status;
1375 break;
1376 }
1377 case TargetOpcode::G_FSINCOS: {
1378 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1379 unsigned Size = LLTy.getSizeInBits();
1380 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1381 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1382 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1383 return UnableToLegalize;
1384 }
1385 return emitSincosLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1386 }
1387 case TargetOpcode::G_FMODF: {
1388 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1389 unsigned Size = LLTy.getSizeInBits();
1390 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1391 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1392 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1393 return UnableToLegalize;
1394 }
1395 return emitModfLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1396 }
1397 case TargetOpcode::G_LROUND:
1398 case TargetOpcode::G_LLROUND:
1399 case TargetOpcode::G_INTRINSIC_LRINT:
1400 case TargetOpcode::G_INTRINSIC_LLRINT: {
1401 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1402 unsigned Size = LLTy.getSizeInBits();
1403 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1404 Type *ITy = IntegerType::get(
1405 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1406 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1407 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1408 return UnableToLegalize;
1409 }
1410 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1411 LegalizeResult Status =
1412 createLibcall(Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1413 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1414 if (Status != Legalized)
1415 return Status;
1416 MI.eraseFromParent();
1417 return Legalized;
1418 }
1419 case TargetOpcode::G_FPOWI:
1420 case TargetOpcode::G_FLDEXP: {
1421 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1422 unsigned Size = LLTy.getSizeInBits();
1423 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1424 Type *ITy = IntegerType::get(
1425 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1426 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1427 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1428 return UnableToLegalize;
1429 }
1430 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1431 SmallVector<CallLowering::ArgInfo, 2> Args = {
1432 {MI.getOperand(1).getReg(), HLTy, 0},
1433 {MI.getOperand(2).getReg(), ITy, 1}};
1434 Args[1].Flags[0].setSExt();
1435 LegalizeResult Status = createLibcall(
1436 Libcall, {MI.getOperand(0).getReg(), HLTy, 0}, Args, LocObserver, &MI);
1437 if (Status != Legalized)
1438 return Status;
1439 break;
1440 }
1441 case TargetOpcode::G_FPEXT:
1442 case TargetOpcode::G_FPTRUNC: {
1443 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1444 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1445 if (!FromTy || !ToTy)
1446 return UnableToLegalize;
1447 LegalizeResult Status = conversionLibcall(MI, ToTy, FromTy, LocObserver);
1448 if (Status != Legalized)
1449 return Status;
1450 break;
1451 }
1452 case TargetOpcode::G_FCMP: {
1453 LegalizeResult Status = createFCMPLibcall(MI, LocObserver);
1454 if (Status != Legalized)
1455 return Status;
1456 MI.eraseFromParent();
1457 return Status;
1458 }
1459 case TargetOpcode::G_FPTOSI:
1460 case TargetOpcode::G_FPTOUI: {
1461 // FIXME: Support other types
1462 Type *FromTy =
1463 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1464 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1465 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1466 return UnableToLegalize;
1467 LegalizeResult Status = conversionLibcall(MI, Type::getIntNTy(Ctx, ToSize),
1468 FromTy, LocObserver);
1469 if (Status != Legalized)
1470 return Status;
1471 break;
1472 }
1473 case TargetOpcode::G_SITOFP:
1474 case TargetOpcode::G_UITOFP: {
1475 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1476 Type *ToTy =
1477 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1478 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1479 return UnableToLegalize;
1480 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
1481 LegalizeResult Status = conversionLibcall(
1482 MI, ToTy, Type::getIntNTy(Ctx, FromSize), LocObserver, IsSigned);
1483 if (Status != Legalized)
1484 return Status;
1485 break;
1486 }
1487 case TargetOpcode::G_ATOMICRMW_XCHG:
1488 case TargetOpcode::G_ATOMICRMW_ADD:
1489 case TargetOpcode::G_ATOMICRMW_SUB:
1490 case TargetOpcode::G_ATOMICRMW_AND:
1491 case TargetOpcode::G_ATOMICRMW_OR:
1492 case TargetOpcode::G_ATOMICRMW_XOR:
1493 case TargetOpcode::G_ATOMIC_CMPXCHG:
1494 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1495 LegalizeResult Status = createAtomicLibcall(MI);
1496 if (Status != Legalized)
1497 return Status;
1498 break;
1499 }
1500 case TargetOpcode::G_BZERO:
1501 case TargetOpcode::G_MEMCPY:
1502 case TargetOpcode::G_MEMMOVE:
1503 case TargetOpcode::G_MEMSET: {
1504 LegalizeResult Result =
1505 createMemLibcall(*MIRBuilder.getMRI(), MI, LocObserver);
1506 if (Result != Legalized)
1507 return Result;
1508 MI.eraseFromParent();
1509 return Result;
1510 }
1511 case TargetOpcode::G_GET_FPENV:
1512 case TargetOpcode::G_GET_FPMODE: {
1513 LegalizeResult Result = createGetStateLibcall(MI, LocObserver);
1514 if (Result != Legalized)
1515 return Result;
1516 break;
1517 }
1518 case TargetOpcode::G_SET_FPENV:
1519 case TargetOpcode::G_SET_FPMODE: {
1520 LegalizeResult Result = createSetStateLibcall(MI, LocObserver);
1521 if (Result != Legalized)
1522 return Result;
1523 break;
1524 }
1525 case TargetOpcode::G_RESET_FPENV:
1526 case TargetOpcode::G_RESET_FPMODE: {
1527 LegalizeResult Result = createResetStateLibcall(MI, LocObserver);
1528 if (Result != Legalized)
1529 return Result;
1530 break;
1531 }
1532 }
1533
1534 MI.eraseFromParent();
1535 return Legalized;
1536}
1537
1538LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1539 unsigned TypeIdx,
1540 LLT NarrowTy) {
1541 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1542 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1543
1544 switch (MI.getOpcode()) {
1545 default:
1546 return UnableToLegalize;
1547 case TargetOpcode::G_IMPLICIT_DEF: {
1548 Register DstReg = MI.getOperand(0).getReg();
1549 LLT DstTy = MRI.getType(DstReg);
1550
1551 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1552 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1553 // FIXME: Although this would also be legal for the general case, it causes
1554 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1555 // combines not being hit). This seems to be a problem related to the
1556 // artifact combiner.
1557 if (SizeOp0 % NarrowSize != 0) {
1558 LLT ImplicitTy = DstTy.changeElementType(NarrowTy);
1559 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1560 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1561
1562 MI.eraseFromParent();
1563 return Legalized;
1564 }
1565
1566 int NumParts = SizeOp0 / NarrowSize;
1567 SmallVector<Register, 2> DstRegs;
1568
1569 for (int i = 0; i < NumParts; ++i)
1570 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1571
1572 if (DstTy.isVector())
1573 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1574 else
1575 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1576 MI.eraseFromParent();
1577 return Legalized;
1578 }
1579 case TargetOpcode::G_CONSTANT: {
1580 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1581 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1582 unsigned TotalSize = Ty.getSizeInBits();
1583 unsigned NarrowSize = NarrowTy.getSizeInBits();
1584 int NumParts = TotalSize / NarrowSize;
1585
1586 SmallVector<Register, 4> PartRegs;
1587 for (int I = 0; I != NumParts; ++I) {
1588 unsigned Offset = I * NarrowSize;
1589 auto K = MIRBuilder.buildConstant(NarrowTy,
1590 Val.lshr(Offset).trunc(NarrowSize));
1591 PartRegs.push_back(K.getReg(0));
1592 }
1593
1594 LLT LeftoverTy;
1595 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1596 SmallVector<Register, 1> LeftoverRegs;
1597 if (LeftoverBits != 0) {
1598 LeftoverTy = LLT::scalar(LeftoverBits);
1599 auto K = MIRBuilder.buildConstant(
1600 LeftoverTy,
1601 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1602 LeftoverRegs.push_back(K.getReg(0));
1603 }
1604
1605 insertParts(MI.getOperand(0).getReg(),
1606 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1607
1608 MI.eraseFromParent();
1609 return Legalized;
1610 }
1611 case TargetOpcode::G_SEXT:
1612 case TargetOpcode::G_ZEXT:
1613 case TargetOpcode::G_ANYEXT:
1614 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1615 case TargetOpcode::G_TRUNC: {
1616 if (TypeIdx != 1)
1617 return UnableToLegalize;
1618
1619 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1620 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1621 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1622 return UnableToLegalize;
1623 }
1624
1625 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1626 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1627 MI.eraseFromParent();
1628 return Legalized;
1629 }
1630 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1631 case TargetOpcode::G_FREEZE: {
1632 if (TypeIdx != 0)
1633 return UnableToLegalize;
1634
1635 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1636 // Should widen scalar first
1637 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1638 return UnableToLegalize;
1639
1640 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1641 SmallVector<Register, 8> Parts;
1642 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1643 Parts.push_back(
1644 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1645 .getReg(0));
1646 }
1647
1648 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1649 MI.eraseFromParent();
1650 return Legalized;
1651 }
1652 case TargetOpcode::G_ADD:
1653 case TargetOpcode::G_SUB:
1654 case TargetOpcode::G_SADDO:
1655 case TargetOpcode::G_SSUBO:
1656 case TargetOpcode::G_SADDE:
1657 case TargetOpcode::G_SSUBE:
1658 case TargetOpcode::G_UADDO:
1659 case TargetOpcode::G_USUBO:
1660 case TargetOpcode::G_UADDE:
1661 case TargetOpcode::G_USUBE:
1662 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1663 case TargetOpcode::G_MUL:
1664 case TargetOpcode::G_UMULH:
1665 return narrowScalarMul(MI, NarrowTy);
1666 case TargetOpcode::G_EXTRACT:
1667 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1668 case TargetOpcode::G_INSERT:
1669 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1670 case TargetOpcode::G_LOAD: {
1671 auto &LoadMI = cast<GLoad>(MI);
1672 Register DstReg = LoadMI.getDstReg();
1673 LLT DstTy = MRI.getType(DstReg);
1674 if (DstTy.isVector())
1675 return UnableToLegalize;
1676
1677 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1678 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1679 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1680 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1681 LoadMI.eraseFromParent();
1682 return Legalized;
1683 }
1684
1685 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1686 }
1687 case TargetOpcode::G_ZEXTLOAD:
1688 case TargetOpcode::G_SEXTLOAD: {
1689 auto &LoadMI = cast<GExtLoad>(MI);
1690 Register DstReg = LoadMI.getDstReg();
1691 Register PtrReg = LoadMI.getPointerReg();
1692
1693 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1694 auto &MMO = LoadMI.getMMO();
1695 unsigned MemSize = MMO.getSizeInBits().getValue();
1696
1697 if (MemSize == NarrowSize) {
1698 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1699 } else if (MemSize < NarrowSize) {
1700 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1701 } else if (MemSize > NarrowSize) {
1702 // FIXME: Need to split the load.
1703 return UnableToLegalize;
1704 }
1705
1706 if (isa<GZExtLoad>(LoadMI))
1707 MIRBuilder.buildZExt(DstReg, TmpReg);
1708 else
1709 MIRBuilder.buildSExt(DstReg, TmpReg);
1710
1711 LoadMI.eraseFromParent();
1712 return Legalized;
1713 }
1714 case TargetOpcode::G_STORE: {
1715 auto &StoreMI = cast<GStore>(MI);
1716
1717 Register SrcReg = StoreMI.getValueReg();
1718 LLT SrcTy = MRI.getType(SrcReg);
1719 if (SrcTy.isVector())
1720 return UnableToLegalize;
1721
1722 int NumParts = SizeOp0 / NarrowSize;
1723 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1724 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1725 if (SrcTy.isVector() && LeftoverBits != 0)
1726 return UnableToLegalize;
1727
1728 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1729 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1730 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1731 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1732 StoreMI.eraseFromParent();
1733 return Legalized;
1734 }
1735
1736 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1737 }
1738 case TargetOpcode::G_SELECT:
1739 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1740 case TargetOpcode::G_AND:
1741 case TargetOpcode::G_OR:
1742 case TargetOpcode::G_XOR: {
1743 // Legalize bitwise operation:
1744 // A = BinOp<Ty> B, C
1745 // into:
1746 // B1, ..., BN = G_UNMERGE_VALUES B
1747 // C1, ..., CN = G_UNMERGE_VALUES C
1748 // A1 = BinOp<Ty/N> B1, C1
1749 // ...
1750 // AN = BinOp<Ty/N> BN, CN
1751 // A = G_MERGE_VALUES A1, ..., AN
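// As a concrete sketch of the transformation above (assuming a 64-bit G_AND
// and NarrowTy = s32; register names are illustrative only):
//   %b0:_(s32), %b1:_(s32) = G_UNMERGE_VALUES %b:_(s64)
//   %c0:_(s32), %c1:_(s32) = G_UNMERGE_VALUES %c:_(s64)
//   %a0:_(s32) = G_AND %b0, %c0
//   %a1:_(s32) = G_AND %b1, %c1
//   %a:_(s64) = G_MERGE_VALUES %a0:_(s32), %a1:_(s32)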
1752 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1753 }
1754 case TargetOpcode::G_SHL:
1755 case TargetOpcode::G_LSHR:
1756 case TargetOpcode::G_ASHR:
1757 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1758 case TargetOpcode::G_CTLZ:
1759 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1760 case TargetOpcode::G_CTTZ:
1761 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1762 case TargetOpcode::G_CTLS:
1763 case TargetOpcode::G_CTPOP:
1764 if (TypeIdx == 1)
1765 switch (MI.getOpcode()) {
1766 case TargetOpcode::G_CTLZ:
1767 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1768 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1769 case TargetOpcode::G_CTTZ:
1770 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1771 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1772 case TargetOpcode::G_CTPOP:
1773 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1774 case TargetOpcode::G_CTLS:
1775 return narrowScalarCTLS(MI, TypeIdx, NarrowTy);
1776 default:
1777 return UnableToLegalize;
1778 }
1779
1780 Observer.changingInstr(MI);
1781 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1782 Observer.changedInstr(MI);
1783 return Legalized;
1784 case TargetOpcode::G_INTTOPTR:
1785 if (TypeIdx != 1)
1786 return UnableToLegalize;
1787
1788 Observer.changingInstr(MI);
1789 narrowScalarSrc(MI, NarrowTy, 1);
1790 Observer.changedInstr(MI);
1791 return Legalized;
1792 case TargetOpcode::G_PTRTOINT:
1793 if (TypeIdx != 0)
1794 return UnableToLegalize;
1795
1796 Observer.changingInstr(MI);
1797 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1798 Observer.changedInstr(MI);
1799 return Legalized;
1800 case TargetOpcode::G_PHI: {
1801 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1802 // NarrowSize.
1803 if (SizeOp0 % NarrowSize != 0)
1804 return UnableToLegalize;
1805
1806 unsigned NumParts = SizeOp0 / NarrowSize;
1807 SmallVector<Register, 2> DstRegs(NumParts);
1808 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1809 Observer.changingInstr(MI);
1810 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1811 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1812 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1813 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1814 SrcRegs[i / 2], MIRBuilder, MRI);
1815 }
1816 MachineBasicBlock &MBB = *MI.getParent();
1817 MIRBuilder.setInsertPt(MBB, MI);
1818 for (unsigned i = 0; i < NumParts; ++i) {
1819 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1820 MachineInstrBuilder MIB =
1821 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1822 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1823 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1824 }
1825 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1826 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1827 Observer.changedInstr(MI);
1828 MI.eraseFromParent();
1829 return Legalized;
1830 }
1831 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1832 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1833 if (TypeIdx != 2)
1834 return UnableToLegalize;
1835
1836 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1837 Observer.changingInstr(MI);
1838 narrowScalarSrc(MI, NarrowTy, OpIdx);
1839 Observer.changedInstr(MI);
1840 return Legalized;
1841 }
1842 case TargetOpcode::G_ICMP: {
1843 Register LHS = MI.getOperand(2).getReg();
1844 LLT SrcTy = MRI.getType(LHS);
1845 CmpInst::Predicate Pred =
1846 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1847
1848 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1849 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1850 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1851 LHSLeftoverRegs, MIRBuilder, MRI))
1852 return UnableToLegalize;
1853
1854 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1855 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1856 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1857 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1858 return UnableToLegalize;
1859
1860 // We now have the LHS and RHS of the compare split into narrow-type
1861 // registers, plus potentially some leftover type.
1862 Register Dst = MI.getOperand(0).getReg();
1863 LLT ResTy = MRI.getType(Dst);
1864 if (ICmpInst::isEquality(Pred)) {
1865 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1866 // them together. For each equal part, the result should be all 0s. For
1867 // each non-equal part, we'll get at least one 1.
1868 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1869 SmallVector<Register, 4> Xors;
1870 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1871 auto LHS = std::get<0>(LHSAndRHS);
1872 auto RHS = std::get<1>(LHSAndRHS);
1873 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1874 Xors.push_back(Xor);
1875 }
1876
1877 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1878 // to the desired narrow type so that we can OR them together later.
1879 SmallVector<Register, 4> WidenedXors;
1880 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1881 auto LHS = std::get<0>(LHSAndRHS);
1882 auto RHS = std::get<1>(LHSAndRHS);
1883 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1884 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1885 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1886 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1887 llvm::append_range(Xors, WidenedXors);
1888 }
1889
1890 // Now, for each part we broke up, we know if they are equal/not equal
1891 // based off the G_XOR. We can OR these all together and compare against
1892 // 0 to get the result.
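// As a sketch, continuing the s88 = s64 + s24 split mentioned above: the two
// s64 parts are XOR'ed, the two s24 leftovers are XOR'ed and zero-extended to
// s64, all results are OR'ed together, and a single compare of that OR
// against zero yields the final EQ/NE answer.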
1893 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1894 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1895 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1896 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1897 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1898 } else {
1899 Register CmpIn;
1900 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1901 Register CmpOut;
1902 CmpInst::Predicate PartPred;
1903
1904 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1905 PartPred = Pred;
1906 CmpOut = Dst;
1907 } else {
1908 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1909 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1910 }
1911
1912 if (!CmpIn) {
1913 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
1914 RHSPartRegs[I]);
1915 } else {
1916 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
1917 RHSPartRegs[I]);
1918 auto CmpEq = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1919 LHSPartRegs[I], RHSPartRegs[I]);
1920 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1921 }
1922
1923 CmpIn = CmpOut;
1924 }
1925
1926 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1927 Register CmpOut;
1928 CmpInst::Predicate PartPred;
1929
1930 if (I == E - 1) {
1931 PartPred = Pred;
1932 CmpOut = Dst;
1933 } else {
1934 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1935 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1936 }
1937
1938 if (!CmpIn) {
1939 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
1940 RHSLeftoverRegs[I]);
1941 } else {
1942 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
1943 RHSLeftoverRegs[I]);
1944 auto CmpEq =
1945 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1946 LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
1947 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1948 }
1949
1950 CmpIn = CmpOut;
1951 }
1952 }
1953 MI.eraseFromParent();
1954 return Legalized;
1955 }
1956 case TargetOpcode::G_FCMP:
1957 if (TypeIdx != 0)
1958 return UnableToLegalize;
1959
1960 Observer.changingInstr(MI);
1961 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1962 Observer.changedInstr(MI);
1963 return Legalized;
1964
1965 case TargetOpcode::G_SEXT_INREG: {
1966 if (TypeIdx != 0)
1967 return UnableToLegalize;
1968
1969 int64_t SizeInBits = MI.getOperand(2).getImm();
1970
1971 // So long as the new type has more bits than the bits we're extending, we
1972 // don't need to break it apart.
1973 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1974 Observer.changingInstr(MI);
1975 // We don't lose any non-extension bits by truncating the src and
1976 // sign-extending the dst.
1977 MachineOperand &MO1 = MI.getOperand(1);
1978 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1979 MO1.setReg(TruncMIB.getReg(0));
1980
1981 MachineOperand &MO2 = MI.getOperand(0);
1982 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1983 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1984 MIRBuilder.buildSExt(MO2, DstExt);
1985 MO2.setReg(DstExt);
1986 Observer.changedInstr(MI);
1987 return Legalized;
1988 }
1989
1990 // Break it apart. Components below the extension point are unmodified. The
1991 // component containing the extension point becomes a narrower SEXT_INREG.
1992 // Components above it are ashr'd from the component containing the
1993 // extension point.
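// For instance (a sketch assuming SizeOp0 = 96, NarrowSize = 32 and
// SizeInBits = 40): part 0 is passed through unchanged, part 1 becomes
// G_SEXT_INREG %part1, 8, and part 2 is the G_ASHR of part 1 by 31.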
1994 if (SizeOp0 % NarrowSize != 0)
1995 return UnableToLegalize;
1996 int NumParts = SizeOp0 / NarrowSize;
1997
1998 // List the registers where the destination will be scattered.
1999 SmallVector<Register, 2> DstRegs;
2000 // List the registers where the source will be split.
2001 SmallVector<Register, 2> SrcRegs;
2002
2003 // Create all the temporary registers.
2004 for (int i = 0; i < NumParts; ++i) {
2005 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
2006
2007 SrcRegs.push_back(SrcReg);
2008 }
2009
2010 // Explode the big arguments into smaller chunks.
2011 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
2012
2013 Register AshrCstReg =
2014 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
2015 .getReg(0);
2016 Register FullExtensionReg;
2017 Register PartialExtensionReg;
2018
2019 // Do the operation on each small part.
2020 for (int i = 0; i < NumParts; ++i) {
2021 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
2022 DstRegs.push_back(SrcRegs[i]);
2023 PartialExtensionReg = DstRegs.back();
2024 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
2025 assert(PartialExtensionReg &&
2026 "Expected to visit partial extension before full");
2027 if (FullExtensionReg) {
2028 DstRegs.push_back(FullExtensionReg);
2029 continue;
2030 }
2031 DstRegs.push_back(
2032 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
2033 .getReg(0));
2034 FullExtensionReg = DstRegs.back();
2035 } else {
2036 DstRegs.push_back(
2037 MIRBuilder
2038 .buildInstr(
2039 TargetOpcode::G_SEXT_INREG, {NarrowTy},
2040 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
2041 .getReg(0));
2042 PartialExtensionReg = DstRegs.back();
2043 }
2044 }
2045
2046 // Gather the destination registers into the final destination.
2047 Register DstReg = MI.getOperand(0).getReg();
2048 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2049 MI.eraseFromParent();
2050 return Legalized;
2051 }
2052 case TargetOpcode::G_BSWAP:
2053 case TargetOpcode::G_BITREVERSE: {
2054 if (SizeOp0 % NarrowSize != 0)
2055 return UnableToLegalize;
2056
2057 Observer.changingInstr(MI);
2058 SmallVector<Register, 2> SrcRegs, DstRegs;
2059 unsigned NumParts = SizeOp0 / NarrowSize;
2060 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2061 MIRBuilder, MRI);
2062
2063 for (unsigned i = 0; i < NumParts; ++i) {
2064 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
2065 {SrcRegs[NumParts - 1 - i]});
2066 DstRegs.push_back(DstPart.getReg(0));
2067 }
2068
2069 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
2070
2071 Observer.changedInstr(MI);
2072 MI.eraseFromParent();
2073 return Legalized;
2074 }
2075 case TargetOpcode::G_PTR_ADD:
2076 case TargetOpcode::G_PTRMASK: {
2077 if (TypeIdx != 1)
2078 return UnableToLegalize;
2079 Observer.changingInstr(MI);
2080 narrowScalarSrc(MI, NarrowTy, 2);
2081 Observer.changedInstr(MI);
2082 return Legalized;
2083 }
2084 case TargetOpcode::G_FPTOUI:
2085 case TargetOpcode::G_FPTOSI:
2086 case TargetOpcode::G_FPTOUI_SAT:
2087 case TargetOpcode::G_FPTOSI_SAT:
2088 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
2089 case TargetOpcode::G_FPEXT:
2090 if (TypeIdx != 0)
2091 return UnableToLegalize;
2092 Observer.changingInstr(MI);
2093 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
2094 Observer.changedInstr(MI);
2095 return Legalized;
2096 case TargetOpcode::G_FLDEXP:
2097 case TargetOpcode::G_STRICT_FLDEXP:
2098 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
2099 case TargetOpcode::G_VSCALE: {
2100 Register Dst = MI.getOperand(0).getReg();
2101 LLT Ty = MRI.getType(Dst);
2102
2103 // Assume VSCALE(1) fits into a legal integer
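// As an illustration (assuming the result type is s64, NarrowTy is s32 and
// the immediate is 4): %v:_(s32) = G_VSCALE 1; %z:_(s64) = G_ZEXT %v;
// %dst:_(s64) = G_MUL %z, 4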
2104 const APInt One(NarrowTy.getSizeInBits(), 1);
2105 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
2106 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
2107 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
2108 MIRBuilder.buildMul(Dst, ZExt, C);
2109
2110 MI.eraseFromParent();
2111 return Legalized;
2112 }
2113 }
2114}
2115
2116 Register LegalizerHelper::coerceToScalar(Register Val) {
2117 LLT Ty = MRI.getType(Val);
2118 if (Ty.isScalar())
2119 return Val;
2120
2121 const DataLayout &DL = MIRBuilder.getDataLayout();
2122 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
2123 if (Ty.isPointer()) {
2124 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2125 return Register();
2126 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2127 }
2128
2129 Register NewVal = Val;
2130
2131 assert(Ty.isVector());
2132 if (Ty.isPointerVector())
2133 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2134 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2135}
2136
2137 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
2138 unsigned OpIdx, unsigned ExtOpcode) {
2139 MachineOperand &MO = MI.getOperand(OpIdx);
2140 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2141 MO.setReg(ExtB.getReg(0));
2142}
2143
2144 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
2145 unsigned OpIdx) {
2146 MachineOperand &MO = MI.getOperand(OpIdx);
2147 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
2148 MO.setReg(ExtB.getReg(0));
2149}
2150
2151 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
2152 unsigned OpIdx, unsigned TruncOpcode) {
2153 MachineOperand &MO = MI.getOperand(OpIdx);
2154 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2155 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2156 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2157 MO.setReg(DstExt);
2158}
2159
2160 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
2161 unsigned OpIdx, unsigned ExtOpcode) {
2162 MachineOperand &MO = MI.getOperand(OpIdx);
2163 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2164 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2165 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2166 MO.setReg(DstTrunc);
2167}
2168
2169 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
2170 unsigned OpIdx) {
2171 MachineOperand &MO = MI.getOperand(OpIdx);
2172 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2173 Register Dst = MO.getReg();
2174 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2175 MO.setReg(DstExt);
2176 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2177}
2178
2179 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
2180 unsigned OpIdx) {
2181 MachineOperand &MO = MI.getOperand(OpIdx);
2182 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2183}
2184
2185 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2186 MachineOperand &Op = MI.getOperand(OpIdx);
2187 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
2188}
2189
2190 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2191 MachineOperand &MO = MI.getOperand(OpIdx);
2192 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2193 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2194 MIRBuilder.buildBitcast(MO, CastDst);
2195 MO.setReg(CastDst);
2196}
2197
2198 LegalizerHelper::LegalizeResult
2199LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
2200 LLT WideTy) {
2201 if (TypeIdx != 1)
2202 return UnableToLegalize;
2203
2204 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
2205 if (DstTy.isVector())
2206 return UnableToLegalize;
2207
2208 LLT SrcTy = MRI.getType(Src1Reg);
2209 const int DstSize = DstTy.getSizeInBits();
2210 const int SrcSize = SrcTy.getSizeInBits();
2211 const int WideSize = WideTy.getSizeInBits();
2212 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2213
2214 unsigned NumOps = MI.getNumOperands();
2215 unsigned NumSrc = MI.getNumOperands() - 1;
2216 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2217
2218 if (WideSize >= DstSize) {
2219 // Directly pack the bits in the target type.
2220 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
2221
2222 for (unsigned I = 2; I != NumOps; ++I) {
2223 const unsigned Offset = (I - 1) * PartSize;
2224
2225 Register SrcReg = MI.getOperand(I).getReg();
2226 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
2227
2228 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
2229
2230 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
2231 MRI.createGenericVirtualRegister(WideTy);
2232
2233 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
2234 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
2235 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
2236 ResultReg = NextResult;
2237 }
2238
2239 if (WideSize > DstSize)
2240 MIRBuilder.buildTrunc(DstReg, ResultReg);
2241 else if (DstTy.isPointer())
2242 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
2243
2244 MI.eraseFromParent();
2245 return Legalized;
2246 }
2247
2248 // Unmerge the original values to the GCD type, and recombine to the next
2249 // multiple greater than the original type.
2250 //
2251 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2252 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2253 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2254 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2255 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2256 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2257 // %12:_(s12) = G_MERGE_VALUES %10, %11
2258 //
2259 // Padding with undef if necessary:
2260 //
2261 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2262 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2263 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2264 // %7:_(s2) = G_IMPLICIT_DEF
2265 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2266 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2267 // %10:_(s12) = G_MERGE_VALUES %8, %9
2268
2269 const int GCD = std::gcd(SrcSize, WideSize);
2270 LLT GCDTy = LLT::scalar(GCD);
2271
2272 SmallVector<Register, 8> NewMergeRegs;
2273 SmallVector<Register, 8> Unmerges;
2274 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
2275
2276 // Decompose the original operands if they don't evenly divide.
2277 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
2278 Register SrcReg = MO.getReg();
2279 if (GCD == SrcSize) {
2280 Unmerges.push_back(SrcReg);
2281 } else {
2282 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2283 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2284 Unmerges.push_back(Unmerge.getReg(J));
2285 }
2286 }
2287
2288 // Pad with undef to the next size that is a multiple of the requested size.
2289 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
2290 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
2291 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
2292 Unmerges.push_back(UndefReg);
2293 }
2294
2295 const int PartsPerGCD = WideSize / GCD;
2296
2297 // Build merges of each piece.
2298 ArrayRef<Register> Slicer(Unmerges);
2299 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2300 auto Merge =
2301 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2302 NewMergeRegs.push_back(Merge.getReg(0));
2303 }
2304
2305 // A truncate may be necessary if the requested type doesn't evenly divide the
2306 // original result type.
2307 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
2308 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2309 } else {
2310 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2311 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2312 }
2313
2314 MI.eraseFromParent();
2315 return Legalized;
2316}
2317
2318 LegalizerHelper::LegalizeResult
2319LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
2320 LLT WideTy) {
2321 if (TypeIdx != 0)
2322 return UnableToLegalize;
2323
2324 int NumDst = MI.getNumOperands() - 1;
2325 Register SrcReg = MI.getOperand(NumDst).getReg();
2326 LLT SrcTy = MRI.getType(SrcReg);
2327 if (SrcTy.isVector())
2328 return UnableToLegalize;
2329
2330 Register Dst0Reg = MI.getOperand(0).getReg();
2331 LLT DstTy = MRI.getType(Dst0Reg);
2332 if (!DstTy.isScalar())
2333 return UnableToLegalize;
2334
2335 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
2336 if (SrcTy.isPointer()) {
2337 const DataLayout &DL = MIRBuilder.getDataLayout();
2338 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
2339 LLVM_DEBUG(
2340 dbgs() << "Not casting non-integral address space integer\n");
2341 return UnableToLegalize;
2342 }
2343
2344 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
2345 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2346 }
2347
2348 // Widen SrcTy to WideTy. This does not affect the result, but since the
2349 // user requested this size, it is probably better handled than SrcTy and
2350 // should reduce the total number of legalization artifacts.
2351 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2352 SrcTy = WideTy;
2353 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2354 }
2355
2356 // There's no unmerge type to target. Directly extract the bits from the
2357 // source type.
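// A sketch (assuming an s32 source unmerged into two s16 pieces with
// WideTy = s64): %w:_(s64) = G_ANYEXT %src:_(s32); %d0:_(s16) = G_TRUNC %w;
// %h:_(s64) = G_LSHR %w, 16; %d1:_(s16) = G_TRUNC %h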
2358 unsigned DstSize = DstTy.getSizeInBits();
2359
2360 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2361 for (int I = 1; I != NumDst; ++I) {
2362 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2363 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2364 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2365 }
2366
2367 MI.eraseFromParent();
2368 return Legalized;
2369 }
2370
2371 // Extend the source to a wider type.
2372 LLT LCMTy = getLCMType(SrcTy, WideTy);
2373
2374 Register WideSrc = SrcReg;
2375 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2376 // TODO: If this is an integral address space, cast to integer and anyext.
2377 if (SrcTy.isPointer()) {
2378 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2379 return UnableToLegalize;
2380 }
2381
2382 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2383 }
2384
2385 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2386
2387 // Create a sequence of unmerges and merges to the original results. Since we
2388 // may have widened the source, we will need to pad the results with dead defs
2389 // to cover the source register.
2390 // e.g. widen s48 to s64:
2391 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2392 //
2393 // =>
2394 // %4:_(s192) = G_ANYEXT %0:_(s96)
2395 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2396 // ; unpack to GCD type, with extra dead defs
2397 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2398 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2399 // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
2400 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2401 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2402 const LLT GCDTy = getGCDType(WideTy, DstTy);
2403 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2404 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2405
2406 // Directly unmerge to the destination without going through a GCD type
2407 // if possible
2408 if (PartsPerRemerge == 1) {
2409 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2410
2411 for (int I = 0; I != NumUnmerge; ++I) {
2412 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2413
2414 for (int J = 0; J != PartsPerUnmerge; ++J) {
2415 int Idx = I * PartsPerUnmerge + J;
2416 if (Idx < NumDst)
2417 MIB.addDef(MI.getOperand(Idx).getReg());
2418 else {
2419 // Create dead def for excess components.
2420 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2421 }
2422 }
2423
2424 MIB.addUse(Unmerge.getReg(I));
2425 }
2426 } else {
2427 SmallVector<Register, 16> Parts;
2428 for (int J = 0; J != NumUnmerge; ++J)
2429 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2430
2431 SmallVector<Register, 8> RemergeParts;
2432 for (int I = 0; I != NumDst; ++I) {
2433 for (int J = 0; J < PartsPerRemerge; ++J) {
2434 const int Idx = I * PartsPerRemerge + J;
2435 RemergeParts.emplace_back(Parts[Idx]);
2436 }
2437
2438 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2439 RemergeParts.clear();
2440 }
2441 }
2442
2443 MI.eraseFromParent();
2444 return Legalized;
2445}
2446
2447 LegalizerHelper::LegalizeResult
2448LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2449 LLT WideTy) {
2450 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2451 unsigned Offset = MI.getOperand(2).getImm();
2452
2453 if (TypeIdx == 0) {
2454 if (SrcTy.isVector() || DstTy.isVector())
2455 return UnableToLegalize;
2456
2457 SrcOp Src(SrcReg);
2458 if (SrcTy.isPointer()) {
2459 // Extracts from pointers can be handled only if they are really just
2460 // simple integers.
2461 const DataLayout &DL = MIRBuilder.getDataLayout();
2462 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2463 return UnableToLegalize;
2464
2465 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2466 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2467 SrcTy = SrcAsIntTy;
2468 }
2469
2470 if (DstTy.isPointer())
2471 return UnableToLegalize;
2472
2473 if (Offset == 0) {
2474 // Avoid a shift in the degenerate case.
2475 MIRBuilder.buildTrunc(DstReg,
2476 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2477 MI.eraseFromParent();
2478 return Legalized;
2479 }
2480
2481 // Do a shift in the source type.
2482 LLT ShiftTy = SrcTy;
2483 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2484 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2485 ShiftTy = WideTy;
2486 }
2487
2488 auto LShr = MIRBuilder.buildLShr(
2489 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2490 MIRBuilder.buildTrunc(DstReg, LShr);
2491 MI.eraseFromParent();
2492 return Legalized;
2493 }
2494
2495 if (SrcTy.isScalar()) {
2496 Observer.changingInstr(MI);
2497 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2498 Observer.changedInstr(MI);
2499 return Legalized;
2500 }
2501
2502 if (!SrcTy.isVector())
2503 return UnableToLegalize;
2504
2505 if (DstTy != SrcTy.getElementType())
2506 return UnableToLegalize;
2507
2508 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2509 return UnableToLegalize;
2510
2511 Observer.changingInstr(MI);
2512 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2513
2514 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2515 Offset);
2516 widenScalarDst(MI, WideTy.getScalarType(), 0);
2517 Observer.changedInstr(MI);
2518 return Legalized;
2519}
2520
2521 LegalizerHelper::LegalizeResult
2522LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2523 LLT WideTy) {
2524 if (TypeIdx != 0 || WideTy.isVector())
2525 return UnableToLegalize;
2526 Observer.changingInstr(MI);
2527 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2528 widenScalarDst(MI, WideTy);
2529 Observer.changedInstr(MI);
2530 return Legalized;
2531}
2532
2533 LegalizerHelper::LegalizeResult
2534LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2535 LLT WideTy) {
2536 unsigned Opcode;
2537 unsigned ExtOpcode;
2538 std::optional<Register> CarryIn;
2539 switch (MI.getOpcode()) {
2540 default:
2541 llvm_unreachable("Unexpected opcode!");
2542 case TargetOpcode::G_SADDO:
2543 Opcode = TargetOpcode::G_ADD;
2544 ExtOpcode = TargetOpcode::G_SEXT;
2545 break;
2546 case TargetOpcode::G_SSUBO:
2547 Opcode = TargetOpcode::G_SUB;
2548 ExtOpcode = TargetOpcode::G_SEXT;
2549 break;
2550 case TargetOpcode::G_UADDO:
2551 Opcode = TargetOpcode::G_ADD;
2552 ExtOpcode = TargetOpcode::G_ZEXT;
2553 break;
2554 case TargetOpcode::G_USUBO:
2555 Opcode = TargetOpcode::G_SUB;
2556 ExtOpcode = TargetOpcode::G_ZEXT;
2557 break;
2558 case TargetOpcode::G_SADDE:
2559 Opcode = TargetOpcode::G_UADDE;
2560 ExtOpcode = TargetOpcode::G_SEXT;
2561 CarryIn = MI.getOperand(4).getReg();
2562 break;
2563 case TargetOpcode::G_SSUBE:
2564 Opcode = TargetOpcode::G_USUBE;
2565 ExtOpcode = TargetOpcode::G_SEXT;
2566 CarryIn = MI.getOperand(4).getReg();
2567 break;
2568 case TargetOpcode::G_UADDE:
2569 Opcode = TargetOpcode::G_UADDE;
2570 ExtOpcode = TargetOpcode::G_ZEXT;
2571 CarryIn = MI.getOperand(4).getReg();
2572 break;
2573 case TargetOpcode::G_USUBE:
2574 Opcode = TargetOpcode::G_USUBE;
2575 ExtOpcode = TargetOpcode::G_ZEXT;
2576 CarryIn = MI.getOperand(4).getReg();
2577 break;
2578 }
2579
2580 if (TypeIdx == 1) {
2581 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2582
2583 Observer.changingInstr(MI);
2584 if (CarryIn)
2585 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2586 widenScalarDst(MI, WideTy, 1);
2587
2588 Observer.changedInstr(MI);
2589 return Legalized;
2590 }
2591
2592 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2593 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2594 // Do the arithmetic in the larger type.
2595 Register NewOp;
2596 if (CarryIn) {
2597 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2598 NewOp = MIRBuilder
2599 .buildInstr(Opcode, {WideTy, CarryOutTy},
2600 {LHSExt, RHSExt, *CarryIn})
2601 .getReg(0);
2602 } else {
2603 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2604 }
2605 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2606 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2607 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2608 // There is no overflow if the ExtOp is the same as NewOp.
2609 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2610 // Now trunc the NewOp to the original result.
2611 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2612 MI.eraseFromParent();
2613 return Legalized;
2614}
2615
2616 LegalizerHelper::LegalizeResult
2617LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2618 LLT WideTy) {
2619 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2620 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2621 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2622 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2623 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2624 // We can convert this to:
2625 // 1. Any extend iN to iM
2626 // 2. SHL by M-N
2627 // 3. [US][ADD|SUB|SHL]SAT
2628 // 4. L/ASHR by M-N
2629 //
2630 // It may be more efficient to lower this to a min and a max operation in
2631 // the higher precision arithmetic if the promoted operation isn't legal,
2632 // but this decision is up to the target's lowering request.
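// For example (a sketch, assuming an s8 G_SADDSAT widened to WideTy = s32):
// both operands are any-extended to s32 and shifted left by 24, an s32
// G_SADDSAT is performed, and the result is arithmetic-shifted right by 24
// and truncated back to s8.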
2633 Register DstReg = MI.getOperand(0).getReg();
2634
2635 unsigned NewBits = WideTy.getScalarSizeInBits();
2636 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2637
2638 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2639 // must not left shift the RHS to preserve the shift amount.
2640 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2641 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2642 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2643 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2644 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2645 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2646
2647 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2648 {ShiftL, ShiftR}, MI.getFlags());
2649
2650 // Use a shift that will preserve the number of sign bits when the trunc is
2651 // folded away.
2652 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2653 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2654
2655 MIRBuilder.buildTrunc(DstReg, Result);
2656 MI.eraseFromParent();
2657 return Legalized;
2658}
2659
2660 LegalizerHelper::LegalizeResult
2661LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2662 LLT WideTy) {
2663 if (TypeIdx == 1) {
2664 Observer.changingInstr(MI);
2665 widenScalarDst(MI, WideTy, 1);
2666 Observer.changedInstr(MI);
2667 return Legalized;
2668 }
2669
2670 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2671 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2672 LLT SrcTy = MRI.getType(LHS);
2673 LLT OverflowTy = MRI.getType(OriginalOverflow);
2674 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2675
2676 // To determine if the result overflowed in the larger type, we extend the
2677 // input to the larger type, do the multiply (checking if it overflows),
2678 // then also check the high bits of the result to see if overflow happened
2679 // there.
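// As a sketch (assuming an s8 G_UMULO widened to WideTy = s16): both inputs
// are zero-extended, a plain s16 G_MUL is emitted (it cannot overflow since
// 16 >= 2 * 8), and the overflow flag is the comparison of the s16 product
// against its own low 8 bits zero-extended back to s16.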
2680 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2681 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2682 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2683
2684 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2685 // so we don't need to check the overflow result of larger type Mulo.
2686 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2687
2688 unsigned MulOpc =
2689 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2690
2691 MachineInstrBuilder Mulo;
2692 if (WideMulCanOverflow)
2693 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2694 {LeftOperand, RightOperand});
2695 else
2696 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2697
2698 auto Mul = Mulo->getOperand(0);
2699 MIRBuilder.buildTrunc(Result, Mul);
2700
2701 MachineInstrBuilder ExtResult;
2702 // Overflow occurred if it occurred in the larger type, or if the high part
2703 // of the result does not zero/sign-extend the low part. Check this second
2704 // possibility first.
2705 if (IsSigned) {
2706 // For signed, overflow occurred when the high part does not sign-extend
2707 // the low part.
2708 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2709 } else {
2710 // Unsigned overflow occurred when the high part does not zero-extend the
2711 // low part.
2712 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2713 }
2714
2715 if (WideMulCanOverflow) {
2716 auto Overflow =
2717 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2718 // Finally check if the multiplication in the larger type itself overflowed.
2719 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2720 } else {
2721 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2722 }
2723 MI.eraseFromParent();
2724 return Legalized;
2725}
2726
2727 LegalizerHelper::LegalizeResult
2728 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2729 unsigned Opcode = MI.getOpcode();
2730 switch (Opcode) {
2731 default:
2732 return UnableToLegalize;
2733 case TargetOpcode::G_ATOMICRMW_XCHG:
2734 case TargetOpcode::G_ATOMICRMW_ADD:
2735 case TargetOpcode::G_ATOMICRMW_SUB:
2736 case TargetOpcode::G_ATOMICRMW_AND:
2737 case TargetOpcode::G_ATOMICRMW_OR:
2738 case TargetOpcode::G_ATOMICRMW_XOR:
2739 case TargetOpcode::G_ATOMICRMW_MIN:
2740 case TargetOpcode::G_ATOMICRMW_MAX:
2741 case TargetOpcode::G_ATOMICRMW_UMIN:
2742 case TargetOpcode::G_ATOMICRMW_UMAX:
2743 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2744 Observer.changingInstr(MI);
2745 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2746 widenScalarDst(MI, WideTy, 0);
2747 Observer.changedInstr(MI);
2748 return Legalized;
2749 case TargetOpcode::G_ATOMIC_CMPXCHG:
2750 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2751 Observer.changingInstr(MI);
2752 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2753 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2754 widenScalarDst(MI, WideTy, 0);
2755 Observer.changedInstr(MI);
2756 return Legalized;
2757 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2758 if (TypeIdx == 0) {
2759 Observer.changingInstr(MI);
2760 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2761 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2762 widenScalarDst(MI, WideTy, 0);
2763 Observer.changedInstr(MI);
2764 return Legalized;
2765 }
2766 assert(TypeIdx == 1 &&
2767 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2768 Observer.changingInstr(MI);
2769 widenScalarDst(MI, WideTy, 1);
2770 Observer.changedInstr(MI);
2771 return Legalized;
2772 case TargetOpcode::G_EXTRACT:
2773 return widenScalarExtract(MI, TypeIdx, WideTy);
2774 case TargetOpcode::G_INSERT:
2775 return widenScalarInsert(MI, TypeIdx, WideTy);
2776 case TargetOpcode::G_MERGE_VALUES:
2777 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2778 case TargetOpcode::G_UNMERGE_VALUES:
2779 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2780 case TargetOpcode::G_SADDO:
2781 case TargetOpcode::G_SSUBO:
2782 case TargetOpcode::G_UADDO:
2783 case TargetOpcode::G_USUBO:
2784 case TargetOpcode::G_SADDE:
2785 case TargetOpcode::G_SSUBE:
2786 case TargetOpcode::G_UADDE:
2787 case TargetOpcode::G_USUBE:
2788 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2789 case TargetOpcode::G_UMULO:
2790 case TargetOpcode::G_SMULO:
2791 return widenScalarMulo(MI, TypeIdx, WideTy);
2792 case TargetOpcode::G_SADDSAT:
2793 case TargetOpcode::G_SSUBSAT:
2794 case TargetOpcode::G_SSHLSAT:
2795 case TargetOpcode::G_UADDSAT:
2796 case TargetOpcode::G_USUBSAT:
2797 case TargetOpcode::G_USHLSAT:
2798 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2799 case TargetOpcode::G_CTTZ:
2800 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2801 case TargetOpcode::G_CTLZ:
2802 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2803 case TargetOpcode::G_CTLS:
2804 case TargetOpcode::G_CTPOP: {
2805 if (TypeIdx == 0) {
2806 Observer.changingInstr(MI);
2807 widenScalarDst(MI, WideTy, 0);
2808 Observer.changedInstr(MI);
2809 return Legalized;
2810 }
2811
2812 Register SrcReg = MI.getOperand(1).getReg();
2813
2814 // First extend the input.
2815 unsigned ExtOpc;
2816 switch (Opcode) {
2817 case TargetOpcode::G_CTTZ:
2818 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2819 ExtOpc = TargetOpcode::G_ANYEXT;
2820 break;
2821 case TargetOpcode::G_CTLS:
2822 ExtOpc = TargetOpcode::G_SEXT;
2823 break;
2824 default:
2825 ExtOpc = TargetOpcode::G_ZEXT;
2826 }
2827
2828 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2829 LLT CurTy = MRI.getType(SrcReg);
2830 unsigned NewOpc = Opcode;
2831 if (NewOpc == TargetOpcode::G_CTTZ) {
2832 // The count is the same in the larger type except if the original
2833 // value was zero. This can be handled by setting the bit just off
2834 // the top of the original type.
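// For example (a sketch, assuming CurTy = s8 and WideTy = s32): OR-ing the
// extended source with 0x100 makes a zero input yield a count of 8 from
// G_CTTZ_ZERO_UNDEF, which is the correct s8 G_CTTZ result.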
2835 auto TopBit =
2836 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2837 MIBSrc = MIRBuilder.buildOr(
2838 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2839 // Now we know the operand is non-zero, use the more relaxed opcode.
2840 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2841 }
2842
2843 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2844
2845 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2846 // An optimization where the result is the CTLZ after the left shift by
2847 // the difference in bit width between WideTy and CurTy, that is,
2848 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2849 // Result = ctlz MIBSrc
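// For example (a sketch, assuming CurTy = s16, WideTy = s32 and an input of
// 0x0001): after shifting left by 16 the value is 0x00010000, whose 32-bit
// CTLZ is 15, matching the correct s16 result.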
2850 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2851 MIRBuilder.buildConstant(WideTy, SizeDiff));
2852 }
2853
2854 // Perform the operation at the larger size.
2855 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2856 // This is already the correct result for CTPOP and CTTZs
2857 if (Opcode == TargetOpcode::G_CTLZ || Opcode == TargetOpcode::G_CTLS) {
2858 // The correct result is NewOp - (size of WideTy - size of CurTy).
2859 MIBNewOp = MIRBuilder.buildSub(
2860 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2861 }
2862
2863 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2864 MI.eraseFromParent();
2865 return Legalized;
2866 }
2867 case TargetOpcode::G_BSWAP: {
2868 Observer.changingInstr(MI);
2869 Register DstReg = MI.getOperand(0).getReg();
2870
2871 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2872 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2873 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2874 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2875
2876 MI.getOperand(0).setReg(DstExt);
2877
2878 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2879
2880 LLT Ty = MRI.getType(DstReg);
2881 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2882 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2883 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2884
2885 MIRBuilder.buildTrunc(DstReg, ShrReg);
2886 Observer.changedInstr(MI);
2887 return Legalized;
2888 }
2889 case TargetOpcode::G_BITREVERSE: {
2890 Observer.changingInstr(MI);
2891
2892 Register DstReg = MI.getOperand(0).getReg();
2893 LLT Ty = MRI.getType(DstReg);
2894 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2895
2896 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2897 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2898 MI.getOperand(0).setReg(DstExt);
2899 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2900
2901 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2902 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2903 MIRBuilder.buildTrunc(DstReg, Shift);
2904 Observer.changedInstr(MI);
2905 return Legalized;
2906 }
2907 case TargetOpcode::G_FREEZE:
2908 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2909 Observer.changingInstr(MI);
2910 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2911 widenScalarDst(MI, WideTy);
2912 Observer.changedInstr(MI);
2913 return Legalized;
2914
2915 case TargetOpcode::G_ABS:
2916 Observer.changingInstr(MI);
2917 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2918 widenScalarDst(MI, WideTy);
2919 Observer.changedInstr(MI);
2920 return Legalized;
2921
2922 case TargetOpcode::G_ADD:
2923 case TargetOpcode::G_AND:
2924 case TargetOpcode::G_MUL:
2925 case TargetOpcode::G_OR:
2926 case TargetOpcode::G_XOR:
2927 case TargetOpcode::G_SUB:
2928 case TargetOpcode::G_SHUFFLE_VECTOR:
2929 // Perform operation at larger width (any extension is fine here, high bits
2930 // don't affect the result) and then truncate the result back to the
2931 // original type.
2932 Observer.changingInstr(MI);
2933 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2934 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2935 widenScalarDst(MI, WideTy);
2936 Observer.changedInstr(MI);
2937 return Legalized;
2938
2939 case TargetOpcode::G_SBFX:
2940 case TargetOpcode::G_UBFX:
2941 Observer.changingInstr(MI);
2942
2943 if (TypeIdx == 0) {
2944 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2945 widenScalarDst(MI, WideTy);
2946 } else {
2947 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2948 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2949 }
2950
2951 Observer.changedInstr(MI);
2952 return Legalized;
2953
2954 case TargetOpcode::G_SHL:
2955 Observer.changingInstr(MI);
2956
2957 if (TypeIdx == 0) {
2958 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2959 widenScalarDst(MI, WideTy);
2960 } else {
2961 assert(TypeIdx == 1);
2962 // The "number of bits to shift" operand must preserve its value as an
2963 // unsigned integer:
2964 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2965 }
2966
2967 Observer.changedInstr(MI);
2968 return Legalized;
2969
2970 case TargetOpcode::G_ROTR:
2971 case TargetOpcode::G_ROTL:
2972 if (TypeIdx != 1)
2973 return UnableToLegalize;
2974
2975 Observer.changingInstr(MI);
2976 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2977 Observer.changedInstr(MI);
2978 return Legalized;
2979
2980 case TargetOpcode::G_SDIV:
2981 case TargetOpcode::G_SREM:
2982 case TargetOpcode::G_SMIN:
2983 case TargetOpcode::G_SMAX:
2984 case TargetOpcode::G_ABDS:
2985 Observer.changingInstr(MI);
2986 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2987 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2988 widenScalarDst(MI, WideTy);
2989 Observer.changedInstr(MI);
2990 return Legalized;
2991
2992 case TargetOpcode::G_SDIVREM:
2993 Observer.changingInstr(MI);
2994 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2995 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2996 widenScalarDst(MI, WideTy);
2997 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
2998 widenScalarDst(MI, WideTy, 1);
2999 Observer.changedInstr(MI);
3000 return Legalized;
3001
3002 case TargetOpcode::G_ASHR:
3003 case TargetOpcode::G_LSHR:
3004 Observer.changingInstr(MI);
3005
3006 if (TypeIdx == 0) {
3007 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
3008 : TargetOpcode::G_ZEXT;
3009
3010 widenScalarSrc(MI, WideTy, 1, CvtOp);
3011 widenScalarDst(MI, WideTy);
3012 } else {
3013 assert(TypeIdx == 1);
3014 // The "number of bits to shift" operand must preserve its value as an
3015 // unsigned integer:
3016 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3017 }
3018
3019 Observer.changedInstr(MI);
3020 return Legalized;
3021 case TargetOpcode::G_UDIV:
3022 case TargetOpcode::G_UREM:
3023 case TargetOpcode::G_ABDU:
3024 Observer.changingInstr(MI);
3025 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3026 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3027 widenScalarDst(MI, WideTy);
3028 Observer.changedInstr(MI);
3029 return Legalized;
3030 case TargetOpcode::G_UDIVREM:
3031 Observer.changingInstr(MI);
3032 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3033 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3034 widenScalarDst(MI, WideTy);
3035 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3036 widenScalarDst(MI, WideTy, 1);
3037 Observer.changedInstr(MI);
3038 return Legalized;
3039 case TargetOpcode::G_UMIN:
3040 case TargetOpcode::G_UMAX: {
3041 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3042
3043 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3044 unsigned ExtOpc =
3045 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty, Ctx),
3046 getApproximateEVTForLLT(WideTy, Ctx))
3047 ? TargetOpcode::G_SEXT
3048 : TargetOpcode::G_ZEXT;
3049
3050 Observer.changingInstr(MI);
3051 widenScalarSrc(MI, WideTy, 1, ExtOpc);
3052 widenScalarSrc(MI, WideTy, 2, ExtOpc);
3053 widenScalarDst(MI, WideTy);
3054 Observer.changedInstr(MI);
3055 return Legalized;
3056 }
3057
3058 case TargetOpcode::G_SELECT:
3059 Observer.changingInstr(MI);
3060 if (TypeIdx == 0) {
3061 // Perform operation at larger width (any extension is fine here, high
3062 // bits don't affect the result) and then truncate the result back to the
3063 // original type.
3064 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3065 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
3066 widenScalarDst(MI, WideTy);
3067 } else {
3068 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
3069 // Explicit extension is required here since high bits affect the result.
3070 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
3071 }
3072 Observer.changedInstr(MI);
3073 return Legalized;
3074
3075 case TargetOpcode::G_FPEXT:
3076 if (TypeIdx != 1)
3077 return UnableToLegalize;
3078
3079 Observer.changingInstr(MI);
3080 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3081 Observer.changedInstr(MI);
3082 return Legalized;
3083 case TargetOpcode::G_FPTOSI:
3084 case TargetOpcode::G_FPTOUI:
3085 case TargetOpcode::G_INTRINSIC_LRINT:
3086 case TargetOpcode::G_INTRINSIC_LLRINT:
3087 case TargetOpcode::G_IS_FPCLASS:
3088 Observer.changingInstr(MI);
3089
3090 if (TypeIdx == 0)
3091 widenScalarDst(MI, WideTy);
3092 else
3093 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3094
3095 Observer.changedInstr(MI);
3096 return Legalized;
3097 case TargetOpcode::G_SITOFP:
3098 Observer.changingInstr(MI);
3099
3100 if (TypeIdx == 0)
3101 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3102 else
3103 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
3104
3105 Observer.changedInstr(MI);
3106 return Legalized;
3107 case TargetOpcode::G_UITOFP:
3108 Observer.changingInstr(MI);
3109
3110 if (TypeIdx == 0)
3111 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3112 else
3113 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3114
3115 Observer.changedInstr(MI);
3116 return Legalized;
3117 case TargetOpcode::G_FPTOSI_SAT:
3118 case TargetOpcode::G_FPTOUI_SAT:
3119 Observer.changingInstr(MI);
3120
3121 if (TypeIdx == 0) {
3122 Register OldDst = MI.getOperand(0).getReg();
3123 LLT Ty = MRI.getType(OldDst);
3124 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3125 Register NewDst;
3126 MI.getOperand(0).setReg(ExtReg);
3127 uint64_t ShortBits = Ty.getScalarSizeInBits();
3128 uint64_t WideBits = WideTy.getScalarSizeInBits();
3129 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3130 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3131 // z = i16 fptosi_sat(a)
3132 // ->
3133 // x = i32 fptosi_sat(a)
3134 // y = smin(x, 32767)
3135 // z = smax(y, -32768)
3136 auto MaxVal = MIRBuilder.buildConstant(
3137 WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
3138 auto MinVal = MIRBuilder.buildConstant(
3139 WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
3140 Register MidReg =
3141 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3142 NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3143 } else {
3144 // z = i16 fptoui_sat(a)
3145 // ->
3146 // x = i32 fptoui_sat(a)
3147 // y = smin(x, 65535)
3148 auto MaxVal = MIRBuilder.buildConstant(
3149 WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
3150 NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3151 }
3152 MIRBuilder.buildTrunc(OldDst, NewDst);
3153 } else
3154 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3155
3156 Observer.changedInstr(MI);
3157 return Legalized;
3158 case TargetOpcode::G_LOAD:
3159 case TargetOpcode::G_SEXTLOAD:
3160 case TargetOpcode::G_ZEXTLOAD:
3161 Observer.changingInstr(MI);
3162 widenScalarDst(MI, WideTy);
3163 Observer.changedInstr(MI);
3164 return Legalized;
3165
3166 case TargetOpcode::G_STORE: {
3167 if (TypeIdx != 0)
3168 return UnableToLegalize;
3169
3170 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3171 assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
3172 if (!Ty.isScalar()) {
3173 // We need to widen the vector element type.
3174 Observer.changingInstr(MI);
3175 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
3176 // We also need to adjust the MMO to turn this into a truncating store.
3177 MachineMemOperand &MMO = **MI.memoperands_begin();
3178 MachineFunction &MF = MIRBuilder.getMF();
3179 auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
3180 MI.setMemRefs(MF, {NewMMO});
3181 Observer.changedInstr(MI);
3182 return Legalized;
3183 }
3184
3185 Observer.changingInstr(MI);
3186
3187 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3188 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3189 widenScalarSrc(MI, WideTy, 0, ExtType);
3190
3191 Observer.changedInstr(MI);
3192 return Legalized;
3193 }
3194 case TargetOpcode::G_CONSTANT: {
3195 MachineOperand &SrcMO = MI.getOperand(1);
3196 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3197 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3198 MRI.getType(MI.getOperand(0).getReg()));
3199 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3200 ExtOpc == TargetOpcode::G_ANYEXT) &&
3201 "Illegal Extend");
3202 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3203 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3204 ? SrcVal.sext(WideTy.getSizeInBits())
3205 : SrcVal.zext(WideTy.getSizeInBits());
3206 Observer.changingInstr(MI);
3207 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3208
3209 widenScalarDst(MI, WideTy);
3210 Observer.changedInstr(MI);
3211 return Legalized;
3212 }
3213 case TargetOpcode::G_FCONSTANT: {
3214 // To avoid changing the bits of the constant due to extension to a larger
3215 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
3216 MachineOperand &SrcMO = MI.getOperand(1);
3217 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3218 MIRBuilder.setInstrAndDebugLoc(MI);
3219 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
3220 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
3221 MI.eraseFromParent();
3222 return Legalized;
3223 }
3224 case TargetOpcode::G_IMPLICIT_DEF: {
3225 Observer.changingInstr(MI);
3226 widenScalarDst(MI, WideTy);
3227 Observer.changedInstr(MI);
3228 return Legalized;
3229 }
3230 case TargetOpcode::G_BRCOND:
3231 Observer.changingInstr(MI);
3232 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
3233 Observer.changedInstr(MI);
3234 return Legalized;
3235
3236 case TargetOpcode::G_FCMP:
3237 Observer.changingInstr(MI);
3238 if (TypeIdx == 0)
3239 widenScalarDst(MI, WideTy);
3240 else {
3241 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3242 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
3243 }
3244 Observer.changedInstr(MI);
3245 return Legalized;
3246
3247 case TargetOpcode::G_ICMP:
3248 Observer.changingInstr(MI);
3249 if (TypeIdx == 0)
3250 widenScalarDst(MI, WideTy);
3251 else {
3252 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
3253 CmpInst::Predicate Pred =
3254 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3255
3256 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3257 unsigned ExtOpcode =
3258 (CmpInst::isSigned(Pred) ||
3259 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx),
3260 getApproximateEVTForLLT(WideTy, Ctx)))
3261 ? TargetOpcode::G_SEXT
3262 : TargetOpcode::G_ZEXT;
3263 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
3264 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
3265 }
3266 Observer.changedInstr(MI);
3267 return Legalized;
3268
3269 case TargetOpcode::G_PTR_ADD:
3270 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
3271 Observer.changingInstr(MI);
3272 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3273 Observer.changedInstr(MI);
3274 return Legalized;
3275
3276 case TargetOpcode::G_PHI: {
3277 assert(TypeIdx == 0 && "Expecting only Idx 0");
3278
3279 Observer.changingInstr(MI);
3280 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3281 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3282 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
3283 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
3284 }
3285
3286 MachineBasicBlock &MBB = *MI.getParent();
3287 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3288 widenScalarDst(MI, WideTy);
3289 Observer.changedInstr(MI);
3290 return Legalized;
3291 }
3292 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3293 if (TypeIdx == 0) {
3294 Register VecReg = MI.getOperand(1).getReg();
3295 LLT VecTy = MRI.getType(VecReg);
3296 Observer.changingInstr(MI);
3297
3298 widenScalarSrc(
3299 MI,
3300 LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
3301 TargetOpcode::G_ANYEXT);
3302
3303 widenScalarDst(MI, WideTy, 0);
3304 Observer.changedInstr(MI);
3305 return Legalized;
3306 }
3307
3308 if (TypeIdx != 2)
3309 return UnableToLegalize;
3310 Observer.changingInstr(MI);
3311 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3312 Observer.changedInstr(MI);
3313 return Legalized;
3314 }
3315 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3316 if (TypeIdx == 0) {
3317 Observer.changingInstr(MI);
3318 const LLT WideEltTy = WideTy.getElementType();
3319
3320 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3321 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3322 widenScalarDst(MI, WideTy, 0);
3323 Observer.changedInstr(MI);
3324 return Legalized;
3325 }
3326
3327 if (TypeIdx == 1) {
3328 Observer.changingInstr(MI);
3329
3330 Register VecReg = MI.getOperand(1).getReg();
3331 LLT VecTy = MRI.getType(VecReg);
3332 LLT WideVecTy = VecTy.changeVectorElementType(WideTy);
3333
3334 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3335 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3336 widenScalarDst(MI, WideVecTy, 0);
3337 Observer.changedInstr(MI);
3338 return Legalized;
3339 }
3340
3341 if (TypeIdx == 2) {
3342 Observer.changingInstr(MI);
3343 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3344 Observer.changedInstr(MI);
3345 return Legalized;
3346 }
3347
3348 return UnableToLegalize;
3349 }
3350 case TargetOpcode::G_FADD:
3351 case TargetOpcode::G_FMUL:
3352 case TargetOpcode::G_FSUB:
3353 case TargetOpcode::G_FMA:
3354 case TargetOpcode::G_FMAD:
3355 case TargetOpcode::G_FNEG:
3356 case TargetOpcode::G_FABS:
3357 case TargetOpcode::G_FCANONICALIZE:
3358 case TargetOpcode::G_FMINNUM:
3359 case TargetOpcode::G_FMAXNUM:
3360 case TargetOpcode::G_FMINNUM_IEEE:
3361 case TargetOpcode::G_FMAXNUM_IEEE:
3362 case TargetOpcode::G_FMINIMUM:
3363 case TargetOpcode::G_FMAXIMUM:
3364 case TargetOpcode::G_FMINIMUMNUM:
3365 case TargetOpcode::G_FMAXIMUMNUM:
3366 case TargetOpcode::G_FDIV:
3367 case TargetOpcode::G_FREM:
3368 case TargetOpcode::G_FCEIL:
3369 case TargetOpcode::G_FFLOOR:
3370 case TargetOpcode::G_FCOS:
3371 case TargetOpcode::G_FSIN:
3372 case TargetOpcode::G_FTAN:
3373 case TargetOpcode::G_FACOS:
3374 case TargetOpcode::G_FASIN:
3375 case TargetOpcode::G_FATAN:
3376 case TargetOpcode::G_FATAN2:
3377 case TargetOpcode::G_FCOSH:
3378 case TargetOpcode::G_FSINH:
3379 case TargetOpcode::G_FTANH:
3380 case TargetOpcode::G_FLOG10:
3381 case TargetOpcode::G_FLOG:
3382 case TargetOpcode::G_FLOG2:
3383 case TargetOpcode::G_FRINT:
3384 case TargetOpcode::G_FNEARBYINT:
3385 case TargetOpcode::G_FSQRT:
3386 case TargetOpcode::G_FEXP:
3387 case TargetOpcode::G_FEXP2:
3388 case TargetOpcode::G_FEXP10:
3389 case TargetOpcode::G_FPOW:
3390 case TargetOpcode::G_INTRINSIC_TRUNC:
3391 case TargetOpcode::G_INTRINSIC_ROUND:
3392 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3393 assert(TypeIdx == 0);
3394 Observer.changingInstr(MI);
3395
3396 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3397 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3398
3399 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3400 Observer.changedInstr(MI);
3401 return Legalized;
3402 case TargetOpcode::G_FMODF: {
3403 Observer.changingInstr(MI);
3404 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3405
3406 widenScalarDst(MI, WideTy, 1, TargetOpcode::G_FPTRUNC);
3407 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3408 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3409 Observer.changedInstr(MI);
3410 return Legalized;
3411 }
3412 case TargetOpcode::G_FPOWI:
3413 case TargetOpcode::G_FLDEXP:
3414 case TargetOpcode::G_STRICT_FLDEXP: {
3415 if (TypeIdx == 0) {
3416 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3417 return UnableToLegalize;
3418
3419 Observer.changingInstr(MI);
3420 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3421 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3422 Observer.changedInstr(MI);
3423 return Legalized;
3424 }
3425
3426 if (TypeIdx == 1) {
3427 // For some reason SelectionDAG tries to promote to a libcall without
3428 // actually changing the integer type for promotion.
3429 Observer.changingInstr(MI);
3430 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3431 Observer.changedInstr(MI);
3432 return Legalized;
3433 }
3434
3435 return UnableToLegalize;
3436 }
3437 case TargetOpcode::G_FFREXP: {
3438 Observer.changingInstr(MI);
3439
3440 if (TypeIdx == 0) {
3441 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3442 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3443 } else {
3444 widenScalarDst(MI, WideTy, 1);
3445 }
3446
3447 Observer.changedInstr(MI);
3448 return Legalized;
3449 }
3450 case TargetOpcode::G_LROUND:
3451 case TargetOpcode::G_LLROUND:
3452 Observer.changingInstr(MI);
3453
3454 if (TypeIdx == 0)
3455 widenScalarDst(MI, WideTy);
3456 else
3457 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3458
3459 Observer.changedInstr(MI);
3460 return Legalized;
3461
3462 case TargetOpcode::G_INTTOPTR:
3463 if (TypeIdx != 1)
3464 return UnableToLegalize;
3465
3466 Observer.changingInstr(MI);
3467 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3468 Observer.changedInstr(MI);
3469 return Legalized;
3470 case TargetOpcode::G_PTRTOINT:
3471 if (TypeIdx != 0)
3472 return UnableToLegalize;
3473
3474 Observer.changingInstr(MI);
3475 widenScalarDst(MI, WideTy, 0);
3476 Observer.changedInstr(MI);
3477 return Legalized;
3478 case TargetOpcode::G_BUILD_VECTOR: {
3479 Observer.changingInstr(MI);
3480
3481 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3482 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3483 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3484
3485 // Avoid changing the result vector type if the source element type was
3486 // requested.
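    // e.g. for TypeIdx == 1, widening the s8 element operands to s32:
    //   %v:_(<4 x s8>) = G_BUILD_VECTOR %a:_(s8), %b:_(s8), ...
    // becomes
    //   %v:_(<4 x s8>) = G_BUILD_VECTOR_TRUNC %a':_(s32), %b':_(s32), ...
    // so only the element operands change, not the result vector type.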
3487 if (TypeIdx == 1) {
3488 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3489 } else {
3490 widenScalarDst(MI, WideTy, 0);
3491 }
3492
3493 Observer.changedInstr(MI);
3494 return Legalized;
3495 }
3496 case TargetOpcode::G_SEXT_INREG:
3497 if (TypeIdx != 0)
3498 return UnableToLegalize;
3499
3500 Observer.changingInstr(MI);
3501 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3502 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3503 Observer.changedInstr(MI);
3504 return Legalized;
3505 case TargetOpcode::G_PTRMASK: {
3506 if (TypeIdx != 1)
3507 return UnableToLegalize;
3508 Observer.changingInstr(MI);
3509 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3510 Observer.changedInstr(MI);
3511 return Legalized;
3512 }
3513 case TargetOpcode::G_VECREDUCE_ADD: {
3514 if (TypeIdx != 1)
3515 return UnableToLegalize;
3516 Observer.changingInstr(MI);
3517 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3518 widenScalarDst(MI, WideTy.getScalarType(), 0, TargetOpcode::G_TRUNC);
3519 Observer.changedInstr(MI);
3520 return Legalized;
3521 }
3522 case TargetOpcode::G_VECREDUCE_FADD:
3523 case TargetOpcode::G_VECREDUCE_FMUL:
3524 case TargetOpcode::G_VECREDUCE_FMIN:
3525 case TargetOpcode::G_VECREDUCE_FMAX:
3526 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3527 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3528 if (TypeIdx != 0)
3529 return UnableToLegalize;
3530 Observer.changingInstr(MI);
3531 Register VecReg = MI.getOperand(1).getReg();
3532 LLT VecTy = MRI.getType(VecReg);
3533 LLT WideVecTy = VecTy.changeElementType(WideTy);
3534 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3535 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3536 Observer.changedInstr(MI);
3537 return Legalized;
3538 }
3539 case TargetOpcode::G_VSCALE: {
3540 MachineOperand &SrcMO = MI.getOperand(1);
3541 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3542 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3543 // The CImm is always a signed value
3544 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3545 Observer.changingInstr(MI);
3546 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3547 widenScalarDst(MI, WideTy);
3548 Observer.changedInstr(MI);
3549 return Legalized;
3550 }
3551 case TargetOpcode::G_SPLAT_VECTOR: {
3552 if (TypeIdx != 1)
3553 return UnableToLegalize;
3554
3555 Observer.changingInstr(MI);
3556 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3557 Observer.changedInstr(MI);
3558 return Legalized;
3559 }
3560 case TargetOpcode::G_INSERT_SUBVECTOR: {
3561 if (TypeIdx != 0)
3562 return UnableToLegalize;
3563
3564   GInsertSubvector &IS = cast<GInsertSubvector>(MI);
3565   Register BigVec = IS.getBigVec();
3566 Register SubVec = IS.getSubVec();
3567
3568 LLT SubVecTy = MRI.getType(SubVec);
3569 LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
3570
3571 // Widen the G_INSERT_SUBVECTOR
3572 auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
3573 auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3574 auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3575 IS.getIndexImm());
3576
3577 // Truncate back down
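    // Comparing the widened insert against a zero splat turns each widened
    // lane back into the original (e.g. s1) element type: a nonzero lane
    // becomes true and a zero lane becomes false.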
3578 auto SplatZero = MIRBuilder.buildSplatVector(
3579 WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
3580 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, IS.getReg(0), WideInsert,
3581 SplatZero);
3582
3583 MI.eraseFromParent();
3584
3585 return Legalized;
3586 }
3587 }
3588}
3589
3590 static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3591                              MachineIRBuilder &B, Register Src, LLT Ty) {
3592 auto Unmerge = B.buildUnmerge(Ty, Src);
3593 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3594 Pieces.push_back(Unmerge.getReg(I));
3595}
3596
3597static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3598 MachineIRBuilder &MIRBuilder) {
3599 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3600 MachineFunction &MF = MIRBuilder.getMF();
3601 const DataLayout &DL = MIRBuilder.getDataLayout();
3602 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3603 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3604 LLT DstLLT = MRI.getType(DstReg);
3605
3606 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3607
3608 auto Addr = MIRBuilder.buildConstantPool(
3609 AddrPtrTy,
3610 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3611
3612 MachineMemOperand *MMO =
3613       MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3614                               MachineMemOperand::MOLoad, DstLLT, Alignment);
3615
3616 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3617}
3618
3619 LegalizerHelper::LegalizeResult
3620 LegalizerHelper::lowerConstant(MachineInstr &MI) {
3621   const MachineOperand &ConstOperand = MI.getOperand(1);
3622 const Constant *ConstantVal = ConstOperand.getCImm();
3623
3624 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3625 MI.eraseFromParent();
3626
3627 return Legalized;
3628}
3629
3630 LegalizerHelper::LegalizeResult
3631 LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3632   const MachineOperand &ConstOperand = MI.getOperand(1);
3633 const Constant *ConstantVal = ConstOperand.getFPImm();
3634
3635 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3636 MI.eraseFromParent();
3637
3638 return Legalized;
3639}
3640
3641 LegalizerHelper::LegalizeResult
3642 LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3643   auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3644 if (SrcTy.isVector()) {
3645 LLT SrcEltTy = SrcTy.getElementType();
3646     SmallVector<Register, 8> SrcRegs;
3647
3648 if (DstTy.isVector()) {
3649 int NumDstElt = DstTy.getNumElements();
3650 int NumSrcElt = SrcTy.getNumElements();
3651
3652 LLT DstEltTy = DstTy.getElementType();
3653 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3654 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3655
3656 // If there's an element size mismatch, insert intermediate casts to match
3657 // the result element type.
3658 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3659 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3660 //
3661 // =>
3662 //
3663 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3664       // %4:_(<2 x s8>) = G_BITCAST %2
3665       // %5:_(<2 x s8>) = G_BITCAST %3
3666       // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3667 DstCastTy = DstTy.changeVectorElementCount(
3668 ElementCount::getFixed(NumDstElt / NumSrcElt));
3669 SrcPartTy = SrcEltTy;
3670 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3671 //
3672 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3673 //
3674 // =>
3675 //
3676 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3677       // %4:_(s16) = G_BITCAST %2
3678       // %5:_(s16) = G_BITCAST %3
3679       // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3680 SrcPartTy = SrcTy.changeVectorElementCount(
3681 ElementCount::getFixed(NumSrcElt / NumDstElt));
3682 DstCastTy = DstEltTy;
3683 }
3684
3685 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3686 for (Register &SrcReg : SrcRegs)
3687 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3688 } else
3689 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3690
3691 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3692 MI.eraseFromParent();
3693 return Legalized;
3694 }
3695
3696 if (DstTy.isVector()) {
3697     SmallVector<Register, 8> SrcRegs;
3698     getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3699 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3700 MI.eraseFromParent();
3701 return Legalized;
3702 }
3703
3704 return UnableToLegalize;
3705}
3706
3707/// Figure out the bit offset into a register when coercing a vector index for
3708 /// the wide element type. This is only for the case when promoting a vector
3709 /// to one with larger elements.
3710 ///
3711///
3712/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3713/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
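///
/// For example, with OldEltSize = 8 and NewEltSize = 32 the element ratio is 4,
/// so for %idx = 6 this yields %offset_idx = 6 & 3 = 2 and
/// %offset_bits = 2 << 3 = 16, i.e. the narrow element starts 16 bits into its
/// containing wide element.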
3714 static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3715                                                    Register Idx,
3716 unsigned NewEltSize,
3717 unsigned OldEltSize) {
3718 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3719 LLT IdxTy = B.getMRI()->getType(Idx);
3720
3721 // Now figure out the amount we need to shift to get the target bits.
3722 auto OffsetMask = B.buildConstant(
3723 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3724 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3725 return B.buildShl(IdxTy, OffsetIdx,
3726 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3727}
3728
3729/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3730/// is casting to a vector with a smaller element size, perform multiple element
3731/// extracts and merge the results. If this is coercing to a vector with larger
3732/// elements, index the bitcasted vector and extract the target element with bit
3733/// operations. This is intended to force the indexing in the native register
3734/// size for architectures that can dynamically index the register file.
3735 LegalizerHelper::LegalizeResult
3736 LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3737                                          LLT CastTy) {
3738 if (TypeIdx != 1)
3739 return UnableToLegalize;
3740
3741 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3742
3743 LLT SrcEltTy = SrcVecTy.getElementType();
3744 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3745 unsigned OldNumElts = SrcVecTy.getNumElements();
3746
3747 LLT NewEltTy = CastTy.getScalarType();
3748 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3749
3750 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3751 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3752 if (NewNumElts > OldNumElts) {
3753 // Decreasing the vector element size
3754 //
3755 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3756 // =>
3757 // v4i32:castx = bitcast x:v2i64
3758 //
3759 // i64 = bitcast
3760 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3761 // (i32 (extract_vector_elt castx, (2 * y + 1)))
3762 //
3763 if (NewNumElts % OldNumElts != 0)
3764 return UnableToLegalize;
3765
3766 // Type of the intermediate result vector.
3767 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3768 LLT MidTy =
3769 CastTy.changeElementCount(ElementCount::getFixed(NewEltsPerOldElt));
3770
3771 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3772
3773 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3774 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3775
3776 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3777 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3778 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3779 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3780 NewOps[I] = Elt.getReg(0);
3781 }
3782
3783 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3784 MIRBuilder.buildBitcast(Dst, NewVec);
3785 MI.eraseFromParent();
3786 return Legalized;
3787 }
3788
3789 if (NewNumElts < OldNumElts) {
3790 if (NewEltSize % OldEltSize != 0)
3791 return UnableToLegalize;
3792
3793 // This only depends on powers of 2 because we use bit tricks to figure out
3794 // the bit offset we need to shift to get the target element. A general
3795 // expansion could emit division/multiply.
3796 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3797 return UnableToLegalize;
3798
3799 // Increasing the vector element size.
3800 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3801 //
3802 // =>
3803 //
3804 // %cast = G_BITCAST %vec
3805 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3806 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3807 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3808 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3809 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3810 // %elt = G_TRUNC %elt_bits
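    //
    // e.g. extracting element 5 of <8 x s8> through a <2 x s32> cast:
    //   %scaled_idx = 5 >> 2 = 1 selects the second s32 lane, and
    //   %offset_bits = (5 & 3) * 8 = 8 shifts the wanted byte down to bit 0.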
3811
3812 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3813 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3814
3815 // Divide to get the index in the wider element type.
3816 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3817
3818 Register WideElt = CastVec;
3819 if (CastTy.isVector()) {
3820 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3821 ScaledIdx).getReg(0);
3822 }
3823
3824 // Compute the bit offset into the register of the target element.
3825   Register OffsetBits = getBitcastWiderVectorElementOffset(
3826       MIRBuilder, Idx, NewEltSize, OldEltSize);
3827
3828 // Shift the wide element to get the target element.
3829 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3830 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3831 MI.eraseFromParent();
3832 return Legalized;
3833 }
3834
3835 return UnableToLegalize;
3836}
3837
3838 /// Emit code to insert \p InsertReg into \p TargetReg at bit offset
3839 /// \p OffsetBits, while preserving the other bits in \p TargetReg.
3840///
3841 /// (ZExt(InsertReg) << Offset) | (TargetReg & ~(((1 << InsertReg.size()) - 1) << Offset))
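///
/// For example, inserting an s8 value V at OffsetBits = 16 of an s32 T computes
/// (zext(V) << 16) | (T & ~(0xff << 16)), i.e. byte 2 of T is replaced by V and
/// the remaining bytes are preserved.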
3842 static Register buildBitFieldInsert(MachineIRBuilder &B,
3843                                     Register TargetReg, Register InsertReg,
3844 Register OffsetBits) {
3845 LLT TargetTy = B.getMRI()->getType(TargetReg);
3846 LLT InsertTy = B.getMRI()->getType(InsertReg);
3847 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3848 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3849
3850 // Produce a bitmask of the value to insert
3851 auto EltMask = B.buildConstant(
3852 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3853 InsertTy.getSizeInBits()));
3854 // Shift it into position
3855 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3856 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3857
3858 // Clear out the bits in the wide element
3859 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3860
3861 // The value to insert has all zeros already, so stick it into the masked
3862 // wide element.
3863 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3864}
3865
3866/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3867/// is increasing the element size, perform the indexing in the target element
3868/// type, and use bit operations to insert at the element position. This is
3869/// intended for architectures that can dynamically index the register file and
3870/// want to force indexing in the native register size.
3871 LegalizerHelper::LegalizeResult
3872 LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3873                                         LLT CastTy) {
3874 if (TypeIdx != 0)
3875 return UnableToLegalize;
3876
3877 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3878 MI.getFirst4RegLLTs();
3879 LLT VecTy = DstTy;
3880
3881 LLT VecEltTy = VecTy.getElementType();
3882 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3883 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3884 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3885
3886 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3887 unsigned OldNumElts = VecTy.getNumElements();
3888
3889 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3890 if (NewNumElts < OldNumElts) {
3891 if (NewEltSize % OldEltSize != 0)
3892 return UnableToLegalize;
3893
3894 // This only depends on powers of 2 because we use bit tricks to figure out
3895 // the bit offset we need to shift to get the target element. A general
3896 // expansion could emit division/multiply.
3897 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3898 return UnableToLegalize;
3899
3900 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3901 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3902
3903 // Divide to get the index in the wider element type.
3904 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3905
3906 Register ExtractedElt = CastVec;
3907 if (CastTy.isVector()) {
3908 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3909 ScaledIdx).getReg(0);
3910 }
3911
3912 // Compute the bit offset into the register of the target element.
3913   Register OffsetBits = getBitcastWiderVectorElementOffset(
3914       MIRBuilder, Idx, NewEltSize, OldEltSize);
3915
3916 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3917 Val, OffsetBits);
3918 if (CastTy.isVector()) {
3919 InsertedElt = MIRBuilder.buildInsertVectorElement(
3920 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3921 }
3922
3923 MIRBuilder.buildBitcast(Dst, InsertedElt);
3924 MI.eraseFromParent();
3925 return Legalized;
3926 }
3927
3928 return UnableToLegalize;
3929}
3930
3931 // This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3932 // those with operands that are smaller than legal.
3933//
3934// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3935//
3936// ===>
3937//
3938// s32 = G_BITCAST <4 x s8>
3939// s32 = G_BITCAST <4 x s8>
3940// s32 = G_BITCAST <4 x s8>
3941// s32 = G_BITCAST <4 x s8>
3942// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3943// <16 x s8> = G_BITCAST <4 x s32>
3944 LegalizerHelper::LegalizeResult
3945 LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
3946                                      LLT CastTy) {
3947   // This lowering only applies to G_CONCAT_VECTORS.
3948 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3949 if (!ConcatMI) {
3950 return UnableToLegalize;
3951 }
3952
3953 // Check if bitcast is Legal
3954 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3955 LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
3956
3957 // Check if the build vector is Legal
3958 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3959 return UnableToLegalize;
3960 }
3961
3962 // Bitcast the sources
3963 SmallVector<Register> BitcastRegs;
3964 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3965 BitcastRegs.push_back(
3966 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3967 .getReg(0));
3968 }
3969
3970 // Build the scalar values into a vector
3971 Register BuildReg =
3972 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
3973 MIRBuilder.buildBitcast(DstReg, BuildReg);
3974
3975 MI.eraseFromParent();
3976 return Legalized;
3977}
3978
3979// This bitcasts a shuffle vector to a different type currently of the same
3980// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
3981// will be used instead.
3982//
3983 // <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
3984// ===>
3985// <4 x s64> = G_PTRTOINT <4 x p0>
3986// <4 x s64> = G_PTRTOINT <4 x p0>
3987 // <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
3988// <16 x p0> = G_INTTOPTR <16 x s64>
3989 LegalizerHelper::LegalizeResult
3990 LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
3991                                       LLT CastTy) {
3992 auto ShuffleMI = cast<GShuffleVector>(&MI);
3993 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
3994 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
3995
3996 // We currently only handle vectors of the same size.
3997 if (TypeIdx != 0 ||
3998 CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
3999 CastTy.getElementCount() != DstTy.getElementCount())
4000 return UnableToLegalize;
4001
4002 LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
4003
4004 auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
4005 auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
4006 auto Shuf =
4007 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
4008 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
4009
4010 MI.eraseFromParent();
4011 return Legalized;
4012}
4013
4014/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
4015///
4016/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
4017///
4018/// ===>
4019///
4020 ///  <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
4021 ///  <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i8>, N / 8
4022/// <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
4023 LegalizerHelper::LegalizeResult
4024 LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
4025                                          LLT CastTy) {
4026 auto ES = cast<GExtractSubvector>(&MI);
4027
4028 if (!CastTy.isVector())
4029 return UnableToLegalize;
4030
4031 if (TypeIdx != 0)
4032 return UnableToLegalize;
4033
4034 Register Dst = ES->getReg(0);
4035 Register Src = ES->getSrcVec();
4036 uint64_t Idx = ES->getIndexImm();
4037
4038 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4039
4040 LLT DstTy = MRI.getType(Dst);
4041 LLT SrcTy = MRI.getType(Src);
4042 ElementCount DstTyEC = DstTy.getElementCount();
4043 ElementCount SrcTyEC = SrcTy.getElementCount();
4044 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4045 auto SrcTyMinElts = SrcTyEC.getKnownMinValue();
4046
4047 if (DstTy == CastTy)
4048 return Legalized;
4049
4050 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4051 return UnableToLegalize;
4052
4053 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4054 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4055 if (CastEltSize < DstEltSize)
4056 return UnableToLegalize;
4057
4058 auto AdjustAmt = CastEltSize / DstEltSize;
4059 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4060 SrcTyMinElts % AdjustAmt != 0)
4061 return UnableToLegalize;
4062
4063 Idx /= AdjustAmt;
4064 SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4065 auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
4066 auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
4067 MIRBuilder.buildBitcast(Dst, PromotedES);
4068
4069 ES->eraseFromParent();
4070 return Legalized;
4071}
4072
4073/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
4074///
4075/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
4076/// <vscale x 8 x i1>,
4077/// N
4078///
4079/// ===>
4080///
4081/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
4082/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
4083/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
4084/// <vscale x 1 x i8>, N / 8
4085/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
4086 LegalizerHelper::LegalizeResult
4087 LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
4088                                         LLT CastTy) {
4089 auto ES = cast<GInsertSubvector>(&MI);
4090
4091 if (!CastTy.isVector())
4092 return UnableToLegalize;
4093
4094 if (TypeIdx != 0)
4095 return UnableToLegalize;
4096
4097 Register Dst = ES->getReg(0);
4098 Register BigVec = ES->getBigVec();
4099 Register SubVec = ES->getSubVec();
4100 uint64_t Idx = ES->getIndexImm();
4101
4102 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4103
4104 LLT DstTy = MRI.getType(Dst);
4105 LLT BigVecTy = MRI.getType(BigVec);
4106 LLT SubVecTy = MRI.getType(SubVec);
4107
4108 if (DstTy == CastTy)
4109 return Legalized;
4110
4111 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4112 return UnableToLegalize;
4113
4114 ElementCount DstTyEC = DstTy.getElementCount();
4115 ElementCount BigVecTyEC = BigVecTy.getElementCount();
4116 ElementCount SubVecTyEC = SubVecTy.getElementCount();
4117 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4118 auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
4119 auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();
4120
4121 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4122 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4123 if (CastEltSize < DstEltSize)
4124 return UnableToLegalize;
4125
4126 auto AdjustAmt = CastEltSize / DstEltSize;
4127 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4128 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4129 return UnableToLegalize;
4130
4131 Idx /= AdjustAmt;
4132 BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4133 SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4134 auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
4135 auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
4136 auto PromotedIS =
4137 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4138 MIRBuilder.buildBitcast(Dst, PromotedIS);
4139
4140 ES->eraseFromParent();
4141 return Legalized;
4142}
4143
4144 LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
4145   // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
4146 Register DstReg = LoadMI.getDstReg();
4147 Register PtrReg = LoadMI.getPointerReg();
4148 LLT DstTy = MRI.getType(DstReg);
4149 MachineMemOperand &MMO = LoadMI.getMMO();
4150 LLT MemTy = MMO.getMemoryType();
4151 MachineFunction &MF = MIRBuilder.getMF();
4152
4153 unsigned MemSizeInBits = MemTy.getSizeInBits();
4154 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
4155
4156 if (MemSizeInBits != MemStoreSizeInBits) {
4157 if (MemTy.isVector())
4158 return UnableToLegalize;
4159
4160 // Promote to a byte-sized load if not loading an integral number of
4161 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
4162 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
4163 MachineMemOperand *NewMMO =
4164 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
4165
4166 Register LoadReg = DstReg;
4167 LLT LoadTy = DstTy;
4168
4169 // If this wasn't already an extending load, we need to widen the result
4170 // register to avoid creating a load with a narrower result than the source.
4171 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
4172 LoadTy = WideMemTy;
4173 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4174 }
4175
4176 if (isa<GSExtLoad>(LoadMI)) {
4177 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4178 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4179 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
4180 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4181 // The extra bits are guaranteed to be zero, since we stored them that
4182 // way. A zext load from Wide thus automatically gives zext from MemVT.
4183 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4184 } else {
4185 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4186 }
4187
4188 if (DstTy != LoadTy)
4189 MIRBuilder.buildTrunc(DstReg, LoadReg);
4190
4191 LoadMI.eraseFromParent();
4192 return Legalized;
4193 }
4194
4195 // Big endian lowering not implemented.
4196 if (MIRBuilder.getDataLayout().isBigEndian())
4197 return UnableToLegalize;
4198
4199 // This load needs splitting into power of 2 sized loads.
4200 //
4201 // Our strategy here is to generate anyextending loads for the smaller
4202 // types up to next power-2 result type, and then combine the two larger
4203 // result values together, before truncating back down to the non-pow-2
4204 // type.
4205 // E.g. v1 = i24 load =>
4206 // v2 = i32 zextload (2 byte)
4207 // v3 = i32 load (1 byte)
4208 // v4 = i32 shl v3, 16
4209 // v5 = i32 or v4, v2
4210 // v1 = i24 trunc v5
4211 // By doing this we generate the correct truncate which should get
4212 // combined away as an artifact with a matching extend.
4213
4214 uint64_t LargeSplitSize, SmallSplitSize;
4215
4216 if (!isPowerOf2_32(MemSizeInBits)) {
4217 // This load needs splitting into power of 2 sized loads.
4218 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
4219 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4220 } else {
4221 // This is already a power of 2, but we still need to split this in half.
4222 //
4223 // Assume we're being asked to decompose an unaligned load.
4224 // TODO: If this requires multiple splits, handle them all at once.
4225 auto &Ctx = MF.getFunction().getContext();
4226 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4227 return UnableToLegalize;
4228
4229 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4230 }
4231
4232 if (MemTy.isVector()) {
4233 // TODO: Handle vector extloads
4234 if (MemTy != DstTy)
4235 return UnableToLegalize;
4236
4237 Align Alignment = LoadMI.getAlign();
4238 // Given an alignment larger than the size of the memory, we can increase
4239 // the size of the load without needing to scalarize it.
4240 if (Alignment.value() * 8 > MemSizeInBits &&
4242 LLT MoreTy = DstTy.changeVectorElementCount(
4244 MachineMemOperand *NewMMO = MF.getMachineMemOperand(&MMO, 0, MoreTy);
4245 auto NewLoad = MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
4246 MIRBuilder.buildDeleteTrailingVectorElements(LoadMI.getReg(0),
4247 NewLoad.getReg(0));
4248 LoadMI.eraseFromParent();
4249 return Legalized;
4250 }
4251
4252 // TODO: We can do better than scalarizing the vector and at least split it
4253 // in half.
4254 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
4255 }
4256
4257 MachineMemOperand *LargeMMO =
4258 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4259 MachineMemOperand *SmallMMO =
4260 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4261
4262 LLT PtrTy = MRI.getType(PtrReg);
4263 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
4264 LLT AnyExtTy = LLT::scalar(AnyExtSize);
4265 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4266 PtrReg, *LargeMMO);
4267
4268 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
4269 LargeSplitSize / 8);
4270 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4271 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
4272 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
4273 SmallPtr, *SmallMMO);
4274
4275 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4276 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4277
4278 if (AnyExtTy == DstTy)
4279 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4280 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
4281 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4282 MIRBuilder.buildTrunc(DstReg, {Or});
4283 } else {
4284 assert(DstTy.isPointer() && "expected pointer");
4285 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4286
4287     // FIXME: We currently consider this to be illegal for non-integral address
4288     // spaces, but we still need a way to reinterpret the bits.
4289 MIRBuilder.buildIntToPtr(DstReg, Or);
4290 }
4291
4292 LoadMI.eraseFromParent();
4293 return Legalized;
4294}
4295
4296 LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
4297   // Lower a non-power of 2 store into multiple pow-2 stores.
4298 // E.g. split an i24 store into an i16 store + i8 store.
4299 // We do this by first extending the stored value to the next largest power
4300 // of 2 type, and then using truncating stores to store the components.
4301 // By doing this, likewise with G_LOAD, generate an extend that can be
4302 // artifact-combined away instead of leaving behind extracts.
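  // E.g. a G_STORE of an s24 value becomes roughly:
  //   %ext:_(s32) = G_ANYEXT %val(s24)
  //   G_STORE %ext, %ptr              ; truncating 2-byte store
  //   %hi:_(s32)  = G_LSHR %ext, 16
  //   G_STORE %hi, %ptr + 2           ; truncating 1-byte store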
4303 Register SrcReg = StoreMI.getValueReg();
4304 Register PtrReg = StoreMI.getPointerReg();
4305 LLT SrcTy = MRI.getType(SrcReg);
4306 MachineFunction &MF = MIRBuilder.getMF();
4307 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4308 LLT MemTy = MMO.getMemoryType();
4309
4310 unsigned StoreWidth = MemTy.getSizeInBits();
4311 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
4312
4313 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4314 // Promote to a byte-sized store with upper bits zero if not
4315 // storing an integral number of bytes. For example, promote
4316 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
4317 LLT WideTy = LLT::scalar(StoreSizeInBits);
4318
4319 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4320 // Avoid creating a store with a narrower source than result.
4321 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4322 SrcTy = WideTy;
4323 }
4324
4325 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4326
4327 MachineMemOperand *NewMMO =
4328 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
4329 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4330 StoreMI.eraseFromParent();
4331 return Legalized;
4332 }
4333
4334 if (MemTy.isVector()) {
4335 if (MemTy != SrcTy)
4336 return scalarizeVectorBooleanStore(StoreMI);
4337
4338 // TODO: We can do better than scalarizing the vector and at least split it
4339 // in half.
4340 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
4341 }
4342
4343 unsigned MemSizeInBits = MemTy.getSizeInBits();
4344 uint64_t LargeSplitSize, SmallSplitSize;
4345
4346 if (!isPowerOf2_32(MemSizeInBits)) {
4347 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
4348 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
4349 } else {
4350 auto &Ctx = MF.getFunction().getContext();
4351 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4352 return UnableToLegalize; // Don't know what we're being asked to do.
4353
4354 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4355 }
4356
4357 // Extend to the next pow-2. If this store was itself the result of lowering,
4358 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
4359 // that's wider than the stored size.
4360 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
4361 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
4362
4363 if (SrcTy.isPointer()) {
4364 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
4365 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4366 }
4367
4368 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4369
4370 // Obtain the smaller value by shifting away the larger value.
4371 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4372 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4373
4374 // Generate the PtrAdd and truncating stores.
4375 LLT PtrTy = MRI.getType(PtrReg);
4376 auto OffsetCst = MIRBuilder.buildConstant(
4377 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
4378 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4379
4380 MachineMemOperand *LargeMMO =
4381 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4382 MachineMemOperand *SmallMMO =
4383 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4384 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4385 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4386 StoreMI.eraseFromParent();
4387 return Legalized;
4388}
4389
4390 LegalizerHelper::LegalizeResult
4391 LegalizerHelper::scalarizeVectorBooleanStore(GStore &StoreMI) {
4392   Register SrcReg = StoreMI.getValueReg();
4393 Register PtrReg = StoreMI.getPointerReg();
4394 LLT SrcTy = MRI.getType(SrcReg);
4395 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4396 LLT MemTy = MMO.getMemoryType();
4397 LLT MemScalarTy = MemTy.getElementType();
4398 MachineFunction &MF = MIRBuilder.getMF();
4399
4400 assert(SrcTy.isVector() && "Expect a vector store type");
4401
4402 if (!MemScalarTy.isByteSized()) {
4403 // We need to build an integer scalar of the vector bit pattern.
4404 // It's not legal for us to add padding when storing a vector.
4405 unsigned NumBits = MemTy.getSizeInBits();
4406 LLT IntTy = LLT::scalar(NumBits);
4407 auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
4408 LLT IdxTy = TLI.getVectorIdxLLT(MF.getDataLayout());
4409
4410 for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
4411 auto Elt = MIRBuilder.buildExtractVectorElement(
4412 SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
4413 auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
4414 auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
4415 unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
4416 ? (MemTy.getNumElements() - 1) - I
4417 : I;
4418 auto ShiftAmt = MIRBuilder.buildConstant(
4419 IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
4420 auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4421 CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4422 }
4423 auto PtrInfo = MMO.getPointerInfo();
4424 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
4425 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4426 StoreMI.eraseFromParent();
4427 return Legalized;
4428 }
4429
4430 // TODO: implement simple scalarization.
4431 return UnableToLegalize;
4432}
4433
4434 LegalizerHelper::LegalizeResult
4435 LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
4436 switch (MI.getOpcode()) {
4437 case TargetOpcode::G_LOAD: {
4438 if (TypeIdx != 0)
4439 return UnableToLegalize;
4440 MachineMemOperand &MMO = **MI.memoperands_begin();
4441
4442 // Not sure how to interpret a bitcast of an extending load.
4443 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4444 return UnableToLegalize;
4445
4446 Observer.changingInstr(MI);
4447 bitcastDst(MI, CastTy, 0);
4448 MMO.setType(CastTy);
4449 // The range metadata is no longer valid when reinterpreted as a different
4450 // type.
4451 MMO.clearRanges();
4452 Observer.changedInstr(MI);
4453 return Legalized;
4454 }
4455 case TargetOpcode::G_STORE: {
4456 if (TypeIdx != 0)
4457 return UnableToLegalize;
4458
4459 MachineMemOperand &MMO = **MI.memoperands_begin();
4460
4461 // Not sure how to interpret a bitcast of a truncating store.
4462 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4463 return UnableToLegalize;
4464
4465 Observer.changingInstr(MI);
4466 bitcastSrc(MI, CastTy, 0);
4467 MMO.setType(CastTy);
4468 Observer.changedInstr(MI);
4469 return Legalized;
4470 }
4471 case TargetOpcode::G_SELECT: {
4472 if (TypeIdx != 0)
4473 return UnableToLegalize;
4474
4475 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
4476 LLVM_DEBUG(
4477 dbgs() << "bitcast action not implemented for vector select\n");
4478 return UnableToLegalize;
4479 }
4480
4481 Observer.changingInstr(MI);
4482 bitcastSrc(MI, CastTy, 2);
4483 bitcastSrc(MI, CastTy, 3);
4484 bitcastDst(MI, CastTy, 0);
4485 Observer.changedInstr(MI);
4486 return Legalized;
4487 }
4488 case TargetOpcode::G_AND:
4489 case TargetOpcode::G_OR:
4490 case TargetOpcode::G_XOR: {
4491 Observer.changingInstr(MI);
4492 bitcastSrc(MI, CastTy, 1);
4493 bitcastSrc(MI, CastTy, 2);
4494 bitcastDst(MI, CastTy, 0);
4495 Observer.changedInstr(MI);
4496 return Legalized;
4497 }
4498 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4499 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
4500 case TargetOpcode::G_INSERT_VECTOR_ELT:
4501 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
4502 case TargetOpcode::G_CONCAT_VECTORS:
4503 return bitcastConcatVector(MI, TypeIdx, CastTy);
4504 case TargetOpcode::G_SHUFFLE_VECTOR:
4505 return bitcastShuffleVector(MI, TypeIdx, CastTy);
4506 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4507 return bitcastExtractSubvector(MI, TypeIdx, CastTy);
4508 case TargetOpcode::G_INSERT_SUBVECTOR:
4509 return bitcastInsertSubvector(MI, TypeIdx, CastTy);
4510 default:
4511 return UnableToLegalize;
4512 }
4513}
4514
4515// Legalize an instruction by changing the opcode in place.
4516void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
4517   Observer.changingInstr(MI);
4518   MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
4519   Observer.changedInstr(MI);
4520 }
4521
4522 LegalizerHelper::LegalizeResult
4523 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4524 using namespace TargetOpcode;
4525
4526 switch(MI.getOpcode()) {
4527 default:
4528 return UnableToLegalize;
4529 case TargetOpcode::G_FCONSTANT:
4530 return lowerFConstant(MI);
4531 case TargetOpcode::G_BITCAST:
4532 return lowerBitcast(MI);
4533 case TargetOpcode::G_SREM:
4534 case TargetOpcode::G_UREM: {
4535 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4536 auto Quot =
4537 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4538 {MI.getOperand(1), MI.getOperand(2)});
4539
4540 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
4541 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
4542 MI.eraseFromParent();
4543 return Legalized;
4544 }
4545 case TargetOpcode::G_SADDO:
4546 case TargetOpcode::G_SSUBO:
4547 return lowerSADDO_SSUBO(MI);
4548 case TargetOpcode::G_SADDE:
4549 return lowerSADDE(MI);
4550 case TargetOpcode::G_SSUBE:
4551 return lowerSSUBE(MI);
4552 case TargetOpcode::G_UMULH:
4553 case TargetOpcode::G_SMULH:
4554 return lowerSMULH_UMULH(MI);
4555 case TargetOpcode::G_SMULO:
4556 case TargetOpcode::G_UMULO: {
4557 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4558 // result.
4559 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
4560 LLT Ty = MRI.getType(Res);
4561
4562 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4563 ? TargetOpcode::G_SMULH
4564 : TargetOpcode::G_UMULH;
4565
4566 Observer.changingInstr(MI);
4567 const auto &TII = MIRBuilder.getTII();
4568 MI.setDesc(TII.get(TargetOpcode::G_MUL));
4569 MI.removeOperand(1);
4570 Observer.changedInstr(MI);
4571
4572 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4573 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4574
4575 // Move insert point forward so we can use the Res register if needed.
4576 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
4577
4578 // For *signed* multiply, overflow is detected by checking:
4579 // (hi != (lo >> bitwidth-1))
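    // e.g. for s8: 100 * 2 = 200 gives lo = 0xc8 and hi = 0x00, while
    // (lo >> 7) sign-extends to 0xff; 0x00 != 0xff flags the overflow.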
4580 if (Opcode == TargetOpcode::G_SMULH) {
4581 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4582 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4583 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
4584 } else {
4585 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
4586 }
4587 return Legalized;
4588 }
4589 case TargetOpcode::G_FNEG: {
4590 auto [Res, SubByReg] = MI.getFirst2Regs();
4591 LLT Ty = MRI.getType(Res);
4592
4593 auto SignMask = MIRBuilder.buildConstant(
4594 Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
4595 MIRBuilder.buildXor(Res, SubByReg, SignMask);
4596 MI.eraseFromParent();
4597 return Legalized;
4598 }
4599 case TargetOpcode::G_FSUB:
4600 case TargetOpcode::G_STRICT_FSUB: {
4601 auto [Res, LHS, RHS] = MI.getFirst3Regs();
4602 LLT Ty = MRI.getType(Res);
4603
4604 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4605 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4606
4607 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4608 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4609 else
4610 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4611
4612 MI.eraseFromParent();
4613 return Legalized;
4614 }
4615 case TargetOpcode::G_FMAD:
4616 return lowerFMad(MI);
4617 case TargetOpcode::G_FFLOOR:
4618 return lowerFFloor(MI);
4619 case TargetOpcode::G_LROUND:
4620 case TargetOpcode::G_LLROUND: {
4621 Register DstReg = MI.getOperand(0).getReg();
4622 Register SrcReg = MI.getOperand(1).getReg();
4623 LLT SrcTy = MRI.getType(SrcReg);
4624 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4625 {SrcReg});
4626 MIRBuilder.buildFPTOSI(DstReg, Round);
4627 MI.eraseFromParent();
4628 return Legalized;
4629 }
4630 case TargetOpcode::G_INTRINSIC_ROUND:
4631 return lowerIntrinsicRound(MI);
4632 case TargetOpcode::G_FRINT: {
4633 // Since round even is the assumed rounding mode for unconstrained FP
4634 // operations, rint and roundeven are the same operation.
4635 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4636 return Legalized;
4637 }
4638 case TargetOpcode::G_INTRINSIC_LRINT:
4639 case TargetOpcode::G_INTRINSIC_LLRINT: {
4640 Register DstReg = MI.getOperand(0).getReg();
4641 Register SrcReg = MI.getOperand(1).getReg();
4642 LLT SrcTy = MRI.getType(SrcReg);
4643 auto Round =
4644 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4645 MIRBuilder.buildFPTOSI(DstReg, Round);
4646 MI.eraseFromParent();
4647 return Legalized;
4648 }
4649 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4650 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
4651 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4652 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4653 **MI.memoperands_begin());
4654 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4655 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4656 MI.eraseFromParent();
4657 return Legalized;
4658 }
4659 case TargetOpcode::G_LOAD:
4660 case TargetOpcode::G_SEXTLOAD:
4661 case TargetOpcode::G_ZEXTLOAD:
4662 return lowerLoad(cast<GAnyLoad>(MI));
4663 case TargetOpcode::G_STORE:
4664 return lowerStore(cast<GStore>(MI));
4665 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4666 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4667 case TargetOpcode::G_CTLZ:
4668 case TargetOpcode::G_CTTZ:
4669 case TargetOpcode::G_CTPOP:
4670 case TargetOpcode::G_CTLS:
4671 return lowerBitCount(MI);
4672 case G_UADDO: {
4673 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
4674
4675 Register NewRes = MRI.cloneVirtualRegister(Res);
4676
4677 MIRBuilder.buildAdd(NewRes, LHS, RHS);
4678 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4679
4680 MIRBuilder.buildCopy(Res, NewRes);
4681
4682 MI.eraseFromParent();
4683 return Legalized;
4684 }
4685 case G_UADDE: {
4686 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
4687 const LLT CondTy = MRI.getType(CarryOut);
4688 const LLT Ty = MRI.getType(Res);
4689
4690 Register NewRes = MRI.cloneVirtualRegister(Res);
4691
4692 // Initial add of the two operands.
4693 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
4694
4695 // Initial check for carry.
4696 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4697
4698 // Add the sum and the carry.
4699 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
4700 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4701
4702 // Second check for carry. We can only carry if the initial sum is all 1s
4703 // and the carry is set, resulting in a new sum of 0.
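    // e.g. for s8: 0xff + 0x00 with carry-in 1 gives TmpRes = 0xff (no carry
    // yet); adding the carry-in then wraps NewRes to 0x00, the case caught here.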
4704 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4705 auto ResEqZero =
4706 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
4707 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4708 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
4709
4710 MIRBuilder.buildCopy(Res, NewRes);
4711
4712 MI.eraseFromParent();
4713 return Legalized;
4714 }
4715 case G_USUBO: {
4716 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
4717
4718 MIRBuilder.buildSub(Res, LHS, RHS);
4719 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
4720
4721 MI.eraseFromParent();
4722 return Legalized;
4723 }
4724 case G_USUBE: {
4725 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
4726 const LLT CondTy = MRI.getType(BorrowOut);
4727 const LLT Ty = MRI.getType(Res);
4728
4729 // Initial subtract of the two operands.
4730 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
4731
4732 // Initial check for borrow.
4733 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4734
4735 // Subtract the borrow from the first subtract.
4736 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
4737 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4738
4739 // Second check for borrow. We can only borrow if the initial difference is
4740 // 0 and the borrow is set, resulting in a new difference of all 1s.
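    // e.g. for s8: 0x05 - 0x05 with borrow-in 1 gives TmpRes = 0x00 (no borrow
    // yet); subtracting the borrow-in then wraps to 0xff, the case caught here.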
4741 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4742 auto TmpResEqZero =
4743 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4744 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4745 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4746
4747 MI.eraseFromParent();
4748 return Legalized;
4749 }
4750 case G_UITOFP:
4751 return lowerUITOFP(MI);
4752 case G_SITOFP:
4753 return lowerSITOFP(MI);
4754 case G_FPTOUI:
4755 return lowerFPTOUI(MI);
4756 case G_FPTOSI:
4757 return lowerFPTOSI(MI);
4758 case G_FPTOUI_SAT:
4759 case G_FPTOSI_SAT:
4760 return lowerFPTOINT_SAT(MI);
4761 case G_FPTRUNC:
4762 return lowerFPTRUNC(MI);
4763 case G_FPOWI:
4764 return lowerFPOWI(MI);
4765 case G_SMIN:
4766 case G_SMAX:
4767 case G_UMIN:
4768 case G_UMAX:
4769 return lowerMinMax(MI);
4770 case G_SCMP:
4771 case G_UCMP:
4772 return lowerThreewayCompare(MI);
4773 case G_FCOPYSIGN:
4774 return lowerFCopySign(MI);
4775 case G_FMINNUM:
4776 case G_FMAXNUM:
4777 case G_FMINIMUMNUM:
4778 case G_FMAXIMUMNUM:
4779 return lowerFMinNumMaxNum(MI);
4780 case G_FMINIMUM:
4781 case G_FMAXIMUM:
4782 return lowerFMinimumMaximum(MI);
4783 case G_MERGE_VALUES:
4784 return lowerMergeValues(MI);
4785 case G_UNMERGE_VALUES:
4786 return lowerUnmergeValues(MI);
4787 case TargetOpcode::G_SEXT_INREG: {
4788 assert(MI.getOperand(2).isImm() && "Expected immediate");
4789 int64_t SizeInBits = MI.getOperand(2).getImm();
4790
4791 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4792 LLT DstTy = MRI.getType(DstReg);
4793 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4794
4795 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4796 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4797 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4798 MI.eraseFromParent();
4799 return Legalized;
4800 }
4801 case G_EXTRACT_VECTOR_ELT:
4802 case G_INSERT_VECTOR_ELT:
4803     return lowerExtractInsertVectorElt(MI);
4804   case G_SHUFFLE_VECTOR:
4805 return lowerShuffleVector(MI);
4806 case G_VECTOR_COMPRESS:
4807 return lowerVECTOR_COMPRESS(MI);
4808 case G_DYN_STACKALLOC:
4809 return lowerDynStackAlloc(MI);
4810 case G_STACKSAVE:
4811 return lowerStackSave(MI);
4812 case G_STACKRESTORE:
4813 return lowerStackRestore(MI);
4814 case G_EXTRACT:
4815 return lowerExtract(MI);
4816 case G_INSERT:
4817 return lowerInsert(MI);
4818 case G_BSWAP:
4819 return lowerBswap(MI);
4820 case G_BITREVERSE:
4821 return lowerBitreverse(MI);
4822 case G_READ_REGISTER:
4823 case G_WRITE_REGISTER:
4824 return lowerReadWriteRegister(MI);
4825 case G_UADDSAT:
4826 case G_USUBSAT: {
4827 // Try to make a reasonable guess about which lowering strategy to use. The
4828 // target can override this with custom lowering and calling the
4829 // implementation functions.
4830 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4831 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4832 return lowerAddSubSatToMinMax(MI);
4833     return lowerAddSubSatToAddoSubo(MI);
4834   }
4835 case G_SADDSAT:
4836 case G_SSUBSAT: {
4837 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4838
4839 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4840 // since it's a shorter expansion. However, we would need to figure out the
4841 // preferred boolean type for the carry out for the query.
4842 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4843 return lowerAddSubSatToMinMax(MI);
4844     return lowerAddSubSatToAddoSubo(MI);
4845   }
4846 case G_SSHLSAT:
4847 case G_USHLSAT:
4848 return lowerShlSat(MI);
4849 case G_ABS:
4850 return lowerAbsToAddXor(MI);
4851 case G_ABDS:
4852 case G_ABDU: {
4853 bool IsSigned = MI.getOpcode() == G_ABDS;
4854 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4855 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4856 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4857 return lowerAbsDiffToMinMax(MI);
4858 }
4859 return lowerAbsDiffToSelect(MI);
4860 }
4861 case G_FABS:
4862 return lowerFAbs(MI);
4863 case G_SELECT:
4864 return lowerSelect(MI);
4865 case G_IS_FPCLASS:
4866 return lowerISFPCLASS(MI);
4867 case G_SDIVREM:
4868 case G_UDIVREM:
4869 return lowerDIVREM(MI);
4870 case G_FSHL:
4871 case G_FSHR:
4872 return lowerFunnelShift(MI);
4873 case G_ROTL:
4874 case G_ROTR:
4875 return lowerRotate(MI);
4876 case G_MEMSET:
4877 case G_MEMCPY:
4878 case G_MEMMOVE:
4879 return lowerMemCpyFamily(MI);
4880 case G_MEMCPY_INLINE:
4881 return lowerMemcpyInline(MI);
4882 case G_ZEXT:
4883 case G_SEXT:
4884 case G_ANYEXT:
4885 return lowerEXT(MI);
4886 case G_TRUNC:
4887 return lowerTRUNC(MI);
4888   GISEL_VECREDUCE_CASES_NONSEQ
4889     return lowerVectorReduction(MI);
4890 case G_VAARG:
4891 return lowerVAArg(MI);
4892 case G_ATOMICRMW_SUB: {
4893 auto [Ret, Mem, Val] = MI.getFirst3Regs();
4894 const LLT ValTy = MRI.getType(Val);
4895 MachineMemOperand *MMO = *MI.memoperands_begin();
4896
4897 auto VNeg = MIRBuilder.buildNeg(ValTy, Val);
4898 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4899 MI.eraseFromParent();
4900 return Legalized;
4901 }
4902 }
4903}
4904
4905 Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4906                                                   Align MinAlign) const {
4907 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4908 // datalayout for the preferred alignment. Also there should be a target hook
4909 // for this to allow targets to reduce the alignment and ignore the
4910 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4911 // the type.
4912 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4913}
4914
4915 MachineInstrBuilder
4916 LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4917                                       MachinePointerInfo &PtrInfo) {
4918 MachineFunction &MF = MIRBuilder.getMF();
4919 const DataLayout &DL = MIRBuilder.getDataLayout();
4920 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4921
4922 unsigned AddrSpace = DL.getAllocaAddrSpace();
4923 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4924
4925 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4926 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4927}
4928
4929 MachineInstrBuilder LegalizerHelper::createStackStoreLoad(const DstOp &Res,
4930                                                           const SrcOp &Val) {
4931 LLT SrcTy = Val.getLLTTy(MRI);
4932 Align StackTypeAlign =
4933 std::max(getStackTemporaryAlignment(SrcTy),
4934                getStackTemporaryAlignment(Res.getLLTTy(MRI)));
4935   MachinePointerInfo PtrInfo;
4936 auto StackTemp =
4937 createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo);
4938
4939 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4940 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4941}
4942
4943 static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
4944                                  LLT VecTy) {
4945 LLT IdxTy = B.getMRI()->getType(IdxReg);
4946 unsigned NElts = VecTy.getNumElements();
4947
4948 int64_t IdxVal;
4949 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4950 if (IdxVal < VecTy.getNumElements())
4951 return IdxReg;
4952 // If a constant index would be out of bounds, clamp it as well.
4953 }
4954
4955 if (isPowerOf2_32(NElts)) {
4956 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4957 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4958 }
4959
4960 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4961 .getReg(0);
4962}
4963
4964Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4965 Register Index) {
4966 LLT EltTy = VecTy.getElementType();
4967
4968 // Calculate the element offset and add it to the pointer.
4969 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4970 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4971 "Converting bits to bytes lost precision");
4972
4973 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4974
4975 // Convert index to the correct size for the address space.
4976 const DataLayout &DL = MIRBuilder.getDataLayout();
4977 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4978 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4979 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4980 if (IdxTy != MRI.getType(Index))
4981 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4982
4983 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4984 MIRBuilder.buildConstant(IdxTy, EltSize));
4985
4986 LLT PtrTy = MRI.getType(VecPtr);
4987 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4988}
4989
4990#ifndef NDEBUG
4991/// Check that all vector operands have the same number of elements. Other
4992/// operands should be listed in \p NonVecOpIndices.
4993static bool hasSameNumEltsOnAllVectorOperands(
4994 GenericMachineInstr &MI, MachineRegisterInfo &MRI,
4995 std::initializer_list<unsigned> NonVecOpIndices) {
4996 if (MI.getNumMemOperands() != 0)
4997 return false;
4998
4999 LLT VecTy = MRI.getType(MI.getReg(0));
5000 if (!VecTy.isVector())
5001 return false;
5002 unsigned NumElts = VecTy.getNumElements();
5003
5004 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
5005 MachineOperand &Op = MI.getOperand(OpIdx);
5006 if (!Op.isReg()) {
5007 if (!is_contained(NonVecOpIndices, OpIdx))
5008 return false;
5009 continue;
5010 }
5011
5012 LLT Ty = MRI.getType(Op.getReg());
5013 if (!Ty.isVector()) {
5014 if (!is_contained(NonVecOpIndices, OpIdx))
5015 return false;
5016 continue;
5017 }
5018
5019 if (Ty.getNumElements() != NumElts)
5020 return false;
5021 }
5022
5023 return true;
5024}
5025#endif
5026
5027/// Fill \p DstOps with DstOps that, combined, have the same number of
5028/// elements as \p Ty. These DstOps have scalar type when \p NumElts = 1 and
5029/// are vectors with \p NumElts elements otherwise. When Ty.getNumElements()
5030/// is not a multiple of \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
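///
/// For example (illustrative): with Ty = <7 x s16> and NumElts = 4, DstOps
/// becomes { <4 x s16>, <3 x s16> }.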
5031static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
5032 unsigned NumElts) {
5033 LLT LeftoverTy;
5034 assert(Ty.isVector() && "Expected vector type");
5035 LLT NarrowTy = Ty.changeElementCount(ElementCount::getFixed(NumElts));
5036 int NumParts, NumLeftover;
5037 std::tie(NumParts, NumLeftover) =
5038 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
5039
5040 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
5041 for (int i = 0; i < NumParts; ++i) {
5042 DstOps.push_back(NarrowTy);
5043 }
5044
5045 if (LeftoverTy.isValid()) {
5046 assert(NumLeftover == 1 && "expected exactly one leftover");
5047 DstOps.push_back(LeftoverTy);
5048 }
5049}
5050
5051/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
5052/// made from \p Op depending on operand type.
5053static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
5054 MachineOperand &Op) {
5055 for (unsigned i = 0; i < N; ++i) {
5056 if (Op.isReg())
5057 Ops.push_back(Op.getReg());
5058 else if (Op.isImm())
5059 Ops.push_back(Op.getImm());
5060 else if (Op.isPredicate())
5061 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
5062 else
5063 llvm_unreachable("Unsupported type");
5064 }
5065}
5066
5067// Handle splitting vector operations which need to have the same number of
5068// elements in each type index, but each type index may have a different element
5069// type.
5070//
5071// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
5072// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5073// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5074//
5075// Also handles some irregular breakdown cases, e.g.
5076// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
5077// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5078// s64 = G_SHL s64, s32
5079LegalizerHelper::LegalizeResult
5080LegalizerHelper::fewerElementsVectorMultiEltType(
5081 GenericMachineInstr &MI, unsigned NumElts,
5082 std::initializer_list<unsigned> NonVecOpIndices) {
5083 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
5084 "Non-compatible opcode or not specified non-vector operands");
5085 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5086
5087 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5088 unsigned NumDefs = MI.getNumDefs();
5089
5090 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
5091 // Build instructions with DstOps so that an instruction found by CSE can be
5092 // used directly; CSE copies the found instruction into the given vreg when building with a vreg dest.
5093 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
5094 // Output registers will be taken from created instructions.
5095 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
5096 for (unsigned i = 0; i < NumDefs; ++i) {
5097 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
5098 }
5099
5100 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
5101 // Operands listed in NonVecOpIndices will be used as is without splitting;
5102 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
5103 // scalar condition (op 1), immediate in sext_inreg (op 2).
5104 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
5105 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5106 ++UseIdx, ++UseNo) {
5107 if (is_contained(NonVecOpIndices, UseIdx)) {
5108 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
5109 MI.getOperand(UseIdx));
5110 } else {
5111 SmallVector<Register, 8> SplitPieces;
5112 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
5113 MRI);
5114 llvm::append_range(InputOpsPieces[UseNo], SplitPieces);
5115 }
5116 }
5117
5118 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5119
5120 // Take i-th piece of each input operand split and build sub-vector/scalar
5121 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
5122 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5123 SmallVector<DstOp, 2> Defs;
5124 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5125 Defs.push_back(OutputOpsPieces[DstNo][i]);
5126
5127 SmallVector<SrcOp, 3> Uses;
5128 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5129 Uses.push_back(InputOpsPieces[InputNo][i]);
5130
5131 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
5132 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5133 OutputRegs[DstNo].push_back(I.getReg(DstNo));
5134 }
5135
5136 // Merge small outputs into MI's output for each def operand.
5137 if (NumLeftovers) {
5138 for (unsigned i = 0; i < NumDefs; ++i)
5139 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
5140 } else {
5141 for (unsigned i = 0; i < NumDefs; ++i)
5142 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
5143 }
5144
5145 MI.eraseFromParent();
5146 return Legalized;
5147}
5148
5149LegalizerHelper::LegalizeResult
5150LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
5151 unsigned NumElts) {
5152 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5153
5154 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5155 unsigned NumDefs = MI.getNumDefs();
5156
5157 SmallVector<DstOp, 8> OutputOpsPieces;
5158 SmallVector<Register, 8> OutputRegs;
5159 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
5160
5161 // Instructions that split an incoming register are inserted in the basic
5162 // block where that register is defined (the basic block is the next operand).
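//
// For example (sketch; virtual registers are illustrative), a <4 x s32> G_PHI
// split with NumElts = 2 becomes:
//   %lo:_(<2 x s32>) = G_PHI %a_lo(<2 x s32>), %bb.1, %b_lo(<2 x s32>), %bb.2
//   %hi:_(<2 x s32>) = G_PHI %a_hi(<2 x s32>), %bb.1, %b_hi(<2 x s32>), %bb.2
//   %res:_(<4 x s32>) = G_CONCAT_VECTORS %lo(<2 x s32>), %hi(<2 x s32>)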
5163 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
5164 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5165 UseIdx += 2, ++UseNo) {
5166 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
5167 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5168 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
5169 MIRBuilder, MRI);
5170 }
5171
5172 // Build PHIs with fewer elements.
5173 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5174 MIRBuilder.setInsertPt(*MI.getParent(), MI);
5175 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5176 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5177 Phi.addDef(
5178 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5179 OutputRegs.push_back(Phi.getReg(0));
5180
5181 for (unsigned j = 0; j < NumInputs / 2; ++j) {
5182 Phi.addUse(InputOpsPieces[j][i]);
5183 Phi.add(MI.getOperand(1 + j * 2 + 1));
5184 }
5185 }
5186
5187 // Set the insert point after the existing PHIs
5188 MachineBasicBlock &MBB = *MI.getParent();
5189 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
5190
5191 // Merge small outputs into MI's def.
5192 if (NumLeftovers) {
5193 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
5194 } else {
5195 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
5196 }
5197
5198 MI.eraseFromParent();
5199 return Legalized;
5200}
5201
5202LegalizerHelper::LegalizeResult
5203LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
5204 unsigned TypeIdx,
5205 LLT NarrowTy) {
5206 const int NumDst = MI.getNumOperands() - 1;
5207 const Register SrcReg = MI.getOperand(NumDst).getReg();
5208 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5209 LLT SrcTy = MRI.getType(SrcReg);
5210
5211 if (TypeIdx != 1 || NarrowTy == DstTy)
5212 return UnableToLegalize;
5213
5214 // Requires compatible types. Otherwise SrcReg should have been defined by a
5215 // merge-like instruction that would get artifact-combined. Most likely the
5216 // instruction that defines SrcReg has to perform a more/fewer-elements
5217 // legalization compatible with NarrowTy.
5218 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5219 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5220
5221 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5222 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
5223 return UnableToLegalize;
5224
5225 // This is most likely DstTy (smaller than register size) packed in SrcTy
5226 // (larger than register size), and since the unmerge was not combined it will
5227 // be lowered to bit-sequence extracts from a register. Unpack SrcTy into
5228 // NarrowTy (register size) pieces first, then unpack each NarrowTy piece to DstTy.
5229
5230 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
5231 //
5232 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5233 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5234 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5235 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5236 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5237 const int PartsPerUnmerge = NumDst / NumUnmerge;
5238
5239 for (int I = 0; I != NumUnmerge; ++I) {
5240 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5241
5242 for (int J = 0; J != PartsPerUnmerge; ++J)
5243 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
5244 MIB.addUse(Unmerge.getReg(I));
5245 }
5246
5247 MI.eraseFromParent();
5248 return Legalized;
5249}
5250
5251LegalizerHelper::LegalizeResult
5252LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
5253 LLT NarrowTy) {
5254 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5255 // Requires compatible types. Otherwise the user of DstReg did not perform the
5256 // unmerge that should have been artifact-combined. Most likely the instruction
5257 // that uses DstReg has to do a more/fewer-elements legalization compatible with NarrowTy.
5258 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5259 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5260 if (NarrowTy == SrcTy)
5261 return UnableToLegalize;
5262
5263 // This attempts to lower part of an LCMTy merge/unmerge sequence. The
5264 // intended use is for old MIR tests. Since the changes to more/fewer-elements
5265 // legalization it should no longer be possible to generate MIR like this from
5266 // LLVM IR, because the LCMTy approach was replaced with merge/unmerge to vector elements.
5267 if (TypeIdx == 1) {
5268 assert(SrcTy.isVector() && "Expected vector types");
5269 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5270 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5271 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
5272 return UnableToLegalize;
5273 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
5274 //
5275 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
5276 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
5277 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
5278 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
5279 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
5280 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
5281
5282 SmallVector<Register, 8> Elts;
5283 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
5284 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
5285 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
5286 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5287 Elts.push_back(Unmerge.getReg(j));
5288 }
5289
5290 SmallVector<Register, 8> NarrowTyElts;
5291 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5292 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5293 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
5294 ++i, Offset += NumNarrowTyElts) {
5295 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
5296 NarrowTyElts.push_back(
5297 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5298 }
5299
5300 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5301 MI.eraseFromParent();
5302 return Legalized;
5303 }
5304
5305 assert(TypeIdx == 0 && "Bad type index");
5306 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5307 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5308 return UnableToLegalize;
5309
5310 // This is most likely SrcTy (smaller than register size) packed in DstTy
5311 // (larger than register size), and since the merge was not combined it will be
5312 // lowered to bit-sequence packing into a register. Merge SrcTy into NarrowTy
5313 // (register size) pieces first, then merge the NarrowTy pieces into DstTy.
5314
5315 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
5316 //
5317 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5318 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5319 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
5320 SmallVector<Register, 8> NarrowTyElts;
5321 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5322 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5323 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
5324 for (unsigned i = 0; i < NumParts; ++i) {
5325 SmallVector<Register, 8> Sources;
5326 for (unsigned j = 0; j < NumElts; ++j)
5327 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
5328 NarrowTyElts.push_back(
5329 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5330 }
5331
5332 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5333 MI.eraseFromParent();
5334 return Legalized;
5335}
5336
5337LegalizerHelper::LegalizeResult
5338LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
5339 unsigned TypeIdx,
5340 LLT NarrowVecTy) {
5341 auto [DstReg, SrcVec] = MI.getFirst2Regs();
5342 Register InsertVal;
5343 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5344
5345 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
5346 if (IsInsert)
5347 InsertVal = MI.getOperand(2).getReg();
5348
5349 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
5350 LLT VecTy = MRI.getType(SrcVec);
5351
5352 // If the index is a constant, we can really break this down as you would
5353 // expect, and index into the target size pieces.
5354 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
5355 if (MaybeCst) {
5356 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5357 // Avoid out of bounds indexing the pieces.
5358 if (IdxVal >= VecTy.getNumElements()) {
5359 MIRBuilder.buildUndef(DstReg);
5360 MI.eraseFromParent();
5361 return Legalized;
5362 }
5363
5364 if (!NarrowVecTy.isVector()) {
5365 SmallVector<Register, 8> SplitPieces;
5366 extractParts(MI.getOperand(1).getReg(), NarrowVecTy,
5367 VecTy.getNumElements(), SplitPieces, MIRBuilder, MRI);
5368 if (IsInsert) {
5369 SplitPieces[IdxVal] = InsertVal;
5370 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), SplitPieces);
5371 } else {
5372 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5373 }
5374 } else {
5375 SmallVector<Register, 8> VecParts;
5376 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5377
5378 // Build a sequence of NarrowTy pieces in VecParts for this operand.
5379 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5380 TargetOpcode::G_ANYEXT);
5381
5382 unsigned NewNumElts = NarrowVecTy.getNumElements();
5383
5384 LLT IdxTy = MRI.getType(Idx);
5385 int64_t PartIdx = IdxVal / NewNumElts;
5386 auto NewIdx =
5387 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5388
5389 if (IsInsert) {
5390 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5391
5392 // Use the adjusted index to insert into one of the subvectors.
5393 auto InsertPart = MIRBuilder.buildInsertVectorElement(
5394 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5395 VecParts[PartIdx] = InsertPart.getReg(0);
5396
5397 // Recombine the inserted subvector with the others to reform the result
5398 // vector.
5399 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5400 } else {
5401 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5402 }
5403 }
5404
5405 MI.eraseFromParent();
5406 return Legalized;
5407 }
5408
5409 // With a variable index, we can't perform the operation in a smaller type, so
5410 // we're forced to expand this.
5411 //
5412 // TODO: We could emit a chain of compare/select to figure out which piece to
5413 // index.
5414 return UnableToLegalize;
5415}
5416
5417LegalizerHelper::LegalizeResult
5418LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
5419 LLT NarrowTy) {
5420 // FIXME: Don't know how to handle secondary types yet.
5421 if (TypeIdx != 0)
5422 return UnableToLegalize;
5423
5424 if (!NarrowTy.isByteSized()) {
5425 LLVM_DEBUG(dbgs() << "Can't narrow load/store to non-byte-sized type\n");
5426 return UnableToLegalize;
5427 }
5428
5429 // This implementation doesn't work for atomics. Give up instead of doing
5430 // something invalid.
5431 if (LdStMI.isAtomic())
5432 return UnableToLegalize;
5433
5434 bool IsLoad = isa<GLoad>(LdStMI);
5435 Register ValReg = LdStMI.getReg(0);
5436 Register AddrReg = LdStMI.getPointerReg();
5437 LLT ValTy = MRI.getType(ValReg);
5438
5439 // FIXME: Do we need a distinct NarrowMemory legalize action?
5440 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
5441 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
5442 return UnableToLegalize;
5443 }
5444
5445 int NumParts = -1;
5446 int NumLeftover = -1;
5447 LLT LeftoverTy;
5448 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
5449 if (IsLoad) {
5450 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
5451 } else {
5452 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5453 NarrowLeftoverRegs, MIRBuilder, MRI)) {
5454 NumParts = NarrowRegs.size();
5455 NumLeftover = NarrowLeftoverRegs.size();
5456 }
5457 }
5458
5459 if (NumParts == -1)
5460 return UnableToLegalize;
5461
5462 LLT PtrTy = MRI.getType(AddrReg);
5463 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
5464
5465 unsigned TotalSize = ValTy.getSizeInBits();
5466
5467 // Split the load/store into PartTy-sized pieces starting at Offset. If this
5468 // is a load, return the new registers in ValRegs. For a store, each element
5469 // of ValRegs should have type PartTy. Returns the next offset that needs to
5470 // be handled.
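//
// For example (sketch; little-endian, vregs illustrative), narrowing an s64
// G_LOAD to s32 pieces produces roughly:
//   %lo:_(s32) = G_LOAD %addr(p0) :: (load (s32))
//   %addr4:_(p0) = G_PTR_ADD %addr, %four(s64)
//   %hi:_(s32) = G_LOAD %addr4(p0) :: (load (s32) from unknown-address + 4)
//   %val:_(s64) = G_MERGE_VALUES %lo(s32), %hi(s32)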
5471 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
5472 auto MMO = LdStMI.getMMO();
5473 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
5474 unsigned NumParts, unsigned Offset) -> unsigned {
5475 MachineFunction &MF = MIRBuilder.getMF();
5476 unsigned PartSize = PartTy.getSizeInBits();
5477 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
5478 ++Idx) {
5479 unsigned ByteOffset = Offset / 8;
5480 Register NewAddrReg;
5481
5482 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5483 ByteOffset);
5484
5485 MachineMemOperand *NewMMO =
5486 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
5487
5488 if (IsLoad) {
5489 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5490 ValRegs.push_back(Dst);
5491 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5492 } else {
5493 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5494 }
5495 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
5496 }
5497
5498 return Offset;
5499 };
5500
5501 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
5502 unsigned HandledOffset =
5503 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
5504
5505 // Handle the rest of the register if this isn't an even type breakdown.
5506 if (LeftoverTy.isValid())
5507 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5508
5509 if (IsLoad) {
5510 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5511 LeftoverTy, NarrowLeftoverRegs);
5512 }
5513
5514 LdStMI.eraseFromParent();
5515 return Legalized;
5516}
5517
5518LegalizerHelper::LegalizeResult
5519LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
5520 LLT NarrowTy) {
5521 using namespace TargetOpcode;
5522 GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
5523 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5524
5525 switch (MI.getOpcode()) {
5526 case G_IMPLICIT_DEF:
5527 case G_TRUNC:
5528 case G_AND:
5529 case G_OR:
5530 case G_XOR:
5531 case G_ADD:
5532 case G_SUB:
5533 case G_MUL:
5534 case G_PTR_ADD:
5535 case G_SMULH:
5536 case G_UMULH:
5537 case G_FADD:
5538 case G_FMUL:
5539 case G_FSUB:
5540 case G_FNEG:
5541 case G_FABS:
5542 case G_FCANONICALIZE:
5543 case G_FDIV:
5544 case G_FREM:
5545 case G_FMA:
5546 case G_FMAD:
5547 case G_FPOW:
5548 case G_FEXP:
5549 case G_FEXP2:
5550 case G_FEXP10:
5551 case G_FLOG:
5552 case G_FLOG2:
5553 case G_FLOG10:
5554 case G_FLDEXP:
5555 case G_FNEARBYINT:
5556 case G_FCEIL:
5557 case G_FFLOOR:
5558 case G_FRINT:
5559 case G_INTRINSIC_LRINT:
5560 case G_INTRINSIC_LLRINT:
5561 case G_INTRINSIC_ROUND:
5562 case G_INTRINSIC_ROUNDEVEN:
5563 case G_LROUND:
5564 case G_LLROUND:
5565 case G_INTRINSIC_TRUNC:
5566 case G_FMODF:
5567 case G_FCOS:
5568 case G_FSIN:
5569 case G_FTAN:
5570 case G_FACOS:
5571 case G_FASIN:
5572 case G_FATAN:
5573 case G_FATAN2:
5574 case G_FCOSH:
5575 case G_FSINH:
5576 case G_FTANH:
5577 case G_FSQRT:
5578 case G_BSWAP:
5579 case G_BITREVERSE:
5580 case G_SDIV:
5581 case G_UDIV:
5582 case G_SREM:
5583 case G_UREM:
5584 case G_SDIVREM:
5585 case G_UDIVREM:
5586 case G_SMIN:
5587 case G_SMAX:
5588 case G_UMIN:
5589 case G_UMAX:
5590 case G_ABS:
5591 case G_FMINNUM:
5592 case G_FMAXNUM:
5593 case G_FMINNUM_IEEE:
5594 case G_FMAXNUM_IEEE:
5595 case G_FMINIMUM:
5596 case G_FMAXIMUM:
5597 case G_FMINIMUMNUM:
5598 case G_FMAXIMUMNUM:
5599 case G_FSHL:
5600 case G_FSHR:
5601 case G_ROTL:
5602 case G_ROTR:
5603 case G_FREEZE:
5604 case G_SADDSAT:
5605 case G_SSUBSAT:
5606 case G_UADDSAT:
5607 case G_USUBSAT:
5608 case G_UMULO:
5609 case G_SMULO:
5610 case G_SHL:
5611 case G_LSHR:
5612 case G_ASHR:
5613 case G_SSHLSAT:
5614 case G_USHLSAT:
5615 case G_CTLZ:
5616 case G_CTLZ_ZERO_UNDEF:
5617 case G_CTTZ:
5618 case G_CTTZ_ZERO_UNDEF:
5619 case G_CTPOP:
5620 case G_FCOPYSIGN:
5621 case G_ZEXT:
5622 case G_SEXT:
5623 case G_ANYEXT:
5624 case G_FPEXT:
5625 case G_FPTRUNC:
5626 case G_SITOFP:
5627 case G_UITOFP:
5628 case G_FPTOSI:
5629 case G_FPTOUI:
5630 case G_FPTOSI_SAT:
5631 case G_FPTOUI_SAT:
5632 case G_INTTOPTR:
5633 case G_PTRTOINT:
5634 case G_ADDRSPACE_CAST:
5635 case G_UADDO:
5636 case G_USUBO:
5637 case G_UADDE:
5638 case G_USUBE:
5639 case G_SADDO:
5640 case G_SSUBO:
5641 case G_SADDE:
5642 case G_SSUBE:
5643 case G_STRICT_FADD:
5644 case G_STRICT_FSUB:
5645 case G_STRICT_FMUL:
5646 case G_STRICT_FMA:
5647 case G_STRICT_FLDEXP:
5648 case G_FFREXP:
5649 return fewerElementsVectorMultiEltType(GMI, NumElts);
5650 case G_ICMP:
5651 case G_FCMP:
5652 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
5653 case G_IS_FPCLASS:
5654 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
5655 case G_SELECT:
5656 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
5657 return fewerElementsVectorMultiEltType(GMI, NumElts);
5658 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
5659 case G_PHI:
5660 return fewerElementsVectorPhi(GMI, NumElts);
5661 case G_UNMERGE_VALUES:
5662 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
5663 case G_BUILD_VECTOR:
5664 assert(TypeIdx == 0 && "not a vector type index");
5665 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5666 case G_CONCAT_VECTORS:
5667 if (TypeIdx != 1) // TODO: This probably does work as expected already.
5668 return UnableToLegalize;
5669 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5670 case G_EXTRACT_VECTOR_ELT:
5671 case G_INSERT_VECTOR_ELT:
5672 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
5673 case G_LOAD:
5674 case G_STORE:
5675 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
5676 case G_SEXT_INREG:
5677 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
5678 GISEL_VECREDUCE_CASES_NONSEQ
5679 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
5680 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5681 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5682 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
5683 case G_SHUFFLE_VECTOR:
5684 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
5685 case G_FPOWI:
5686 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
5687 case G_BITCAST:
5688 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
5689 case G_INTRINSIC_FPTRUNC_ROUND:
5690 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
5691 default:
5692 return UnableToLegalize;
5693 }
5694}
5695
5696LegalizerHelper::LegalizeResult
5697LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx,
5698 LLT NarrowTy) {
5699 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5700 "Not a bitcast operation");
5701
5702 if (TypeIdx != 0)
5703 return UnableToLegalize;
5704
5705 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5706
5707 unsigned NewElemCount =
5708 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5709 SmallVector<Register> SrcVRegs, BitcastVRegs;
5710 if (NewElemCount == 1) {
5711 LLT SrcNarrowTy = SrcTy.getElementType();
5712
5713 auto Unmerge = MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5714 getUnmergeResults(SrcVRegs, *Unmerge);
5715 } else {
5716 LLT SrcNarrowTy =
5718
5719 // Split the Src and Dst Reg into smaller registers
5720 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5721 return UnableToLegalize;
5722 }
5723
5724 // Build new smaller bitcast instructions.
5725 // Leftover types are not supported for now, but will have to be eventually.
5726 for (Register Reg : SrcVRegs)
5727 BitcastVRegs.push_back(MIRBuilder.buildBitcast(NarrowTy, Reg).getReg(0));
5728
5729 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5730 MI.eraseFromParent();
5731 return Legalized;
5732}
5733
5734LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
5735 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5736 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5737 if (TypeIdx != 0)
5738 return UnableToLegalize;
5739
5740 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5741 MI.getFirst3RegLLTs();
5742 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5743 // The shuffle should be canonicalized by now.
5744 if (DstTy != Src1Ty)
5745 return UnableToLegalize;
5746 if (DstTy != Src2Ty)
5747 return UnableToLegalize;
5748
5749 if (!isPowerOf2_32(DstTy.getNumElements()))
5750 return UnableToLegalize;
5751
5752 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5753 // Further legalization attempts will be needed to split further.
5754 NarrowTy =
5755 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
5756 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5757
5758 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
5759 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
5760 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
5761 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5762 SplitSrc2Regs[1]};
5763
5764 Register Hi, Lo;
5765
5766 // If Lo or Hi uses elements from at most two of the four input vectors, then
5767 // express it as a vector shuffle of those two inputs. Otherwise extract the
5768 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
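//
// For example (sketch): with NewElts = 2 and mask <0, 4, 1, 5> over the four
// <2 x s32> pieces [Src1Lo, Src1Hi, Src2Lo, Src2Hi], Lo only touches Src1Lo
// and Src2Lo and becomes a shuffle of that pair with mask <0, 2>, while Hi
// becomes a shuffle of the same pair with mask <1, 3>.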
5769 SmallVector<int, 16> Ops;
5770 for (unsigned High = 0; High < 2; ++High) {
5771 Register &Output = High ? Hi : Lo;
5772
5773 // Build a shuffle mask for the output, discovering on the fly which
5774 // input vectors to use as shuffle operands (recorded in InputUsed).
5775 // If building a suitable shuffle vector proves too hard, then bail
5776 // out with useBuildVector set.
5777 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
5778 unsigned FirstMaskIdx = High * NewElts;
5779 bool UseBuildVector = false;
5780 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5781 // The mask element. This indexes into the input.
5782 int Idx = Mask[FirstMaskIdx + MaskOffset];
5783
5784 // The input vector this mask element indexes into.
5785 unsigned Input = (unsigned)Idx / NewElts;
5786
5787 if (Input >= std::size(Inputs)) {
5788 // The mask element does not index into any input vector.
5789 Ops.push_back(-1);
5790 continue;
5791 }
5792
5793 // Turn the index into an offset from the start of the input vector.
5794 Idx -= Input * NewElts;
5795
5796 // Find or create a shuffle vector operand to hold this input.
5797 unsigned OpNo;
5798 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5799 if (InputUsed[OpNo] == Input) {
5800 // This input vector is already an operand.
5801 break;
5802 } else if (InputUsed[OpNo] == -1U) {
5803 // Create a new operand for this input vector.
5804 InputUsed[OpNo] = Input;
5805 break;
5806 }
5807 }
5808
5809 if (OpNo >= std::size(InputUsed)) {
5810 // More than two input vectors used! Give up on trying to create a
5811 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5812 UseBuildVector = true;
5813 break;
5814 }
5815
5816 // Add the mask index for the new shuffle vector.
5817 Ops.push_back(Idx + OpNo * NewElts);
5818 }
5819
5820 if (UseBuildVector) {
5821 LLT EltTy = NarrowTy.getElementType();
5822 SmallVector<Register, 16> SVOps;
5823
5824 // Extract the input elements by hand.
5825 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5826 // The mask element. This indexes into the input.
5827 int Idx = Mask[FirstMaskIdx + MaskOffset];
5828
5829 // The input vector this mask element indexes into.
5830 unsigned Input = (unsigned)Idx / NewElts;
5831
5832 if (Input >= std::size(Inputs)) {
5833 // The mask element is "undef" or indexes off the end of the input.
5834 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5835 continue;
5836 }
5837
5838 // Turn the index into an offset from the start of the input vector.
5839 Idx -= Input * NewElts;
5840
5841 // Extract the vector element by hand.
5842 SVOps.push_back(MIRBuilder
5843 .buildExtractVectorElement(
5844 EltTy, Inputs[Input],
5845 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5846 .getReg(0));
5847 }
5848
5849 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5850 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5851 } else if (InputUsed[0] == -1U) {
5852 // No input vectors were used! The result is undefined.
5853 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5854 } else if (NewElts == 1) {
5855 Output = MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);
5856 } else {
5857 Register Op0 = Inputs[InputUsed[0]];
5858 // If only one input was used, use an undefined vector for the other.
5859 Register Op1 = InputUsed[1] == -1U
5860 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5861 : Inputs[InputUsed[1]];
5862 // At least one input vector was used. Create a new shuffle vector.
5863 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5864 }
5865
5866 Ops.clear();
5867 }
5868
5869 MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
5870 MI.eraseFromParent();
5871 return Legalized;
5872}
5873
5874LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
5875 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5876 auto &RdxMI = cast<GVecReduce>(MI);
5877
5878 if (TypeIdx != 1)
5879 return UnableToLegalize;
5880
5881 // The semantics of the normal non-sequential reductions allow us to freely
5882 // re-associate the operation.
5883 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5884
5885 if (NarrowTy.isVector() &&
5886 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5887 return UnableToLegalize;
5888
5889 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5890 SmallVector<Register> SplitSrcs;
5891 // If NarrowTy is a scalar then we're being asked to scalarize.
5892 const unsigned NumParts =
5893 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5894 : SrcTy.getNumElements();
5895
5896 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5897 if (NarrowTy.isScalar()) {
5898 if (DstTy != NarrowTy)
5899 return UnableToLegalize; // FIXME: handle implicit extensions.
5900
5901 if (isPowerOf2_32(NumParts)) {
5902 // Generate a tree of scalar operations to reduce the critical path.
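// For example (illustrative): with four pieces p0..p3 this emits
// t0 = op(p0, p1) and t1 = op(p2, p3), then result = op(t0, t1), instead of a
// three-deep sequential chain.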
5903 SmallVector<Register> PartialResults;
5904 unsigned NumPartsLeft = NumParts;
5905 while (NumPartsLeft > 1) {
5906 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5907 PartialResults.emplace_back(
5908 MIRBuilder
5909 .buildInstr(ScalarOpc, {NarrowTy},
5910 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5911 .getReg(0));
5912 }
5913 SplitSrcs = PartialResults;
5914 PartialResults.clear();
5915 NumPartsLeft = SplitSrcs.size();
5916 }
5917 assert(SplitSrcs.size() == 1);
5918 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5919 MI.eraseFromParent();
5920 return Legalized;
5921 }
5922 // If we can't generate a tree, then just do sequential operations.
5923 Register Acc = SplitSrcs[0];
5924 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5925 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5926 .getReg(0);
5927 MIRBuilder.buildCopy(DstReg, Acc);
5928 MI.eraseFromParent();
5929 return Legalized;
5930 }
5931 SmallVector<Register> PartialReductions;
5932 for (unsigned Part = 0; Part < NumParts; ++Part) {
5933 PartialReductions.push_back(
5934 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5935 .getReg(0));
5936 }
5937
5938 // If the types involved are powers of 2, we can generate intermediate vector
5939 // ops, before generating a final reduction operation.
5940 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5941 isPowerOf2_32(NarrowTy.getNumElements())) {
5942 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5943 }
5944
5945 Register Acc = PartialReductions[0];
5946 for (unsigned Part = 1; Part < NumParts; ++Part) {
5947 if (Part == NumParts - 1) {
5948 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5949 {Acc, PartialReductions[Part]});
5950 } else {
5951 Acc = MIRBuilder
5952 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5953 .getReg(0);
5954 }
5955 }
5956 MI.eraseFromParent();
5957 return Legalized;
5958}
5959
5960LegalizerHelper::LegalizeResult
5961LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
5962 unsigned int TypeIdx,
5963 LLT NarrowTy) {
5964 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5965 MI.getFirst3RegLLTs();
5966 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5967 DstTy != NarrowTy)
5968 return UnableToLegalize;
5969
5970 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5971 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5972 "Unexpected vecreduce opcode");
5973 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5974 ? TargetOpcode::G_FADD
5975 : TargetOpcode::G_FMUL;
5976
5977 SmallVector<Register> SplitSrcs;
5978 unsigned NumParts = SrcTy.getNumElements();
5979 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5980 Register Acc = ScalarReg;
5981 for (unsigned i = 0; i < NumParts; i++)
5982 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5983 .getReg(0);
5984
5985 MIRBuilder.buildCopy(DstReg, Acc);
5986 MI.eraseFromParent();
5987 return Legalized;
5988}
5989
5990LegalizerHelper::LegalizeResult
5991LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5992 LLT SrcTy, LLT NarrowTy,
5993 unsigned ScalarOpc) {
5994 SmallVector<Register> SplitSrcs;
5995 // Split the sources into NarrowTy size pieces.
5996 extractParts(SrcReg, NarrowTy,
5997 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5998 MIRBuilder, MRI);
5999 // We're going to do a tree reduction using vector operations until we have
6000 // one NarrowTy size value left.
6001 while (SplitSrcs.size() > 1) {
6002 SmallVector<Register> PartialRdxs;
6003 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
6004 Register LHS = SplitSrcs[Idx];
6005 Register RHS = SplitSrcs[Idx + 1];
6006 // Create the intermediate vector op.
6007 Register Res =
6008 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
6009 PartialRdxs.push_back(Res);
6010 }
6011 SplitSrcs = std::move(PartialRdxs);
6012 }
6013 // Finally generate the requested NarrowTy based reduction.
6014 Observer.changingInstr(MI);
6015 MI.getOperand(1).setReg(SplitSrcs[0]);
6016 Observer.changedInstr(MI);
6017 return Legalized;
6018}
6019
6020LegalizerHelper::LegalizeResult
6021LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
6022 const LLT HalfTy, const LLT AmtTy) {
6023
6024 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6025 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6026 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6027
6028 if (Amt.isZero()) {
6029 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
6030 MI.eraseFromParent();
6031 return Legalized;
6032 }
6033
6034 LLT NVT = HalfTy;
6035 unsigned NVTBits = HalfTy.getSizeInBits();
6036 unsigned VTBits = 2 * NVTBits;
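// For example (illustrative): splitting an s64 G_SHL by a constant 40 into s32
// halves takes the Amt.ugt(NVTBits) branch below and yields Lo = 0 and
// Hi = InL << (40 - 32) = InL << 8.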
6037
6038 SrcOp Lo(Register(0)), Hi(Register(0));
6039 if (MI.getOpcode() == TargetOpcode::G_SHL) {
6040 if (Amt.ugt(VTBits)) {
6041 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6042 } else if (Amt.ugt(NVTBits)) {
6043 Lo = MIRBuilder.buildConstant(NVT, 0);
6044 Hi = MIRBuilder.buildShl(NVT, InL,
6045 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6046 } else if (Amt == NVTBits) {
6047 Lo = MIRBuilder.buildConstant(NVT, 0);
6048 Hi = InL;
6049 } else {
6050 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
6051 auto OrLHS =
6052 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
6053 auto OrRHS = MIRBuilder.buildLShr(
6054 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6055 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6056 }
6057 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6058 if (Amt.ugt(VTBits)) {
6059 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6060 } else if (Amt.ugt(NVTBits)) {
6061 Lo = MIRBuilder.buildLShr(NVT, InH,
6062 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6063 Hi = MIRBuilder.buildConstant(NVT, 0);
6064 } else if (Amt == NVTBits) {
6065 Lo = InH;
6066 Hi = MIRBuilder.buildConstant(NVT, 0);
6067 } else {
6068 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6069
6070 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6071 auto OrRHS = MIRBuilder.buildShl(
6072 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6073
6074 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6075 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
6076 }
6077 } else {
6078 if (Amt.ugt(VTBits)) {
6079 Hi = Lo = MIRBuilder.buildAShr(
6080 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6081 } else if (Amt.ugt(NVTBits)) {
6082 Lo = MIRBuilder.buildAShr(NVT, InH,
6083 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6084 Hi = MIRBuilder.buildAShr(NVT, InH,
6085 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6086 } else if (Amt == NVTBits) {
6087 Lo = InH;
6088 Hi = MIRBuilder.buildAShr(NVT, InH,
6089 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6090 } else {
6091 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6092
6093 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6094 auto OrRHS = MIRBuilder.buildShl(
6095 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6096
6097 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6098 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
6099 }
6100 }
6101
6102 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
6103 MI.eraseFromParent();
6104
6105 return Legalized;
6106}
6107
6108LegalizerHelper::LegalizeResult
6109LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
6110 LLT RequestedTy) {
6111 if (TypeIdx == 1) {
6112 Observer.changingInstr(MI);
6113 narrowScalarSrc(MI, RequestedTy, 2);
6114 Observer.changedInstr(MI);
6115 return Legalized;
6116 }
6117
6118 Register DstReg = MI.getOperand(0).getReg();
6119 LLT DstTy = MRI.getType(DstReg);
6120 if (DstTy.isVector())
6121 return UnableToLegalize;
6122
6123 Register Amt = MI.getOperand(2).getReg();
6124 LLT ShiftAmtTy = MRI.getType(Amt);
6125 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
6126 if (DstEltSize % 2 != 0)
6127 return UnableToLegalize;
6128
6129 // Check if we should use multi-way splitting instead of recursive binary
6130 // splitting.
6131 //
6132 // Multi-way splitting directly decomposes wide shifts (e.g., 128-bit ->
6133 // 4×32-bit) in a single legalization step, avoiding the recursive overhead
6134 // and dependency chains created by the usual binary splitting approach
6135 // (128->64->32).
6136 //
6137 // The >= 8 parts threshold ensures we only use this optimization when binary
6138 // splitting would require multiple recursive passes, avoiding overhead for
6139 // simple 2-way splits where the binary approach is sufficient.
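//
// For example (illustrative): narrowing an s128 shift with a requested s16
// type gives 128 / 16 = 8 parts, so the multi-way path is taken; a requested
// s64 gives only 2 parts and falls through to the binary split below.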
6140 if (RequestedTy.isValid() && RequestedTy.isScalar() &&
6141 DstEltSize % RequestedTy.getSizeInBits() == 0) {
6142 const unsigned NumParts = DstEltSize / RequestedTy.getSizeInBits();
6143 // Use multiway if we have 8 or more parts (i.e., would need 3+ recursive
6144 // steps).
6145 if (NumParts >= 8)
6146 return narrowScalarShiftMultiway(MI, RequestedTy);
6147 }
6148
6149 // Fall back to binary splitting:
6150 // Ignore the input type. We can only go to exactly half the size of the
6151 // input. If that isn't small enough, the resulting pieces will be further
6152 // legalized.
6153 const unsigned NewBitSize = DstEltSize / 2;
6154 const LLT HalfTy = LLT::scalar(NewBitSize);
6155 const LLT CondTy = LLT::scalar(1);
6156
6157 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
6158 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
6159 ShiftAmtTy);
6160 }
6161
6162 // TODO: Expand with known bits.
6163
6164 // Handle the fully general expansion by an unknown amount.
6165 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6166
6167 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6168 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6169 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6170
6171 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6172 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6173
6174 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6175 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
6176 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
6177
6178 Register ResultRegs[2];
6179 switch (MI.getOpcode()) {
6180 case TargetOpcode::G_SHL: {
6181 // Short: ShAmt < NewBitSize
6182 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
6183
6184 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6185 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
6186 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6187
6188 // Long: ShAmt >= NewBitSize
6189 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
6190 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
6191
6192 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6193 auto Hi = MIRBuilder.buildSelect(
6194 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6195
6196 ResultRegs[0] = Lo.getReg(0);
6197 ResultRegs[1] = Hi.getReg(0);
6198 break;
6199 }
6200 case TargetOpcode::G_LSHR:
6201 case TargetOpcode::G_ASHR: {
6202 // Short: ShAmt < NewBitSize
6203 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
6204
6205 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
6206 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6207 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6208
6209 // Long: ShAmt >= NewBitSize
6211 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6212 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
6213 } else {
6214 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6215 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
6216 }
6217 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
6218 {InH, AmtExcess}); // Lo from Hi part.
6219
6220 auto Lo = MIRBuilder.buildSelect(
6221 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6222
6223 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6224
6225 ResultRegs[0] = Lo.getReg(0);
6226 ResultRegs[1] = Hi.getReg(0);
6227 break;
6228 }
6229 default:
6230 llvm_unreachable("not a shift");
6231 }
6232
6233 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6234 MI.eraseFromParent();
6235 return Legalized;
6236}
6237
6238Register LegalizerHelper::buildConstantShiftPart(unsigned Opcode,
6239 unsigned PartIdx,
6240 unsigned NumParts,
6241 ArrayRef<Register> SrcParts,
6242 const ShiftParams &Params,
6243 LLT TargetTy, LLT ShiftAmtTy) {
6244 auto WordShiftConst = getIConstantVRegVal(Params.WordShift, MRI);
6245 auto BitShiftConst = getIConstantVRegVal(Params.BitShift, MRI);
6246 assert(WordShiftConst && BitShiftConst && "Expected constants");
6247
6248 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6249 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6250 const bool NeedsInterWordShift = ShiftBits != 0;
6251
6252 switch (Opcode) {
6253 case TargetOpcode::G_SHL: {
6254 // Data moves from lower indices to higher indices
6255 // If this part would come from a source beyond our range, it's zero
6256 if (PartIdx < ShiftWords)
6257 return Params.Zero;
6258
6259 unsigned SrcIdx = PartIdx - ShiftWords;
6260 if (!NeedsInterWordShift)
6261 return SrcParts[SrcIdx];
6262
6263 // Combine shifted main part with carry from previous part
6264 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6265 if (SrcIdx > 0) {
6266 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6267 Params.InvBitShift);
6268 return MIRBuilder.buildOr(TargetTy, Hi, Lo).getReg(0);
6269 }
6270 return Hi.getReg(0);
6271 }
6272
6273 case TargetOpcode::G_LSHR: {
6274 unsigned SrcIdx = PartIdx + ShiftWords;
6275 if (SrcIdx >= NumParts)
6276 return Params.Zero;
6277 if (!NeedsInterWordShift)
6278 return SrcParts[SrcIdx];
6279
6280 // Combine shifted main part with carry from next part
6281 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6282 if (SrcIdx + 1 < NumParts) {
6283 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6284 Params.InvBitShift);
6285 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6286 }
6287 return Lo.getReg(0);
6288 }
6289
6290 case TargetOpcode::G_ASHR: {
6291 // Like LSHR but preserves sign bit
6292 unsigned SrcIdx = PartIdx + ShiftWords;
6293 if (SrcIdx >= NumParts)
6294 return Params.SignBit;
6295 if (!NeedsInterWordShift)
6296 return SrcParts[SrcIdx];
6297
6298 // Only the original MSB part uses arithmetic shift to preserve sign. All
6299 // other parts use logical shift since they're just moving data bits.
6300 auto Lo =
6301 (SrcIdx == NumParts - 1)
6302 ? MIRBuilder.buildAShr(TargetTy, SrcParts[SrcIdx], Params.BitShift)
6303 : MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6304 Register HiSrc =
6305 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.SignBit;
6306 auto Hi = MIRBuilder.buildShl(TargetTy, HiSrc, Params.InvBitShift);
6307 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6308 }
6309
6310 default:
6311 llvm_unreachable("not a shift");
6312 }
6313}
6314
6315Register LegalizerHelper::buildVariableShiftPart(unsigned Opcode,
6316 Register MainOperand,
6317 Register ShiftAmt,
6318 LLT TargetTy,
6319 Register CarryOperand) {
6320 // This helper generates a single output part for variable shifts by combining
6321 // the main operand (shifted by BitShift) with carry bits from an adjacent
6322 // part.
6323
6324 // For G_ASHR, individual parts don't have their own sign bit, only the
6325 // complete value does. So we use LSHR for the main operand shift in ASHR
6326 // context.
6327 unsigned MainOpcode =
6328 (Opcode == TargetOpcode::G_ASHR) ? TargetOpcode::G_LSHR : Opcode;
6329
6330 // Perform the primary shift on the main operand
6331 Register MainShifted =
6332 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6333 .getReg(0);
6334
6335 // No carry operand available
6336 if (!CarryOperand.isValid())
6337 return MainShifted;
6338
6339 // If BitShift is 0 (word-aligned shift), no inter-word bit movement occurs,
6340 // so carry bits aren't needed.
6341 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6342 auto ZeroConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6343 LLT BoolTy = LLT::scalar(1);
6344 auto IsZeroBitShift =
6345 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, ShiftAmt, ZeroConst);
6346
6347 // Extract bits from the adjacent part that will "carry over" into this part.
6348 // The carry direction is opposite to the main shift direction, so we can
6349 // align the two shifted values before combining them with OR.
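//
// For example (illustrative): with 32-bit parts, a BitShift of 18, and G_LSHR,
// MainShifted = Main >> 18 and CarryBits = Carry << (32 - 18) = Carry << 14,
// so the combined part is (Main >> 18) | (Carry << 14).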
6350
6351 // Determine the carry shift opcode (opposite direction)
6352 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6353 : TargetOpcode::G_SHL;
6354
6355 // Calculate inverse shift amount: BitWidth - ShiftAmt
6356 auto TargetBitsConst =
6357 MIRBuilder.buildConstant(ShiftAmtTy, TargetTy.getScalarSizeInBits());
6358 auto InvShiftAmt = MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6359
6360 // Shift the carry operand
6361 Register CarryBits =
6362 MIRBuilder
6363 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6364 .getReg(0);
6365
6366 // If BitShift is 0, don't include carry bits (InvShiftAmt would equal
6367 // TargetBits which would be poison for the individual carry shift operation).
6368 auto ZeroReg = MIRBuilder.buildConstant(TargetTy, 0);
6369 Register SafeCarryBits =
6370 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6371 .getReg(0);
6372
6373 // Combine the main shifted part with the carry bits
6374 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6375}
6376
6377LegalizerHelper::LegalizeResult
6378LegalizerHelper::narrowScalarShiftByConstantMultiway(MachineInstr &MI,
6379 const APInt &Amt,
6380 LLT TargetTy,
6381 LLT ShiftAmtTy) {
6382 // Any wide shift can be decomposed into WordShift + BitShift components.
6383 // When shift amount is known constant, directly compute the decomposition
6384 // values and generate constant registers.
6385 Register DstReg = MI.getOperand(0).getReg();
6386 Register SrcReg = MI.getOperand(1).getReg();
6387 LLT DstTy = MRI.getType(DstReg);
6388
6389 const unsigned DstBits = DstTy.getScalarSizeInBits();
6390 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6391 const unsigned NumParts = DstBits / TargetBits;
6392
6393 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6394
6395 // When the shift amount is known at compile time, we just calculate which
6396 // source parts contribute to each output part.
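//
// For example (illustrative): a 128-bit G_LSHR by 40 with 32-bit parts has
// ShiftWords = 1 and ShiftBits = 8, so Dst[0] = (Src[1] >> 8) | (Src[2] << 24),
// Dst[1] = (Src[2] >> 8) | (Src[3] << 24), Dst[2] = Src[3] >> 8, and Dst[3] = 0.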
6397
6398 SmallVector<Register, 8> SrcParts;
6399 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6400
6401 if (Amt.isZero()) {
6402 // No shift needed, just copy
6403 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6404 MI.eraseFromParent();
6405 return Legalized;
6406 }
6407
6408 ShiftParams Params;
6409 const unsigned ShiftWords = Amt.getZExtValue() / TargetBits;
6410 const unsigned ShiftBits = Amt.getZExtValue() % TargetBits;
6411
6412 // Generate constants and values needed by all shift types
6413 Params.WordShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftWords).getReg(0);
6414 Params.BitShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftBits).getReg(0);
6415 Params.InvBitShift =
6416 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6417 Params.Zero = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6418
6419 // For ASHR, we need the sign-extended value to fill shifted-out positions
6420 if (MI.getOpcode() == TargetOpcode::G_ASHR)
6421 Params.SignBit =
6422 MIRBuilder
6423 .buildAShr(TargetTy, SrcParts[SrcParts.size() - 1],
6424 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6425 .getReg(0);
6426
6427 SmallVector<Register, 8> DstParts(NumParts);
6428 for (unsigned I = 0; I < NumParts; ++I)
6429 DstParts[I] = buildConstantShiftPart(MI.getOpcode(), I, NumParts, SrcParts,
6430 Params, TargetTy, ShiftAmtTy);
6431
6432 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6433 MI.eraseFromParent();
6434 return Legalized;
6435}
6436
6437LegalizerHelper::LegalizeResult
6438LegalizerHelper::narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy) {
6439 Register DstReg = MI.getOperand(0).getReg();
6440 Register SrcReg = MI.getOperand(1).getReg();
6441 Register AmtReg = MI.getOperand(2).getReg();
6442 LLT DstTy = MRI.getType(DstReg);
6443 LLT ShiftAmtTy = MRI.getType(AmtReg);
6444
6445 const unsigned DstBits = DstTy.getScalarSizeInBits();
6446 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6447 const unsigned NumParts = DstBits / TargetBits;
6448
6449 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6450 assert(isPowerOf2_32(TargetBits) && "Target bit width must be power of 2");
6451
6452 // If the shift amount is known at compile time, we can use direct indexing
6453 // instead of generating select chains in the general case.
6454 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI))
6455 return narrowScalarShiftByConstantMultiway(MI, VRegAndVal->Value, TargetTy,
6456 ShiftAmtTy);
6457
6458 // For runtime-variable shift amounts, we must generate a more complex
6459 // sequence that handles all possible shift values using select chains.
6460
6461 // Split the input into target-sized pieces
6462 SmallVector<Register, 8> SrcParts;
6463 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6464
6465 // Shifting by zero should be a no-op.
6466 auto ZeroAmtConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6467 LLT BoolTy = LLT::scalar(1);
6468 auto IsZeroShift =
6469 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, AmtReg, ZeroAmtConst);
6470
6471 // Any wide shift can be decomposed into two components:
6472 // 1. WordShift: number of complete target-sized words to shift
6473 // 2. BitShift: number of bits to shift within each word
6474 //
6475 // Example: 128-bit >> 50 with 32-bit target:
6476 // WordShift = 50 / 32 = 1 (shift right by 1 complete word)
6477 // BitShift = 50 % 32 = 18 (shift each word right by 18 bits)
6478 unsigned TargetBitsLog2 = Log2_32(TargetBits);
6479 auto TargetBitsLog2Const =
6480 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6481 auto TargetBitsMask = MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6482
6483 Register WordShift =
6484 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6485 Register BitShift =
6486 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6487
6488 // Fill values:
6489 // - SHL/LSHR: fill with zeros
6490 // - ASHR: fill with sign-extended MSB
6491 Register ZeroReg = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6492
6493 Register FillValue;
6494 if (MI.getOpcode() == TargetOpcode::G_ASHR) {
6495 auto TargetBitsMinusOneConst =
6496 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6497 FillValue = MIRBuilder
6498 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6499 TargetBitsMinusOneConst)
6500 .getReg(0);
6501 } else {
6502 FillValue = ZeroReg;
6503 }
6504
6505 SmallVector<Register, 8> DstParts(NumParts);
6506
6507 // For each output part, generate a select chain that chooses the correct
6508 // result based on the runtime WordShift value. This handles all possible
6509 // word shift amounts by pre-calculating what each would produce.
6510 for (unsigned I = 0; I < NumParts; ++I) {
6511 // Initialize with appropriate default value for this shift type
6512 Register InBoundsResult = FillValue;
6513
6514 // clang-format off
6515 // Build a branchless select chain by pre-computing results for all possible
6516 // WordShift values (0 to NumParts-1). Each iteration nests a new select:
6517 //
6518 // K=0: select(WordShift==0, result0, FillValue)
6519 // K=1: select(WordShift==1, result1, select(WordShift==0, result0, FillValue))
6520 // K=2: select(WordShift==2, result2, select(WordShift==1, result1, select(...)))
6521 // clang-format on
6522 for (unsigned K = 0; K < NumParts; ++K) {
6523 auto WordShiftKConst = MIRBuilder.buildConstant(ShiftAmtTy, K);
6524 auto IsWordShiftK = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy,
6525 WordShift, WordShiftKConst);
6526
6527 // Calculate source indices for this word shift
6528 //
6529 // For 4-part 128-bit value with K=1 word shift:
6530 // SHL: [3][2][1][0] << K => [2][1][0][Z]
6531 // -> (MainIdx = I-K, CarryIdx = I-K-1)
6532 // LSHR: [3][2][1][0] >> K => [Z][3][2][1]
6533 // -> (MainIdx = I+K, CarryIdx = I+K+1)
6534 int MainSrcIdx;
6535 int CarrySrcIdx; // Index for the word that provides the carried-in bits.
6536
6537 switch (MI.getOpcode()) {
6538 case TargetOpcode::G_SHL:
6539 MainSrcIdx = (int)I - (int)K;
6540 CarrySrcIdx = MainSrcIdx - 1;
6541 break;
6542 case TargetOpcode::G_LSHR:
6543 case TargetOpcode::G_ASHR:
6544 MainSrcIdx = (int)I + (int)K;
6545 CarrySrcIdx = MainSrcIdx + 1;
6546 break;
6547 default:
6548 llvm_unreachable("Not a shift");
6549 }
6550
6551 // Check bounds and build the result for this word shift
6552 Register ResultForK;
6553 if (MainSrcIdx >= 0 && MainSrcIdx < (int)NumParts) {
6554 Register MainOp = SrcParts[MainSrcIdx];
6555 Register CarryOp;
6556
6557 // Determine carry operand with bounds checking
6558 if (CarrySrcIdx >= 0 && CarrySrcIdx < (int)NumParts)
6559 CarryOp = SrcParts[CarrySrcIdx];
6560 else if (MI.getOpcode() == TargetOpcode::G_ASHR &&
6561 CarrySrcIdx >= (int)NumParts)
6562 CarryOp = FillValue; // Use sign extension
6563
6564 ResultForK = buildVariableShiftPart(MI.getOpcode(), MainOp, BitShift,
6565 TargetTy, CarryOp);
6566 } else {
6567 // Out of bounds - use fill value for this k
6568 ResultForK = FillValue;
6569 }
6570
6571 // Select this result if WordShift equals k
6572 InBoundsResult =
6573 MIRBuilder
6574 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6575 .getReg(0);
6576 }
6577
6578 // Handle zero-shift special case: if shift is 0, use original input
6579 DstParts[I] =
6580 MIRBuilder
6581 .buildSelect(TargetTy, IsZeroShift, SrcParts[I], InBoundsResult)
6582 .getReg(0);
6583 }
6584
6585 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6586 MI.eraseFromParent();
6587 return Legalized;
6588}
6589
6590 LegalizerHelper::LegalizeResult
6591 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
6592 LLT MoreTy) {
6593 assert(TypeIdx == 0 && "Expecting only Idx 0");
6594
6595 Observer.changingInstr(MI);
6596 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
6597 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
6598 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
6599 moreElementsVectorSrc(MI, MoreTy, I);
6600 }
6601
6602 MachineBasicBlock &MBB = *MI.getParent();
6603 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
6604 moreElementsVectorDst(MI, MoreTy, 0);
6605 Observer.changedInstr(MI);
6606 return Legalized;
6607}
6608
6609MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
6610 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
6611 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
6612
6613 switch (Opcode) {
6614 default:
6615 llvm_unreachable(
6616 "getNeutralElementForVecReduce called with invalid opcode!");
6617 case TargetOpcode::G_VECREDUCE_ADD:
6618 case TargetOpcode::G_VECREDUCE_OR:
6619 case TargetOpcode::G_VECREDUCE_XOR:
6620 case TargetOpcode::G_VECREDUCE_UMAX:
6621 return MIRBuilder.buildConstant(Ty, 0);
6622 case TargetOpcode::G_VECREDUCE_MUL:
6623 return MIRBuilder.buildConstant(Ty, 1);
6624 case TargetOpcode::G_VECREDUCE_AND:
6625 case TargetOpcode::G_VECREDUCE_UMIN:
6626 return MIRBuilder.buildConstant(
6627 Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
6628 case TargetOpcode::G_VECREDUCE_SMAX:
6629 return MIRBuilder.buildConstant(
6630 Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
6631 case TargetOpcode::G_VECREDUCE_SMIN:
6632 return MIRBuilder.buildConstant(
6633 Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
6634 case TargetOpcode::G_VECREDUCE_FADD:
6635 return MIRBuilder.buildFConstant(Ty, -0.0);
6636 case TargetOpcode::G_VECREDUCE_FMUL:
6637 return MIRBuilder.buildFConstant(Ty, 1.0);
6638 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6639 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6640 assert(false && "getNeutralElementForVecReduce unimplemented for "
6641 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6642 }
6643 llvm_unreachable("switch expected to return!");
6644}
6645
6646 LegalizerHelper::LegalizeResult
6647 LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
6648 LLT MoreTy) {
6649 unsigned Opc = MI.getOpcode();
6650 switch (Opc) {
6651 case TargetOpcode::G_IMPLICIT_DEF:
6652 case TargetOpcode::G_LOAD: {
6653 if (TypeIdx != 0)
6654 return UnableToLegalize;
6655 Observer.changingInstr(MI);
6656 moreElementsVectorDst(MI, MoreTy, 0);
6657 Observer.changedInstr(MI);
6658 return Legalized;
6659 }
6660 case TargetOpcode::G_STORE:
6661 if (TypeIdx != 0)
6662 return UnableToLegalize;
6663 Observer.changingInstr(MI);
6664 moreElementsVectorSrc(MI, MoreTy, 0);
6665 Observer.changedInstr(MI);
6666 return Legalized;
6667 case TargetOpcode::G_AND:
6668 case TargetOpcode::G_OR:
6669 case TargetOpcode::G_XOR:
6670 case TargetOpcode::G_ADD:
6671 case TargetOpcode::G_SUB:
6672 case TargetOpcode::G_MUL:
6673 case TargetOpcode::G_FADD:
6674 case TargetOpcode::G_FSUB:
6675 case TargetOpcode::G_FMUL:
6676 case TargetOpcode::G_FDIV:
6677 case TargetOpcode::G_FCOPYSIGN:
6678 case TargetOpcode::G_UADDSAT:
6679 case TargetOpcode::G_USUBSAT:
6680 case TargetOpcode::G_SADDSAT:
6681 case TargetOpcode::G_SSUBSAT:
6682 case TargetOpcode::G_SMIN:
6683 case TargetOpcode::G_SMAX:
6684 case TargetOpcode::G_UMIN:
6685 case TargetOpcode::G_UMAX:
6686 case TargetOpcode::G_FMINNUM:
6687 case TargetOpcode::G_FMAXNUM:
6688 case TargetOpcode::G_FMINNUM_IEEE:
6689 case TargetOpcode::G_FMAXNUM_IEEE:
6690 case TargetOpcode::G_FMINIMUM:
6691 case TargetOpcode::G_FMAXIMUM:
6692 case TargetOpcode::G_FMINIMUMNUM:
6693 case TargetOpcode::G_FMAXIMUMNUM:
6694 case TargetOpcode::G_STRICT_FADD:
6695 case TargetOpcode::G_STRICT_FSUB:
6696 case TargetOpcode::G_STRICT_FMUL: {
6697 Observer.changingInstr(MI);
6698 moreElementsVectorSrc(MI, MoreTy, 1);
6699 moreElementsVectorSrc(MI, MoreTy, 2);
6700 moreElementsVectorDst(MI, MoreTy, 0);
6701 Observer.changedInstr(MI);
6702 return Legalized;
6703 }
6704 case TargetOpcode::G_SHL:
6705 case TargetOpcode::G_ASHR:
6706 case TargetOpcode::G_LSHR: {
6707 Observer.changingInstr(MI);
6708 moreElementsVectorSrc(MI, MoreTy, 1);
6709 // The shift operand may have a different scalar type from the source and
6710 // destination operands.
6711 LLT ShiftMoreTy = MoreTy.changeElementType(
6712 MRI.getType(MI.getOperand(2).getReg()).getElementType());
6713 moreElementsVectorSrc(MI, ShiftMoreTy, 2);
6714 moreElementsVectorDst(MI, MoreTy, 0);
6715 Observer.changedInstr(MI);
6716 return Legalized;
6717 }
6718 case TargetOpcode::G_FMA:
6719 case TargetOpcode::G_STRICT_FMA:
6720 case TargetOpcode::G_FSHR:
6721 case TargetOpcode::G_FSHL: {
6722 Observer.changingInstr(MI);
6723 moreElementsVectorSrc(MI, MoreTy, 1);
6724 moreElementsVectorSrc(MI, MoreTy, 2);
6725 moreElementsVectorSrc(MI, MoreTy, 3);
6726 moreElementsVectorDst(MI, MoreTy, 0);
6727 Observer.changedInstr(MI);
6728 return Legalized;
6729 }
6730 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6731 case TargetOpcode::G_EXTRACT:
6732 if (TypeIdx != 1)
6733 return UnableToLegalize;
6734 Observer.changingInstr(MI);
6735 moreElementsVectorSrc(MI, MoreTy, 1);
6736 Observer.changedInstr(MI);
6737 return Legalized;
6738 case TargetOpcode::G_INSERT:
6739 case TargetOpcode::G_INSERT_VECTOR_ELT:
6740 case TargetOpcode::G_FREEZE:
6741 case TargetOpcode::G_FNEG:
6742 case TargetOpcode::G_FABS:
6743 case TargetOpcode::G_FSQRT:
6744 case TargetOpcode::G_FCEIL:
6745 case TargetOpcode::G_FFLOOR:
6746 case TargetOpcode::G_FNEARBYINT:
6747 case TargetOpcode::G_FRINT:
6748 case TargetOpcode::G_INTRINSIC_ROUND:
6749 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6750 case TargetOpcode::G_INTRINSIC_TRUNC:
6751 case TargetOpcode::G_BITREVERSE:
6752 case TargetOpcode::G_BSWAP:
6753 case TargetOpcode::G_FCANONICALIZE:
6754 case TargetOpcode::G_SEXT_INREG:
6755 case TargetOpcode::G_ABS:
6756 case TargetOpcode::G_CTLZ:
6757 case TargetOpcode::G_CTPOP:
6758 if (TypeIdx != 0)
6759 return UnableToLegalize;
6760 Observer.changingInstr(MI);
6761 moreElementsVectorSrc(MI, MoreTy, 1);
6762 moreElementsVectorDst(MI, MoreTy, 0);
6763 Observer.changedInstr(MI);
6764 return Legalized;
6765 case TargetOpcode::G_SELECT: {
6766 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
6767 if (TypeIdx == 1) {
6768 if (!CondTy.isScalar() ||
6769 DstTy.getElementCount() != MoreTy.getElementCount())
6770 return UnableToLegalize;
6771
6772 // This is turning a scalar select of vectors into a vector
6773 // select. Broadcast the select condition.
6774 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6775 Observer.changingInstr(MI);
6776 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6777 Observer.changedInstr(MI);
6778 return Legalized;
6779 }
6780
6781 if (CondTy.isVector())
6782 return UnableToLegalize;
6783
6784 Observer.changingInstr(MI);
6785 moreElementsVectorSrc(MI, MoreTy, 2);
6786 moreElementsVectorSrc(MI, MoreTy, 3);
6787 moreElementsVectorDst(MI, MoreTy, 0);
6788 Observer.changedInstr(MI);
6789 return Legalized;
6790 }
6791 case TargetOpcode::G_UNMERGE_VALUES:
6792 return UnableToLegalize;
6793 case TargetOpcode::G_PHI:
6794 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
6795 case TargetOpcode::G_SHUFFLE_VECTOR:
6796 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
6797 case TargetOpcode::G_BUILD_VECTOR: {
6798 SmallVector<SrcOp, 8> Elts;
6799 for (auto Op : MI.uses()) {
6800 Elts.push_back(Op.getReg());
6801 }
6802
6803 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
6804 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
6805 }
6806
6807 MIRBuilder.buildDeleteTrailingVectorElements(
6808 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
6809 MI.eraseFromParent();
6810 return Legalized;
6811 }
6812 case TargetOpcode::G_SEXT:
6813 case TargetOpcode::G_ZEXT:
6814 case TargetOpcode::G_ANYEXT:
6815 case TargetOpcode::G_TRUNC:
6816 case TargetOpcode::G_FPTRUNC:
6817 case TargetOpcode::G_FPEXT:
6818 case TargetOpcode::G_FPTOSI:
6819 case TargetOpcode::G_FPTOUI:
6820 case TargetOpcode::G_FPTOSI_SAT:
6821 case TargetOpcode::G_FPTOUI_SAT:
6822 case TargetOpcode::G_SITOFP:
6823 case TargetOpcode::G_UITOFP: {
6824 Observer.changingInstr(MI);
6825 LLT SrcExtTy;
6826 LLT DstExtTy;
6827 if (TypeIdx == 0) {
6828 DstExtTy = MoreTy;
6829 SrcExtTy = MoreTy.changeElementType(
6830 MRI.getType(MI.getOperand(1).getReg()).getElementType());
6831 } else {
6832 DstExtTy = MoreTy.changeElementType(
6833 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6834 SrcExtTy = MoreTy;
6835 }
6836 moreElementsVectorSrc(MI, SrcExtTy, 1);
6837 moreElementsVectorDst(MI, DstExtTy, 0);
6838 Observer.changedInstr(MI);
6839 return Legalized;
6840 }
6841 case TargetOpcode::G_ICMP:
6842 case TargetOpcode::G_FCMP: {
6843 if (TypeIdx != 1)
6844 return UnableToLegalize;
6845
6846 Observer.changingInstr(MI);
6847 moreElementsVectorSrc(MI, MoreTy, 2);
6848 moreElementsVectorSrc(MI, MoreTy, 3);
6849 LLT CondTy = MoreTy.changeVectorElementType(
6850 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6851 moreElementsVectorDst(MI, CondTy, 0);
6852 Observer.changedInstr(MI);
6853 return Legalized;
6854 }
6855 case TargetOpcode::G_BITCAST: {
6856 if (TypeIdx != 0)
6857 return UnableToLegalize;
6858
6859 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
6860 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6861
6862 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
6863 if (coefficient % DstTy.getNumElements() != 0)
6864 return UnableToLegalize;
6865
6866 coefficient = coefficient / DstTy.getNumElements();
6867
6868 LLT NewTy = SrcTy.changeElementCount(
6869 ElementCount::get(coefficient, MoreTy.isScalable()));
6870 Observer.changingInstr(MI);
6871 moreElementsVectorSrc(MI, NewTy, 1);
6872 moreElementsVectorDst(MI, MoreTy, 0);
6873 Observer.changedInstr(MI);
6874 return Legalized;
6875 }
6876 case TargetOpcode::G_VECREDUCE_FADD:
6877 case TargetOpcode::G_VECREDUCE_FMUL:
6878 case TargetOpcode::G_VECREDUCE_ADD:
6879 case TargetOpcode::G_VECREDUCE_MUL:
6880 case TargetOpcode::G_VECREDUCE_AND:
6881 case TargetOpcode::G_VECREDUCE_OR:
6882 case TargetOpcode::G_VECREDUCE_XOR:
6883 case TargetOpcode::G_VECREDUCE_SMAX:
6884 case TargetOpcode::G_VECREDUCE_SMIN:
6885 case TargetOpcode::G_VECREDUCE_UMAX:
6886 case TargetOpcode::G_VECREDUCE_UMIN: {
6887 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
6888 MachineOperand &MO = MI.getOperand(1);
6889 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6890 auto NeutralElement = getNeutralElementForVecReduce(
6891 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
6892
6893 LLT IdxTy(TLI.getVectorIdxLLT(MIRBuilder.getDataLayout()));
6894 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
6895 i != e; i++) {
6896 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
6897 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6898 NeutralElement, Idx);
6899 }
6900
6901 Observer.changingInstr(MI);
6902 MO.setReg(NewVec.getReg(0));
6903 Observer.changedInstr(MI);
6904 return Legalized;
6905 }
6906
6907 default:
6908 return UnableToLegalize;
6909 }
6910}
6911
6912 LegalizerHelper::LegalizeResult
6913 LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
6914 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6915 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6916 unsigned MaskNumElts = Mask.size();
6917 unsigned SrcNumElts = SrcTy.getNumElements();
6918 LLT DestEltTy = DstTy.getElementType();
6919
6920 if (MaskNumElts == SrcNumElts)
6921 return Legalized;
6922
6923 if (MaskNumElts < SrcNumElts) {
6924 // Extend mask to match new destination vector size with
6925 // undef values.
6926 SmallVector<int, 16> NewMask(SrcNumElts, -1);
6927 llvm::copy(Mask, NewMask.begin());
6928
6929 moreElementsVectorDst(MI, SrcTy, 0);
6930 MIRBuilder.setInstrAndDebugLoc(MI);
6931 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6932 MI.getOperand(1).getReg(),
6933 MI.getOperand(2).getReg(), NewMask);
6934 MI.eraseFromParent();
6935
6936 return Legalized;
6937 }
6938
6939 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
6940 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6941 LLT PaddedTy =
6942 DstTy.changeVectorElementCount(ElementCount::getFixed(PaddedMaskNumElts));
6943
6944 // Create new source vectors by concatenating the initial
6945 // source vectors with undefined vectors of the same size.
6946 auto Undef = MIRBuilder.buildUndef(SrcTy);
6947 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
6948 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
6949 MOps1[0] = MI.getOperand(1).getReg();
6950 MOps2[0] = MI.getOperand(2).getReg();
6951
6952 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
6953 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
6954
6955 // Readjust mask for new input vector length.
6956 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
6957 for (unsigned I = 0; I != MaskNumElts; ++I) {
6958 int Idx = Mask[I];
6959 if (Idx >= static_cast<int>(SrcNumElts))
6960 Idx += PaddedMaskNumElts - SrcNumElts;
6961 MappedOps[I] = Idx;
6962 }
6963
6964 // If we got more elements than required, extract subvector.
6965 if (MaskNumElts != PaddedMaskNumElts) {
6966 auto Shuffle =
6967 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
6968
6969 SmallVector<Register, 16> Elts(MaskNumElts);
6970 for (unsigned I = 0; I < MaskNumElts; ++I) {
6971 Elts[I] =
6972 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
6973 .getReg(0);
6974 }
6975 MIRBuilder.buildBuildVector(DstReg, Elts);
6976 } else {
6977 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
6978 }
6979
6980 MI.eraseFromParent();
6981 return Legalized;
6982}
6983
6984 LegalizerHelper::LegalizeResult
6985 LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
6986 unsigned int TypeIdx, LLT MoreTy) {
6987 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
6988 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6989 unsigned NumElts = DstTy.getNumElements();
6990 unsigned WidenNumElts = MoreTy.getNumElements();
6991
6992 if (DstTy.isVector() && Src1Ty.isVector() &&
6993 DstTy.getNumElements() != Src1Ty.getNumElements()) {
6994 return equalizeVectorShuffleLengths(MI);
6995 }
6996
6997 if (TypeIdx != 0)
6998 return UnableToLegalize;
6999
7000 // Expect a canonicalized shuffle.
7001 if (DstTy != Src1Ty || DstTy != Src2Ty)
7002 return UnableToLegalize;
7003
7004 moreElementsVectorSrc(MI, MoreTy, 1);
7005 moreElementsVectorSrc(MI, MoreTy, 2);
7006
7007 // Adjust mask based on new input vector length.
7008 SmallVector<int, 16> NewMask(WidenNumElts, -1);
7009 for (unsigned I = 0; I != NumElts; ++I) {
7010 int Idx = Mask[I];
7011 if (Idx < static_cast<int>(NumElts))
7012 NewMask[I] = Idx;
7013 else
7014 NewMask[I] = Idx - NumElts + WidenNumElts;
7015 }
7016 moreElementsVectorDst(MI, MoreTy, 0);
7017 MIRBuilder.setInstrAndDebugLoc(MI);
7018 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
7019 MI.getOperand(1).getReg(),
7020 MI.getOperand(2).getReg(), NewMask);
7021 MI.eraseFromParent();
7022 return Legalized;
7023}
7024
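// Descriptive note on the helper below: it performs schoolbook long
// multiplication on NarrowTy-sized limbs. DstRegs[0] is the low G_MUL of the
// lowest limbs; each later destination limb sums the low halves (G_MUL) of
// the products that land on it, the high halves (G_UMULH) of the products
// that fed the previous limb, and the carries accumulated there via
// G_UADDO/G_ZEXT. DstRegs may hold fewer than 2 * SrcParts limbs when only
// the low part of the full product is needed.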
7025void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
7026 ArrayRef<Register> Src1Regs,
7027 ArrayRef<Register> Src2Regs,
7028 LLT NarrowTy) {
7029 MachineIRBuilder &B = MIRBuilder;
7030 unsigned SrcParts = Src1Regs.size();
7031 unsigned DstParts = DstRegs.size();
7032
7033 unsigned DstIdx = 0; // Low bits of the result.
7034 Register FactorSum =
7035 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
7036 DstRegs[DstIdx] = FactorSum;
7037
7038 Register CarrySumPrevDstIdx;
7039 SmallVector<Register, 4> Factors;
7040
7041 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
7042 // Collect low parts of muls for DstIdx.
7043 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
7044 i <= std::min(DstIdx, SrcParts - 1); ++i) {
7045 MachineInstrBuilder Mul =
7046 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
7047 Factors.push_back(Mul.getReg(0));
7048 }
7049 // Collect high parts of muls from previous DstIdx.
7050 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
7051 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
7052 MachineInstrBuilder Umulh =
7053 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
7054 Factors.push_back(Umulh.getReg(0));
7055 }
7056 // Add CarrySum from additions calculated for previous DstIdx.
7057 if (DstIdx != 1) {
7058 Factors.push_back(CarrySumPrevDstIdx);
7059 }
7060
7061 Register CarrySum;
7062 // Add all factors and accumulate all carries into CarrySum.
7063 if (DstIdx != DstParts - 1) {
7064 MachineInstrBuilder Uaddo =
7065 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
7066 FactorSum = Uaddo.getReg(0);
7067 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
7068 for (unsigned i = 2; i < Factors.size(); ++i) {
7069 MachineInstrBuilder Uaddo =
7070 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
7071 FactorSum = Uaddo.getReg(0);
7072 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
7073 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
7074 }
7075 } else {
7076 // Since value for the next index is not calculated, neither is CarrySum.
7077 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
7078 for (unsigned i = 2; i < Factors.size(); ++i)
7079 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
7080 }
7081
7082 CarrySumPrevDstIdx = CarrySum;
7083 DstRegs[DstIdx] = FactorSum;
7084 Factors.clear();
7085 }
7086}
7087
7088 LegalizerHelper::LegalizeResult
7089 LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
7090 LLT NarrowTy) {
7091 if (TypeIdx != 0)
7092 return UnableToLegalize;
7093
7094 Register DstReg = MI.getOperand(0).getReg();
7095 LLT DstType = MRI.getType(DstReg);
7096 // FIXME: add support for vector types
7097 if (DstType.isVector())
7098 return UnableToLegalize;
7099
7100 unsigned Opcode = MI.getOpcode();
7101 unsigned OpO, OpE, OpF;
7102 switch (Opcode) {
7103 case TargetOpcode::G_SADDO:
7104 case TargetOpcode::G_SADDE:
7105 case TargetOpcode::G_UADDO:
7106 case TargetOpcode::G_UADDE:
7107 case TargetOpcode::G_ADD:
7108 OpO = TargetOpcode::G_UADDO;
7109 OpE = TargetOpcode::G_UADDE;
7110 OpF = TargetOpcode::G_UADDE;
7111 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7112 OpF = TargetOpcode::G_SADDE;
7113 break;
7114 case TargetOpcode::G_SSUBO:
7115 case TargetOpcode::G_SSUBE:
7116 case TargetOpcode::G_USUBO:
7117 case TargetOpcode::G_USUBE:
7118 case TargetOpcode::G_SUB:
7119 OpO = TargetOpcode::G_USUBO;
7120 OpE = TargetOpcode::G_USUBE;
7121 OpF = TargetOpcode::G_USUBE;
7122 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7123 OpF = TargetOpcode::G_SSUBE;
7124 break;
7125 default:
7126 llvm_unreachable("Unexpected add/sub opcode!");
7127 }
7128
7129 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
7130 unsigned NumDefs = MI.getNumExplicitDefs();
7131 Register Src1 = MI.getOperand(NumDefs).getReg();
7132 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
7133 Register CarryDst, CarryIn;
7134 if (NumDefs == 2)
7135 CarryDst = MI.getOperand(1).getReg();
7136 if (MI.getNumOperands() == NumDefs + 3)
7137 CarryIn = MI.getOperand(NumDefs + 2).getReg();
7138
7139 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7140 LLT LeftoverTy, DummyTy;
7141 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
7142 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7143 MIRBuilder, MRI);
7144 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
7145 MRI);
7146
7147 int NarrowParts = Src1Regs.size();
7148 Src1Regs.append(Src1Left);
7149 Src2Regs.append(Src2Left);
7150 DstRegs.reserve(Src1Regs.size());
7151
7152 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
7153 Register DstReg =
7154 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7155 Register CarryOut;
7156 // Forward the final carry-out to the destination register
7157 if (i == e - 1 && CarryDst)
7158 CarryOut = CarryDst;
7159 else
7160 CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
7161
7162 if (!CarryIn) {
7163 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7164 {Src1Regs[i], Src2Regs[i]});
7165 } else if (i == e - 1) {
7166 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7167 {Src1Regs[i], Src2Regs[i], CarryIn});
7168 } else {
7169 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7170 {Src1Regs[i], Src2Regs[i], CarryIn});
7171 }
7172
7173 DstRegs.push_back(DstReg);
7174 CarryIn = CarryOut;
7175 }
7176 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
7177 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7178 ArrayRef(DstRegs).drop_front(NarrowParts));
7179
7180 MI.eraseFromParent();
7181 return Legalized;
7182}
7183
7184 LegalizerHelper::LegalizeResult
7185 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
7186 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
7187
7188 LLT Ty = MRI.getType(DstReg);
7189 if (Ty.isVector())
7190 return UnableToLegalize;
7191
7192 unsigned Size = Ty.getSizeInBits();
7193 unsigned NarrowSize = NarrowTy.getSizeInBits();
7194 if (Size % NarrowSize != 0)
7195 return UnableToLegalize;
7196
7197 unsigned NumParts = Size / NarrowSize;
7198 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
7199 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7200
7201 SmallVector<Register, 2> Src1Parts, Src2Parts;
7202 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
7203 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
7204 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
7205 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7206
7207 // Take only high half of registers if this is high mul.
7208 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
7209 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7210 MI.eraseFromParent();
7211 return Legalized;
7212}
7213
7214 LegalizerHelper::LegalizeResult
7215 LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
7216 LLT NarrowTy) {
7217 if (TypeIdx != 0)
7218 return UnableToLegalize;
7219
7220 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
7221
7222 Register Src = MI.getOperand(1).getReg();
7223 LLT SrcTy = MRI.getType(Src);
7224
7225 // If all finite floats fit into the narrowed integer type, we can just swap
7226 // out the result type. This is practically only useful for conversions from
7227 // half to at least 16-bits, so just handle the one case.
7228 if (SrcTy.getScalarType() != LLT::scalar(16) ||
7229 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
7230 return UnableToLegalize;
7231
7232 Observer.changingInstr(MI);
7233 narrowScalarDst(MI, NarrowTy, 0,
7234 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7235 Observer.changedInstr(MI);
7236 return Legalized;
7237}
7238
7239 LegalizerHelper::LegalizeResult
7240 LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
7241 LLT NarrowTy) {
7242 if (TypeIdx != 1)
7243 return UnableToLegalize;
7244
7245 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7246
7247 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7248 // FIXME: add support for when SizeOp1 isn't an exact multiple of
7249 // NarrowSize.
7250 if (SizeOp1 % NarrowSize != 0)
7251 return UnableToLegalize;
7252 int NumParts = SizeOp1 / NarrowSize;
7253
7254 SmallVector<Register, 2> SrcRegs, DstRegs;
7255 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7256 MIRBuilder, MRI);
7257
7258 Register OpReg = MI.getOperand(0).getReg();
7259 uint64_t OpStart = MI.getOperand(2).getImm();
7260 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7261 for (int i = 0; i < NumParts; ++i) {
7262 unsigned SrcStart = i * NarrowSize;
7263
7264 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7265 // No part of the extract uses this subregister, ignore it.
7266 continue;
7267 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7268 // The entire subregister is extracted, forward the value.
7269 DstRegs.push_back(SrcRegs[i]);
7270 continue;
7271 }
7272
7273 // OpSegStart is where this destination segment would start in OpReg if it
7274 // extended infinitely in both directions.
7275 int64_t ExtractOffset;
7276 uint64_t SegSize;
7277 if (OpStart < SrcStart) {
7278 ExtractOffset = 0;
7279 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7280 } else {
7281 ExtractOffset = OpStart - SrcStart;
7282 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7283 }
7284
7285 Register SegReg = SrcRegs[i];
7286 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7287 // A genuine extract is needed.
7288 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7289 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7290 }
7291
7292 DstRegs.push_back(SegReg);
7293 }
7294
7295 Register DstReg = MI.getOperand(0).getReg();
7296 if (MRI.getType(DstReg).isVector())
7297 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7298 else if (DstRegs.size() > 1)
7299 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7300 else
7301 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
7302 MI.eraseFromParent();
7303 return Legalized;
7304}
7305
7306 LegalizerHelper::LegalizeResult
7307 LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
7308 LLT NarrowTy) {
7309 // FIXME: Don't know how to handle secondary types yet.
7310 if (TypeIdx != 0)
7311 return UnableToLegalize;
7312
7313 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
7314 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7315 LLT LeftoverTy;
7316 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7317 LeftoverRegs, MIRBuilder, MRI);
7318
7319 SrcRegs.append(LeftoverRegs);
7320
7321 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7322 Register OpReg = MI.getOperand(2).getReg();
7323 uint64_t OpStart = MI.getOperand(3).getImm();
7324 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7325 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
7326 unsigned DstStart = I * NarrowSize;
7327
7328 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7329 // The entire subregister is defined by this insert, forward the new
7330 // value.
7331 DstRegs.push_back(OpReg);
7332 continue;
7333 }
7334
7335 Register SrcReg = SrcRegs[I];
7336 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
7337 // The leftover reg is smaller than NarrowTy, so we need to extend it.
7338 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7339 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
7340 }
7341
7342 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7343 // No part of the insert affects this subregister, forward the original.
7344 DstRegs.push_back(SrcReg);
7345 continue;
7346 }
7347
7348 // OpSegStart is where this destination segment would start in OpReg if it
7349 // extended infinitely in both directions.
7350 int64_t ExtractOffset, InsertOffset;
7351 uint64_t SegSize;
7352 if (OpStart < DstStart) {
7353 InsertOffset = 0;
7354 ExtractOffset = DstStart - OpStart;
7355 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7356 } else {
7357 InsertOffset = OpStart - DstStart;
7358 ExtractOffset = 0;
7359 SegSize =
7360 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7361 }
7362
7363 Register SegReg = OpReg;
7364 if (ExtractOffset != 0 || SegSize != OpSize) {
7365 // A genuine extract is needed.
7366 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7367 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7368 }
7369
7370 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7371 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7372 DstRegs.push_back(DstReg);
7373 }
7374
7375 uint64_t WideSize = DstRegs.size() * NarrowSize;
7376 Register DstReg = MI.getOperand(0).getReg();
7377 if (WideSize > RegTy.getSizeInBits()) {
7378 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
7379 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7380 MIRBuilder.buildTrunc(DstReg, MergeReg);
7381 } else
7382 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7383
7384 MI.eraseFromParent();
7385 return Legalized;
7386}
7387
7388 LegalizerHelper::LegalizeResult
7389 LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
7390 LLT NarrowTy) {
7391 Register DstReg = MI.getOperand(0).getReg();
7392 LLT DstTy = MRI.getType(DstReg);
7393
7394 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
7395
7396 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7397 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
7398 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7399 LLT LeftoverTy;
7400 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7401 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
7402 return UnableToLegalize;
7403
7404 LLT Unused;
7405 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7406 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7407 llvm_unreachable("inconsistent extractParts result");
7408
7409 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7410 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
7411 {Src0Regs[I], Src1Regs[I]});
7412 DstRegs.push_back(Inst.getReg(0));
7413 }
7414
7415 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7416 auto Inst = MIRBuilder.buildInstr(
7417 MI.getOpcode(),
7418 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7419 DstLeftoverRegs.push_back(Inst.getReg(0));
7420 }
7421
7422 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7423 LeftoverTy, DstLeftoverRegs);
7424
7425 MI.eraseFromParent();
7426 return Legalized;
7427}
7428
7429 LegalizerHelper::LegalizeResult
7430 LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
7431 LLT NarrowTy) {
7432 if (TypeIdx != 0)
7433 return UnableToLegalize;
7434
7435 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7436
7437 LLT DstTy = MRI.getType(DstReg);
7438 if (DstTy.isVector())
7439 return UnableToLegalize;
7440
7441 SmallVector<Register, 8> Parts;
7442 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7443 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
7444 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7445
7446 MI.eraseFromParent();
7447 return Legalized;
7448}
7449
7450 LegalizerHelper::LegalizeResult
7451 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
7452 LLT NarrowTy) {
7453 if (TypeIdx != 0)
7454 return UnableToLegalize;
7455
7456 Register CondReg = MI.getOperand(1).getReg();
7457 LLT CondTy = MRI.getType(CondReg);
7458 if (CondTy.isVector()) // TODO: Handle vselect
7459 return UnableToLegalize;
7460
7461 Register DstReg = MI.getOperand(0).getReg();
7462 LLT DstTy = MRI.getType(DstReg);
7463
7464 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7465 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7466 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
7467 LLT LeftoverTy;
7468 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7469 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7470 return UnableToLegalize;
7471
7472 LLT Unused;
7473 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7474 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
7475 llvm_unreachable("inconsistent extractParts result");
7476
7477 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7478 auto Select = MIRBuilder.buildSelect(NarrowTy,
7479 CondReg, Src1Regs[I], Src2Regs[I]);
7480 DstRegs.push_back(Select.getReg(0));
7481 }
7482
7483 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7484 auto Select = MIRBuilder.buildSelect(
7485 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
7486 DstLeftoverRegs.push_back(Select.getReg(0));
7487 }
7488
7489 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7490 LeftoverTy, DstLeftoverRegs);
7491
7492 MI.eraseFromParent();
7493 return Legalized;
7494}
7495
7496 LegalizerHelper::LegalizeResult
7497 LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
7498 LLT NarrowTy) {
7499 if (TypeIdx != 1)
7500 return UnableToLegalize;
7501
7502 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7503 unsigned NarrowSize = NarrowTy.getSizeInBits();
7504
7505 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7506 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
7507
7508 MachineIRBuilder &B = MIRBuilder;
7509 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7510 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
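    // E.g. with NarrowSize = 32: ctlz(0x00000000:0x00F00000) = 32 + 8 = 40.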
7511 auto C_0 = B.buildConstant(NarrowTy, 0);
7512 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7513 UnmergeSrc.getReg(1), C_0);
7514 auto LoCTLZ = IsUndef ?
7515 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
7516 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7517 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7518 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7519 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
7520 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7521
7522 MI.eraseFromParent();
7523 return Legalized;
7524 }
7525
7526 return UnableToLegalize;
7527}
7528
7529 LegalizerHelper::LegalizeResult
7530 LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
7531 LLT NarrowTy) {
7532 if (TypeIdx != 1)
7533 return UnableToLegalize;
7534
7535 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7536 unsigned NarrowSize = NarrowTy.getSizeInBits();
7537
7538 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7539 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
7540
7541 MachineIRBuilder &B = MIRBuilder;
7542 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7543 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
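    // E.g. with NarrowSize = 32: cttz(0x00000100:0x00000000) = 8 + 32 = 40.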
7544 auto C_0 = B.buildConstant(NarrowTy, 0);
7545 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7546 UnmergeSrc.getReg(0), C_0);
7547 auto HiCTTZ = IsUndef ?
7548 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
7549 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7550 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7551 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7552 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
7553 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7554
7555 MI.eraseFromParent();
7556 return Legalized;
7557 }
7558
7559 return UnableToLegalize;
7560}
7561
7562 LegalizerHelper::LegalizeResult
7563 LegalizerHelper::narrowScalarCTLS(MachineInstr &MI, unsigned TypeIdx,
7564 LLT NarrowTy) {
7565 if (TypeIdx != 1)
7566 return UnableToLegalize;
7567
7568 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7569 unsigned NarrowSize = NarrowTy.getSizeInBits();
7570
7571 if (!SrcTy.isScalar() || SrcTy.getSizeInBits() != 2 * NarrowSize)
7572 return UnableToLegalize;
7573
7575
7576 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7577 Register Lo = UnmergeSrc.getReg(0);
7578 Register Hi = UnmergeSrc.getReg(1);
7579
7580 auto ShAmt = B.buildConstant(NarrowTy, NarrowSize - 1);
7581 auto Sign = B.buildAShr(NarrowTy, Hi, ShAmt);
7582
7583 auto LoSign = B.buildAShr(NarrowTy, Lo, ShAmt);
7584 auto LoSameSign = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7585 LoSign.getReg(0), Sign.getReg(0));
7586
7587 auto HiIsSign =
7588 B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), Hi, Sign.getReg(0));
7589
7590 auto LoCTLS = B.buildCTLS(DstTy, Lo);
7591 auto GNarrowSize = B.buildConstant(DstTy, NarrowSize);
7592 auto HiIsSignCTLS = B.buildAdd(DstTy, LoCTLS, GNarrowSize);
7593
7594 // If the low half flips sign, the run of redundant bits stops at the
7595 // boundary, so use (NarrowSize - 1) instead of extending into Lo.
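  // Illustrative example, splitting an s64 into s32 halves: for
  // Hi = 0xFFFFFFFF, Lo = 0x7FFFFFFF the sign run stops at the half boundary,
  // giving 32 - 1 = 31 redundant sign bits; for Hi = 0, Lo = 0x0000FFFF it
  // continues into Lo, giving ctls(Lo) + 32 = 15 + 32 = 47.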
7596 auto GNarrowSizeMinus1 = B.buildConstant(DstTy, NarrowSize - 1);
7597 auto HiSignResult =
7598 B.buildSelect(DstTy, LoSameSign, HiIsSignCTLS, GNarrowSizeMinus1);
7599
7600 auto HiCTLS = B.buildCTLS(DstTy, Hi);
7601
7602 B.buildSelect(DstReg, HiIsSign, HiSignResult, HiCTLS);
7603
7604 MI.eraseFromParent();
7605 return Legalized;
7606}
7607
7608 LegalizerHelper::LegalizeResult
7609 LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
7610 LLT NarrowTy) {
7611 if (TypeIdx != 1)
7612 return UnableToLegalize;
7613
7614 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7615 unsigned NarrowSize = NarrowTy.getSizeInBits();
7616
7617 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7618 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
7619
7620 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7621 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7622 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7623
7624 MI.eraseFromParent();
7625 return Legalized;
7626 }
7627
7628 return UnableToLegalize;
7629}
7630
7631 LegalizerHelper::LegalizeResult
7632 LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
7633 LLT NarrowTy) {
7634 if (TypeIdx != 1)
7635 return UnableToLegalize;
7636
7637 MachineIRBuilder &B = MIRBuilder;
7638 Register ExpReg = MI.getOperand(2).getReg();
7639 LLT ExpTy = MRI.getType(ExpReg);
7640
7641 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
7642
7643 // Clamp the exponent to the range of the target type.
7644 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
7645 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
7646 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
7647 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
7648
7649 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
7650 Observer.changingInstr(MI);
7651 MI.getOperand(2).setReg(Trunc.getReg(0));
7652 Observer.changedInstr(MI);
7653 return Legalized;
7654}
7655
7656 LegalizerHelper::LegalizeResult
7657 LegalizerHelper::lowerBitCount(MachineInstr &MI) {
7658 unsigned Opc = MI.getOpcode();
7659 const auto &TII = MIRBuilder.getTII();
7660 auto isSupported = [this](const LegalityQuery &Q) {
7661 auto QAction = LI.getAction(Q).Action;
7662 return QAction == Legal || QAction == Libcall || QAction == Custom;
7663 };
7664 switch (Opc) {
7665 default:
7666 return UnableToLegalize;
7667 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7668 // This trivially expands to CTLZ.
7669 Observer.changingInstr(MI);
7670 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
7671 Observer.changedInstr(MI);
7672 return Legalized;
7673 }
7674 case TargetOpcode::G_CTLZ: {
7675 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7676 unsigned Len = SrcTy.getScalarSizeInBits();
7677
7678 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7679 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
7680 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7681 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
7682 auto ICmp = MIRBuilder.buildICmp(
7683 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
7684 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7685 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7686 MI.eraseFromParent();
7687 return Legalized;
7688 }
7689 // for now, we do this:
7690 // NewLen = NextPowerOf2(Len);
7691 // x = x | (x >> 1);
7692 // x = x | (x >> 2);
7693 // ...
7694 // x = x | (x >>16);
7695 // x = x | (x >>32); // for 64-bit input
7696 // Up to NewLen/2
7697 // return Len - popcount(x);
7698 //
7699 // Ref: "Hacker's Delight" by Henry Warren
7700 Register Op = SrcReg;
7701 unsigned NewLen = PowerOf2Ceil(Len);
7702 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7703 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7704 auto MIBOp = MIRBuilder.buildOr(
7705 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
7706 Op = MIBOp.getReg(0);
7707 }
7708 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
7709 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
7710 MIBPop);
7711 MI.eraseFromParent();
7712 return Legalized;
7713 }
7714 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7715 // This trivially expands to CTTZ.
7716 Observer.changingInstr(MI);
7717 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
7718 Observer.changedInstr(MI);
7719 return Legalized;
7720 }
7721 case TargetOpcode::G_CTTZ: {
7722 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7723
7724 unsigned Len = SrcTy.getScalarSizeInBits();
7725 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7726 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
7727 // zero.
7728 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7729 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
7730 auto ICmp = MIRBuilder.buildICmp(
7731 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
7732 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7733 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7734 MI.eraseFromParent();
7735 return Legalized;
7736 }
7737 // for now, we use: { return popcount(~x & (x - 1)); }
7738 // unless the target has ctlz but not ctpop, in which case we use:
7739 // { return 32 - nlz(~x & (x-1)); }
7740 // Ref: "Hacker's Delight" by Henry Warren
7741 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
7742 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7743 auto MIBTmp = MIRBuilder.buildAnd(
7744 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7745 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7746 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7747 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
7748 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
7749 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
7750 MI.eraseFromParent();
7751 return Legalized;
7752 }
7753 Observer.changingInstr(MI);
7754 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
7755 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7756 Observer.changedInstr(MI);
7757 return Legalized;
7758 }
7759 case TargetOpcode::G_CTPOP: {
7760 Register SrcReg = MI.getOperand(1).getReg();
7761 LLT Ty = MRI.getType(SrcReg);
7762 unsigned Size = Ty.getScalarSizeInBits();
7763 MachineIRBuilder &B = MIRBuilder;
7764
7765 // Bail out on irregular type lengths.
7766 if (Size > 128 || Size % 8 != 0)
7767 return UnableToLegalize;
7768
7769 // Count set bits in blocks of 2 bits. Default approach would be
7770 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
7771 // We use following formula instead:
7772 // B2Count = val - { (val >> 1) & 0x55555555 }
7773 // since it gives same result in blocks of 2 with one instruction less.
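    // E.g. for the 8-bit value 0b11011010: (val >> 1) & 0x55 = 0b01000101 and
    // 0b11011010 - 0b01000101 = 0b10010101, i.e. per-pair counts 2,1,1,1.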
7774 auto C_1 = B.buildConstant(Ty, 1);
7775 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
7776 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
7777 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
7778 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7779 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
7780
7781 // In order to get count in blocks of 4 add values from adjacent block of 2.
7782 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
7783 auto C_2 = B.buildConstant(Ty, 2);
7784 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
7785 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
7786 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
7787 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7788 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7789 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7790
7791 // For count in blocks of 8 bits we don't have to mask high 4 bits before
7792 // addition since count value sits in range {0,...,8} and 4 bits are enough
7793 // to hold such binary values. After addition high 4 bits still hold count
7794 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
7795 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
7796 auto C_4 = B.buildConstant(Ty, 4);
7797 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
7798 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
7799 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
7800 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
7801 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7802
7803 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
7804 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
7805 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
7806 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
7807
7808 // Shift count result from 8 high bits to low bits.
7809 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
7810
7811 auto IsMulSupported = [this](const LLT Ty) {
7812 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7813 return Action == Legal || Action == WidenScalar || Action == Custom;
7814 };
7815 if (IsMulSupported(Ty)) {
7816 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
7817 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7818 } else {
7819 auto ResTmp = B8Count;
7820 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7821 auto ShiftC = B.buildConstant(Ty, Shift);
7822 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
7823 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
7824 }
7825 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7826 }
7827 MI.eraseFromParent();
7828 return Legalized;
7829 }
7830 case TargetOpcode::G_CTLS: {
7831 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7832
7833 // ctls(x) -> ctlz(x ^ (x >> (N - 1))) - 1
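    // E.g. for s8 x = 0b11110010: x >> 7 = 0b11111111, x ^ that = 0b00001101,
    // ctlz = 4, so ctls = 3 redundant sign bits.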
7834 auto SignIdxC =
7835 MIRBuilder.buildConstant(SrcTy, SrcTy.getScalarSizeInBits() - 1);
7836 auto OneC = MIRBuilder.buildConstant(DstTy, 1);
7837
7838 auto Shr = MIRBuilder.buildAShr(SrcTy, SrcReg, SignIdxC);
7839
7840 auto Xor = MIRBuilder.buildXor(SrcTy, SrcReg, Shr);
7841 auto Ctlz = MIRBuilder.buildCTLZ(DstTy, Xor);
7842
7843 MIRBuilder.buildSub(DstReg, Ctlz, OneC);
7844 MI.eraseFromParent();
7845 return Legalized;
7846 }
7847 }
7848}
7849
7850// Check that (every element of) Reg is undef or not an exact multiple of BW.
7851 static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
7852 Register Reg, unsigned BW) {
7853 return matchUnaryPredicate(
7854 MRI, Reg,
7855 [=](const Constant *C) {
7856 // Null constant here means an undef.
7857 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
7858 return !CI || CI->getValue().urem(BW) != 0;
7859 },
7860 /*AllowUndefs*/ true);
7861}
7862
7863 LegalizerHelper::LegalizeResult
7864 LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
7865 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7866 LLT Ty = MRI.getType(Dst);
7867 LLT ShTy = MRI.getType(Z);
7868
7869 unsigned BW = Ty.getScalarSizeInBits();
7870
7871 if (!isPowerOf2_32(BW))
7872 return UnableToLegalize;
7873
7874 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7875 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7876
7877 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7878 // fshl X, Y, Z -> fshr X, Y, -Z
7879 // fshr X, Y, Z -> fshl X, Y, -Z
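    // This relies on Z being known non-zero modulo BW: fshl by 0 yields X
    // while fshr by 0 yields Y. E.g. with BW = 8, fshl by 3 equals fshr by
    // 5 (= -3 mod 8).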
7880 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
7881 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7882 } else {
7883 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7884 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7885 auto One = MIRBuilder.buildConstant(ShTy, 1);
7886 if (IsFSHL) {
7887 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7888 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
7889 } else {
7890 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7891 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
7892 }
7893
7894 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
7895 }
7896
7897 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
7898 MI.eraseFromParent();
7899 return Legalized;
7900}
7901
7902 LegalizerHelper::LegalizeResult
7903 LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
7904 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7905 LLT Ty = MRI.getType(Dst);
7906 LLT ShTy = MRI.getType(Z);
7907
7908 const unsigned BW = Ty.getScalarSizeInBits();
7909 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7910
7911 Register ShX, ShY;
7912 Register ShAmt, InvShAmt;
7913
7914 // FIXME: Emit optimized urem by constant instead of letting it expand later.
7915 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7916 // fshl: X << C | Y >> (BW - C)
7917 // fshr: X << (BW - C) | Y >> C
7918 // where C = Z % BW is not zero
7919 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7920 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7921 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
7922 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
7923 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
7924 } else {
7925 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7926 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
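    // The extra ">> 1" / "<< 1" keeps every shift amount strictly below BW
    // even when Z % BW == 0; e.g. BW = 8, Z = 3: fshl = (X << 3) | (Y >> 1 >> 4).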
7927 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
7928 if (isPowerOf2_32(BW)) {
7929 // Z % BW -> Z & (BW - 1)
7930 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
7931 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7932 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
7933 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
7934 } else {
7935 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7936 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7937 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
7938 }
7939
7940 auto One = MIRBuilder.buildConstant(ShTy, 1);
7941 if (IsFSHL) {
7942 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
7943 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
7944 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
7945 } else {
7946 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
7947 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
7948 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
7949 }
7950 }
7951
7952 MIRBuilder.buildOr(Dst, ShX, ShY, MachineInstr::Disjoint);
7953 MI.eraseFromParent();
7954 return Legalized;
7955}
7956
7957 LegalizerHelper::LegalizeResult
7958 LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
7959 // These operations approximately do the following (while avoiding undefined
7960 // shifts by BW):
7961 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
7962 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
7963 Register Dst = MI.getOperand(0).getReg();
7964 LLT Ty = MRI.getType(Dst);
7965 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
7966
7967 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7968 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7969
7970 // TODO: Use smarter heuristic that accounts for vector legalization.
7971 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
7972 return lowerFunnelShiftAsShifts(MI);
7973
7974 // This only works for powers of 2, fallback to shifts if it fails.
7975 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
7976 if (Result == UnableToLegalize)
7977 return lowerFunnelShiftAsShifts(MI);
7978 return Result;
7979}
7980
7981 LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
7982 auto [Dst, Src] = MI.getFirst2Regs();
7983 LLT DstTy = MRI.getType(Dst);
7984 LLT SrcTy = MRI.getType(Src);
7985
7986 uint32_t DstTySize = DstTy.getSizeInBits();
7987 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
7988 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
7989
7990 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
7991 !isPowerOf2_32(SrcTyScalarSize))
7992 return UnableToLegalize;
7993
7994 // The step between extend is too large, split it by creating an intermediate
7995 // extend instruction
7996 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
7997 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
7998 // If the destination type is illegal, split it into multiple statements
7999 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
8000 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
8001 // Unmerge the vector
8002 LLT EltTy = MidTy.changeElementCount(
8003 MidTy.getElementCount().divideCoefficientBy(2));
8004 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
8005
8006 // ZExt the vectors
8007 LLT ZExtResTy = DstTy.changeElementCount(
8008 DstTy.getElementCount().divideCoefficientBy(2));
8009 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
8010 {UnmergeSrc.getReg(0)});
8011 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
8012 {UnmergeSrc.getReg(1)});
8013
8014 // Merge the ending vectors
8015 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
8016
8017 MI.eraseFromParent();
8018 return Legalized;
8019 }
8020 return UnableToLegalize;
8021}
8022
8023 LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
8024 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
8025 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
8026 // Similar to how operand splitting is done in SelectionDAG, we can handle
8027 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
8028 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
8029 // %lo16(<4 x s16>) = G_TRUNC %inlo
8030 // %hi16(<4 x s16>) = G_TRUNC %inhi
8031 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
8032 // %res(<8 x s8>) = G_TRUNC %in16
8033
8034 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
8035
8036 Register DstReg = MI.getOperand(0).getReg();
8037 Register SrcReg = MI.getOperand(1).getReg();
8038 LLT DstTy = MRI.getType(DstReg);
8039 LLT SrcTy = MRI.getType(SrcReg);
8040
8041 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
8042 isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
8043 isPowerOf2_32(SrcTy.getNumElements()) &&
8044 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
8045 // Split input type.
8046 LLT SplitSrcTy = SrcTy.changeElementCount(
8047 SrcTy.getElementCount().divideCoefficientBy(2));
8048
8049 // First, split the source into two smaller vectors.
8050 SmallVector<Register, 2> SplitSrcs;
8051 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
8052
8053 // Truncate the splits into intermediate narrower elements.
8054 LLT InterTy;
8055 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
8056 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
8057 else
8058 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
8059 for (Register &Src : SplitSrcs)
8060 Src = MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
8061
8062 // Combine the new truncates into one vector
8063 auto Merge = MIRBuilder.buildMergeLikeInstr(
8064 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
8065
8066 // Truncate the new vector to the final result type
8067 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
8068 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
8069 else
8070 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
8071
8072 MI.eraseFromParent();
8073
8074 return Legalized;
8075 }
8076 return UnableToLegalize;
8077}
8078
8079 LegalizerHelper::LegalizeResult
8080 LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
8081 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
8082 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
8083 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
8084 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8085 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
8086 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
8087 MI.eraseFromParent();
8088 return Legalized;
8089}
8090
8091 LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
8092 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
8093
8094 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
8095 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
8096
8097 MIRBuilder.setInstrAndDebugLoc(MI);
8098
8099 // If a rotate in the other direction is supported, use it.
8100 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8101 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
8102 isPowerOf2_32(EltSizeInBits))
8103 return lowerRotateWithReverseRotate(MI);
8104
8105 // If a funnel shift is supported, use it.
8106 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8107 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8108 bool IsFShLegal = false;
8109 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
8110 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
8111 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
8112 Register R3) {
8113 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
8114 MI.eraseFromParent();
8115 return Legalized;
8116 };
8117 // If a funnel shift in the other direction is supported, use it.
8118 if (IsFShLegal) {
8119 return buildFunnelShift(FShOpc, Dst, Src, Amt);
8120 } else if (isPowerOf2_32(EltSizeInBits)) {
8121 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
8122 return buildFunnelShift(RevFsh, Dst, Src, Amt);
8123 }
8124 }
8125
8126 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
8127 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
8128 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
8129 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
8130 Register ShVal;
8131 Register RevShiftVal;
8132 if (isPowerOf2_32(EltSizeInBits)) {
8133 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8134 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
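    // E.g. w = 32, c = 40: c & 31 = 8 and -c & 31 = 24, so rotl becomes
    // x << 8 | x >> 24, i.e. a rotate by 40 % 32 = 8.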
8135 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
8136 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
8137 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8138 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
8139 RevShiftVal =
8140 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
8141 } else {
8142 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8143 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
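 // The extra single-bit shift keeps the reverse shift amount strictly below
 // the bit width: when c % w == 0 the reverse amount is w - 1, and
 // (x >> 1) >> (w - 1) is still a defined shift, whereas x >> w would not be.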
8144 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
8145 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
8146 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8147 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
8148 auto One = MIRBuilder.buildConstant(AmtTy, 1);
8149 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
8150 RevShiftVal =
8151 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
8152 }
8153 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal, MachineInstr::Disjoint);
8154 MI.eraseFromParent();
8155 return Legalized;
8156}
8157
8158// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
8159// representation.
8160LegalizerHelper::LegalizeResult
8161LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
8162 auto [Dst, Src] = MI.getFirst2Regs();
8163 const LLT S64 = LLT::scalar(64);
8164 const LLT S32 = LLT::scalar(32);
8165 const LLT S1 = LLT::scalar(1);
8166
8167 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8168
8169 // unsigned cul2f(ulong u) {
8170 // uint lz = clz(u);
8171 // uint e = (u != 0) ? 127U + 63U - lz : 0;
8172 // u = (u << lz) & 0x7fffffffffffffffUL;
8173 // ulong t = u & 0xffffffffffUL;
8174 // uint v = (e << 23) | (uint)(u >> 40);
8175 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
8176 // return as_float(v + r);
8177 // }
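 // As a sanity check (not part of the expansion): u = 1 gives lz = 63,
 // e = 127 + 63 - 63 = 127, u << lz = 0x8000000000000000, so the masked
 // mantissa bits are zero and v = 127 << 23, the encoding of 1.0f; t = 0,
 // so r = 0 and the result is exactly 1.0f.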
8178
8179 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
8180 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
8181
8182 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
8183
8184 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
8185 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
8186
8187 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
8188 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
8189
8190 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
8191 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
8192
8193 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
8194
8195 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
8196 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
8197
8198 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
8199 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
8200 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
8201
8202 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
8203 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
8204 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
8205 auto One = MIRBuilder.buildConstant(S32, 1);
8206
8207 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
8208 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
8209 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
8210 MIRBuilder.buildAdd(Dst, V, R);
8211
8212 MI.eraseFromParent();
8213 return Legalized;
8214}
8215
8216// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
8217// operations and G_SITOFP
8218LegalizerHelper::LegalizeResult
8219LegalizerHelper::lowerU64ToF32WithSITOFP(MachineInstr &MI) {
8220 auto [Dst, Src] = MI.getFirst2Regs();
8221 const LLT S64 = LLT::scalar(64);
8222 const LLT S32 = LLT::scalar(32);
8223 const LLT S1 = LLT::scalar(1);
8224
8225 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8226
8227 // For i64 values that do not exceed INT64_MAX we simply reuse SITOFP.
8228 // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
8229 // saved before division, convert to float by SITOFP, multiply the result
8230 // by 2.
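 // Roughly equivalent scalar code (a sketch; the helper name is made up):
 //   float u64_to_f32(uint64_t x) {
 //     if ((int64_t)x >= 0)
 //       return (float)(int64_t)x;            // fits in the signed range
 //     uint64_t halved = (x >> 1) | (x & 1);  // keep the low bit as a sticky bit
 //     return 2.0f * (float)(int64_t)halved;  // convert and scale back up
 //   }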
8231 auto One = MIRBuilder.buildConstant(S64, 1);
8232 auto Zero = MIRBuilder.buildConstant(S64, 0);
8233 // Result if Src < INT_MAX
8234 auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
8235 // Result if Src >= INT_MAX
8236 auto Halved = MIRBuilder.buildLShr(S64, Src, One);
8237 auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
8238 auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
8239 auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
8240 auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
8241 // Check if the original value is larger than INT64_MAX by comparing with
8242 // zero to pick one of the two conversions.
8243 auto IsLarge =
8244 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
8245 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8246
8247 MI.eraseFromParent();
8248 return Legalized;
8249}
8250
8251// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
8252// IEEE double representation.
8253LegalizerHelper::LegalizeResult
8254LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) {
8255 auto [Dst, Src] = MI.getFirst2Regs();
8256 const LLT S64 = LLT::scalar(64);
8257 const LLT S32 = LLT::scalar(32);
8258
8259 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
8260
8261 // We create double value from 32 bit parts with 32 exponent difference.
8262 // Note that + and - are float operations that adjust the implicit leading
8263 // one; the bases 2^52 and 2^84 are for illustrative purposes.
8264 //
8265 // X = 2^52 * 1.0...LowBits
8266 // Y = 2^84 * 1.0...HighBits
8267 // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
8268 // = - 2^52 * 1.0...HighBits
8269 // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
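 // For reference, the magic constants below are the IEEE-754 bit patterns of
 // the exponents used above:
 //   0x4330000000000000 = 2^52, so OR-ing in the low 32 bits gives 2^52 * 1.Low
 //   0x4530000000000000 = 2^84, likewise for the high 32 bits
 //   0x4530000000100000 = 2^84 + 2^52, the combined value subtracted as TwoP52P84FP.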
8270 auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
8271 auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
8272 auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
8273 auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
8274 auto HalfWidth = MIRBuilder.buildConstant(S64, 32);
8275
8276 auto LowBits = MIRBuilder.buildTrunc(S32, Src);
8277 LowBits = MIRBuilder.buildZExt(S64, LowBits);
8278 auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
8279 auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
8280 auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
8281 auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
8282 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
8283
8284 MI.eraseFromParent();
8285 return Legalized;
8286}
8287
8288/// i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16. We cannot
8289/// convert fpround f64->f16 without double-rounding, so we manually perform the
8290/// lowering here where we know it is valid.
8291static LegalizerHelper::LegalizeResult
8292loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src,
8293 LLT SrcTy, MachineIRBuilder &MIRBuilder) {
8294 auto M1 = MI.getOpcode() == TargetOpcode::G_UITOFP
8295 ? MIRBuilder.buildUITOFP(SrcTy, Src)
8296 : MIRBuilder.buildSITOFP(SrcTy, Src);
8297 LLT S32Ty = SrcTy.changeElementSize(32);
8298 auto M2 = MIRBuilder.buildFPTrunc(S32Ty, M1);
8299 MIRBuilder.buildFPTrunc(Dst, M2);
8300 MI.eraseFromParent();
8301 return Legalized;
8302}
8303
8304LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
8305 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8306
8307 if (SrcTy == LLT::scalar(1)) {
8308 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
8309 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8310 MIRBuilder.buildSelect(Dst, Src, True, False);
8311 MI.eraseFromParent();
8312 return Legalized;
8313 }
8314
8315 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8316 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8317
8318 if (SrcTy != LLT::scalar(64))
8319 return UnableToLegalize;
8320
8321 if (DstTy == LLT::scalar(32))
8322 // TODO: SelectionDAG has several alternative expansions to port which may
8323 // be more reasonable depending on the available instructions. We also need
8324 // a more advanced mechanism to choose an optimal version depending on
8325 // target features such as sitofp or CTLZ availability.
8326 return lowerU64ToF32WithSITOFP(MI);
8327
8328 if (DstTy == LLT::scalar(64))
8329 return lowerU64ToF64BitFloatOps(MI);
8330
8331 return UnableToLegalize;
8332}
8333
8334LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
8335 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8336
8337 const LLT S64 = LLT::scalar(64);
8338 const LLT S32 = LLT::scalar(32);
8339 const LLT S1 = LLT::scalar(1);
8340
8341 if (SrcTy == S1) {
8342 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
8343 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8344 MIRBuilder.buildSelect(Dst, Src, True, False);
8345 MI.eraseFromParent();
8346 return Legalized;
8347 }
8348
8349 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8350 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8351
8352 if (SrcTy != S64)
8353 return UnableToLegalize;
8354
8355 if (DstTy == S32) {
8356 // signed cl2f(long l) {
8357 // long s = l >> 63;
8358 // float r = cul2f((l + s) ^ s);
8359 // return s ? -r : r;
8360 // }
8361 Register L = Src;
8362 auto SignBit = MIRBuilder.buildConstant(S64, 63);
8363 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
8364
8365 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
8366 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
8367 auto R = MIRBuilder.buildUITOFP(S32, Xor);
8368
8369 auto RNeg = MIRBuilder.buildFNeg(S32, R);
8370 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
8371 MIRBuilder.buildConstant(S64, 0));
8372 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8373 MI.eraseFromParent();
8374 return Legalized;
8375 }
8376
8377 return UnableToLegalize;
8378}
8379
8380LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
8381 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8382 const LLT S64 = LLT::scalar(64);
8383 const LLT S32 = LLT::scalar(32);
8384
8385 if (SrcTy != S64 && SrcTy != S32)
8386 return UnableToLegalize;
8387 if (DstTy != S32 && DstTy != S64)
8388 return UnableToLegalize;
8389
8390 // FPTOSI gives the same result as FPTOUI for values in the signed range.
8391 // FPTOUI additionally has to handle fp values that convert to unsigned integers
8392 // of at least 2^31 for an i32 result or 2^63 for an i64 result; call that 2^Exp.
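 // A worked f32 -> i32 example (illustrative only): Threshold is 2^31, so
 // converting 3.0e9f takes the second path: FPTOSI(3.0e9f - 2^31) yields
 // 852516352, and XOR-ing in the sign-mask bit 0x80000000 gives 3000000000,
 // the expected unsigned result.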
8393
8394 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
8395 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
8396 : APFloat::IEEEdouble(),
8397 APInt::getZero(SrcTy.getSizeInBits()));
8398 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
8399
8400 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
8401
8402 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
8403 // For fp values greater than or equal to Threshold (2^Exp), we use FPTOSI on
8404 // (Value - 2^Exp) and add 2^Exp back by setting the highest bit of the result.
8405 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
8406 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
8407 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
8408 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
8409
8410 const LLT S1 = LLT::scalar(1);
8411
8412 MachineInstrBuilder FCMP =
8413 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
8414 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8415
8416 MI.eraseFromParent();
8417 return Legalized;
8418}
8419
8420LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
8421 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8422 const LLT S64 = LLT::scalar(64);
8423 const LLT S32 = LLT::scalar(32);
8424
8425 // FIXME: Only f32 to i64 conversions are supported.
8426 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
8427 return UnableToLegalize;
8428
8429 // Expand f32 -> i64 conversion
8430 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8431 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
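 // The scalar idea, roughly (a sketch, not a substitute for fixsfdi.c):
 //   exponent = ((bits >> 23) & 0xff) - 127;      // unbias the exponent
 //   significand = (bits & 0x7fffff) | 0x800000;  // add the implicit leading 1
 //   if (exponent < 0)        result = 0;
 //   else if (exponent > 23)  result = (uint64_t)significand << (exponent - 23);
 //   else                     result = significand >> (23 - exponent);
 //   if (sign bit set)        result = -result;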
8432
8433 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8434
8435 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
8436 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
8437
8438 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
8439 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
8440
8441 auto SignMask = MIRBuilder.buildConstant(SrcTy,
8442 APInt::getSignMask(SrcEltBits));
8443 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
8444 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
8445 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
8446 Sign = MIRBuilder.buildSExt(DstTy, Sign);
8447
8448 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
8449 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
8450 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
8451
8452 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
8453 R = MIRBuilder.buildZExt(DstTy, R);
8454
8455 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
8456 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
8457 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
8458 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
8459
8460 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
8461 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
8462
8463 const LLT S1 = LLT::scalar(1);
8464 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
8465 S1, Exponent, ExponentLoBit);
8466
8467 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
8468
8469 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
8470 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
8471
8472 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
8473
8474 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
8475 S1, Exponent, ZeroSrcTy);
8476
8477 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
8478 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
8479
8480 MI.eraseFromParent();
8481 return Legalized;
8482}
8483
8484LegalizerHelper::LegalizeResult
8485LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
8486 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8487
8488 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8489 unsigned SatWidth = DstTy.getScalarSizeInBits();
8490
8491 // Determine minimum and maximum integer values and their corresponding
8492 // floating-point values.
8493 APInt MinInt, MaxInt;
8494 if (IsSigned) {
8495 MinInt = APInt::getSignedMinValue(SatWidth);
8496 MaxInt = APInt::getSignedMaxValue(SatWidth);
8497 } else {
8498 MinInt = APInt::getMinValue(SatWidth);
8499 MaxInt = APInt::getMaxValue(SatWidth);
8500 }
8501
8502 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8503 APFloat MinFloat(Semantics);
8504 APFloat MaxFloat(Semantics);
8505
8506 APFloat::opStatus MinStatus =
8507 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
8508 APFloat::opStatus MaxStatus =
8509 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
8510 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
8511 !(MaxStatus & APFloat::opStatus::opInexact);
8512
8513 // If the integer bounds are exactly representable as floats, emit a
8514 // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
8515 // and selects.
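 // For example, for f32 -> s32 the lower bound -2^31 is exact but the upper
 // bound 2^31 - 1 is not representable in f32, so that case uses the
 // compare+select sequence below, while f32 -> s8 (both -128 and 127 exact)
 // can take the cheaper min+max+fptoi form.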
8516 if (AreExactFloatBounds) {
8517 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
8518 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
8519 auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
8520 SrcTy.changeElementSize(1), Src, MaxC);
8521 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8522 // Clamp by MaxFloat from above. NaN cannot occur.
8523 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8524 auto MinP =
8525 MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max,
8526 MinC, MachineInstr::FmNoNans);
8527 auto Min =
8528 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
8529 // Convert clamped value to integer. In the unsigned case we're done,
8530 // because we mapped NaN to MinFloat, which will cast to zero.
8531 if (!IsSigned) {
8532 MIRBuilder.buildFPTOUI(Dst, Min);
8533 MI.eraseFromParent();
8534 return Legalized;
8535 }
8536
8537 // Otherwise, select 0 if Src is NaN.
8538 auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
8539 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8540 DstTy.changeElementSize(1), Src, Src);
8541 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
8542 FpToInt);
8543 MI.eraseFromParent();
8544 return Legalized;
8545 }
8546
8547 // Result of direct conversion. The assumption here is that the operation is
8548 // non-trapping and it's fine to apply it to an out-of-range value if we
8549 // select it away later.
8550 auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
8551 : MIRBuilder.buildFPTOUI(DstTy, Src);
8552
8553 // If Src ULT MinFloat, select MinInt. In particular, this also selects
8554 // MinInt if Src is NaN.
8555 auto ULT =
8556 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
8557 MIRBuilder.buildFConstant(SrcTy, MinFloat));
8558 auto Max = MIRBuilder.buildSelect(
8559 DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8560 // If Src OGT MaxFloat, select MaxInt.
8561 auto OGT =
8562 MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
8563 MIRBuilder.buildFConstant(SrcTy, MaxFloat));
8564
8565 // In the unsigned case we are done, because we mapped NaN to MinInt, which
8566 // is already zero.
8567 if (!IsSigned) {
8568 MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
8569 Max);
8570 MI.eraseFromParent();
8571 return Legalized;
8572 }
8573
8574 // Otherwise, select 0 if Src is NaN.
8575 auto Min = MIRBuilder.buildSelect(
8576 DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8577 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8578 DstTy.changeElementSize(1), Src, Src);
8579 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
8580 MI.eraseFromParent();
8581 return Legalized;
8582}
8583
8584// f64 -> f16 conversion using round-to-nearest-even rounding mode.
8585LegalizerHelper::LegalizeResult
8586LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
8587 const LLT S1 = LLT::scalar(1);
8588 const LLT S32 = LLT::scalar(32);
8589
8590 auto [Dst, Src] = MI.getFirst2Regs();
8591 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
8592 MRI.getType(Src).getScalarType() == LLT::scalar(64));
8593
8594 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
8595 return UnableToLegalize;
8596
8597 if (MI.getFlag(MachineInstr::FmAfn)) {
8598 unsigned Flags = MI.getFlags();
8599 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
8600 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
8601 MI.eraseFromParent();
8602 return Legalized;
8603 }
8604
8605 const unsigned ExpMask = 0x7ff;
8606 const unsigned ExpBiasf64 = 1023;
8607 const unsigned ExpBiasf16 = 15;
8608
8609 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
8610 Register U = Unmerge.getReg(0);
8611 Register UH = Unmerge.getReg(1);
8612
8613 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
8614 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
8615
8616 // Subtract the fp64 exponent bias (1023) to get the real exponent and
8617 // add the f16 bias (15) to get the biased exponent for the f16 format.
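 // E.g. for 1.0 (f64 biased exponent 1023) the add below yields E = 15, the
 // f16 biased exponent of 1.0. Since ExpBiasf64 is unsigned, the constant
 // -ExpBiasf64 + ExpBiasf16 wraps modulo 2^32; the 32-bit G_ADD still
 // produces the intended E - 1008.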
8618 E = MIRBuilder.buildAdd(
8619 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
8620
8621 auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
8622 M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
8623
8624 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
8625 MIRBuilder.buildConstant(S32, 0x1ff));
8626 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
8627
8628 auto Zero = MIRBuilder.buildConstant(S32, 0);
8629 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
8630 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
8631 M = MIRBuilder.buildOr(S32, M, Lo40Set);
8632
8633 // (M != 0 ? 0x0200 : 0) | 0x7c00;
8634 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
8635 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
8636 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
8637
8638 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
8639 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
8640
8641 // N = M | (E << 12);
8642 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
8643 auto N = MIRBuilder.buildOr(S32, M, EShl12);
8644
8645 // B = clamp(1-E, 0, 13);
8646 auto One = MIRBuilder.buildConstant(S32, 1);
8647 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
8648 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
8649 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
8650
8651 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
8652 MIRBuilder.buildConstant(S32, 0x1000));
8653
8654 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
8655 auto D0 = MIRBuilder.buildShl(S32, D, B);
8656
8657 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
8658 D0, SigSetHigh);
8659 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
8660 D = MIRBuilder.buildOr(S32, D, D1);
8661
8662 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
8663 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
8664
8665 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
8666 V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
8667
8668 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
8669 MIRBuilder.buildConstant(S32, 3));
8670 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
8671
8672 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
8673 MIRBuilder.buildConstant(S32, 5));
8674 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
8675
8676 V1 = MIRBuilder.buildOr(S32, V0, V1);
8677 V = MIRBuilder.buildAdd(S32, V, V1);
8678
8679 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
8680 E, MIRBuilder.buildConstant(S32, 30));
8681 V = MIRBuilder.buildSelect(S32, CmpEGt30,
8682 MIRBuilder.buildConstant(S32, 0x7c00), V);
8683
8684 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
8685 E, MIRBuilder.buildConstant(S32, 1039));
8686 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
8687
8688 // Extract the sign bit.
8689 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
8690 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
8691
8692 // Insert the sign bit
8693 V = MIRBuilder.buildOr(S32, Sign, V);
8694
8695 MIRBuilder.buildTrunc(Dst, V);
8696 MI.eraseFromParent();
8697 return Legalized;
8698}
8699
8700LegalizerHelper::LegalizeResult
8701LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
8702 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
8703 const LLT S64 = LLT::scalar(64);
8704 const LLT S16 = LLT::scalar(16);
8705
8706 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
8707 return lowerFPTRUNC_F64_TO_F16(MI);
8708
8709 return UnableToLegalize;
8710}
8711
8712LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
8713 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8714 LLT Ty = MRI.getType(Dst);
8715
8716 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
8717 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
8718 MI.eraseFromParent();
8719 return Legalized;
8720}
8721
8722static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
8723 switch (Opc) {
8724 case TargetOpcode::G_SMIN:
8725 return CmpInst::ICMP_SLT;
8726 case TargetOpcode::G_SMAX:
8727 return CmpInst::ICMP_SGT;
8728 case TargetOpcode::G_UMIN:
8729 return CmpInst::ICMP_ULT;
8730 case TargetOpcode::G_UMAX:
8731 return CmpInst::ICMP_UGT;
8732 default:
8733 llvm_unreachable("not in integer min/max");
8734 }
8735}
8736
8737LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
8738 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8739
8740 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
8741 LLT CmpType = MRI.getType(Dst).changeElementType(LLT::scalar(1));
8742
8743 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8744 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8745
8746 MI.eraseFromParent();
8747 return Legalized;
8748}
8749
8750LegalizerHelper::LegalizeResult
8751LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
8752 GSUCmp *Cmp = cast<GSUCmp>(&MI);
8753
8754 Register Dst = Cmp->getReg(0);
8755 LLT DstTy = MRI.getType(Dst);
8756 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8757 LLT CmpTy = DstTy.changeElementSize(1);
8758
8759 CmpInst::Predicate LTPredicate = Cmp->isSigned()
8760 ? CmpInst::Predicate::ICMP_SLT
8761 : CmpInst::Predicate::ICMP_ULT;
8762 CmpInst::Predicate GTPredicate = Cmp->isSigned()
8763 ? CmpInst::Predicate::ICMP_SGT
8764 : CmpInst::Predicate::ICMP_UGT;
8765
8766 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
8767 auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8768 Cmp->getRHSReg());
8769 auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8770 Cmp->getRHSReg());
8771
8772 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
8773 auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
8774 if (TLI.preferSelectsOverBooleanArithmetic(
8775 getApproximateEVTForLLT(SrcTy, Ctx)) ||
8776 BC == TargetLowering::UndefinedBooleanContent) {
8777 auto One = MIRBuilder.buildConstant(DstTy, 1);
8778 auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8779
8780 auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
8781 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8782 } else {
8783 if (BC == TargetLowering::ZeroOrNegativeOneBooleanContent)
8784 std::swap(IsGT, IsLT);
8785 // Extend boolean results to DstTy, which is at least i2, before subtracting
8786 // them.
8787 unsigned BoolExtOp =
8788 MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
8789 IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8790 IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8791 MIRBuilder.buildSub(Dst, IsGT, IsLT);
8792 }
8793
8794 MI.eraseFromParent();
8795 return Legalized;
8796}
8797
8798LegalizerHelper::LegalizeResult
8799LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
8800 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
8801 const int Src0Size = Src0Ty.getScalarSizeInBits();
8802 const int Src1Size = Src1Ty.getScalarSizeInBits();
8803
8804 auto SignBitMask = MIRBuilder.buildConstant(
8805 Src0Ty, APInt::getSignMask(Src0Size));
8806
8807 auto NotSignBitMask = MIRBuilder.buildConstant(
8808 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
8809
8810 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
8811 Register And1;
8812 if (Src0Ty == Src1Ty) {
8813 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
8814 } else if (Src0Size > Src1Size) {
8815 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
8816 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
8817 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
8818 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
8819 } else {
8820 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
8821 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
8822 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
8823 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
8824 }
8825
8826 // Be careful about setting nsz/nnan/ninf on every instruction, since the
8827 // constants are a nan and -0.0, but the final result should preserve
8828 // everything.
8829 unsigned Flags = MI.getFlags();
8830
8831 // We masked the sign bit and the not-sign bit, so these are disjoint.
8832 Flags |= MachineInstr::Disjoint;
8833
8834 MIRBuilder.buildOr(Dst, And0, And1, Flags);
8835
8836 MI.eraseFromParent();
8837 return Legalized;
8838}
8839
8840LegalizerHelper::LegalizeResult
8841LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
8842 // FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have
8843 // identical handling. fminimumnum/fmaximumnum also need a path that does not
8844 // depend on fminnum/fmaxnum.
8845
8846 unsigned NewOp;
8847 switch (MI.getOpcode()) {
8848 case TargetOpcode::G_FMINNUM:
8849 NewOp = TargetOpcode::G_FMINNUM_IEEE;
8850 break;
8851 case TargetOpcode::G_FMINIMUMNUM:
8852 NewOp = TargetOpcode::G_FMINNUM;
8853 break;
8854 case TargetOpcode::G_FMAXNUM:
8855 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
8856 break;
8857 case TargetOpcode::G_FMAXIMUMNUM:
8858 NewOp = TargetOpcode::G_FMAXNUM;
8859 break;
8860 default:
8861 llvm_unreachable("unexpected min/max opcode");
8862 }
8863
8864 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8865 LLT Ty = MRI.getType(Dst);
8866
8867 if (!MI.getFlag(MachineInstr::FmNoNans)) {
8868 // Insert canonicalizes if it's possible we need to quiet to get correct
8869 // sNaN behavior.
8870
8871 // Note this must be done here, and not as an optimization combine in the
8872 // absence of a dedicated quiet-snan instruction, as we're using an
8873 // omni-purpose G_FCANONICALIZE.
8874 if (!isKnownNeverSNaN(Src0, MRI))
8875 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
8876
8877 if (!isKnownNeverSNaN(Src1, MRI))
8878 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
8879 }
8880
8881 // If there are no nans, it's safe to simply replace this with the non-IEEE
8882 // version.
8883 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
8884 MI.eraseFromParent();
8885 return Legalized;
8886}
8887
8888LegalizerHelper::LegalizeResult
8889LegalizerHelper::lowerFMinimumMaximum(MachineInstr &MI) {
8890 unsigned Opc = MI.getOpcode();
8891 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8892 LLT Ty = MRI.getType(Dst);
8893 LLT CmpTy = Ty.changeElementSize(1);
8894
8895 bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM);
8896 unsigned OpcIeee =
8897 IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
8898 unsigned OpcNonIeee =
8899 IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
8900 bool MinMaxMustRespectOrderedZero = false;
8901 Register Res;
8902
8903 // IEEE variants don't need canonicalization
8904 if (LI.isLegalOrCustom({OpcIeee, Ty})) {
8905 Res = MIRBuilder.buildInstr(OpcIeee, {Ty}, {Src0, Src1}).getReg(0);
8906 MinMaxMustRespectOrderedZero = true;
8907 } else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
8908 Res = MIRBuilder.buildInstr(OpcNonIeee, {Ty}, {Src0, Src1}).getReg(0);
8909 } else {
8910 auto Compare = MIRBuilder.buildFCmp(
8911 IsMax ? CmpInst::FCMP_OGT : CmpInst::FCMP_OLT, CmpTy, Src0, Src1);
8912 Res = MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
8913 }
8914
8915 // Propagate any NaN of both operands
8916 if (!MI.getFlag(MachineInstr::FmNoNans) &&
8917 (!isKnownNeverNaN(Src0, MRI) || !isKnownNeverNaN(Src1, MRI))) {
8918 auto IsOrdered = MIRBuilder.buildFCmp(CmpInst::FCMP_ORD, CmpTy, Src0, Src1);
8919
8920 LLT ElementTy = Ty.isScalar() ? Ty : Ty.getElementType();
8921 APFloat NaNValue = APFloat::getNaN(getFltSemanticForLLT(ElementTy));
8922 Register NaN = MIRBuilder.buildFConstant(ElementTy, NaNValue).getReg(0);
8923 if (Ty.isVector())
8924 NaN = MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);
8925
8926 Res = MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
8927 }
8928
8929 // fminimum/fmaximum require -0.0 to compare less than +0.0
8930 if (!MinMaxMustRespectOrderedZero && !MI.getFlag(MachineInstr::FmNsz)) {
8931 GISelValueTracking VT(MIRBuilder.getMF());
8932 KnownFPClass Src0Info = VT.computeKnownFPClass(Src0, fcZero);
8933 KnownFPClass Src1Info = VT.computeKnownFPClass(Src1, fcZero);
8934
8935 if (!Src0Info.isKnownNeverZero() && !Src1Info.isKnownNeverZero()) {
8936 const unsigned Flags = MI.getFlags();
8937 Register Zero = MIRBuilder.buildFConstant(Ty, 0.0).getReg(0);
8938 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CmpTy, Res, Zero);
8939
8940 unsigned TestClass = IsMax ? fcPosZero : fcNegZero;
8941
8942 auto LHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
8943 auto LHSSelect =
8944 MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);
8945
8946 auto RHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
8947 auto RHSSelect =
8948 MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);
8949
8950 Res = MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
8951 }
8952 }
8953
8954 MIRBuilder.buildCopy(Dst, Res);
8955 MI.eraseFromParent();
8956 return Legalized;
8957}
8958
8959LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
8960 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
8961 Register DstReg = MI.getOperand(0).getReg();
8962 LLT Ty = MRI.getType(DstReg);
8963 unsigned Flags = MI.getFlags();
8964
8965 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
8966 Flags);
8967 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
8968 MI.eraseFromParent();
8969 return Legalized;
8970}
8971
8972LegalizerHelper::LegalizeResult
8973LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
8974 auto [DstReg, X] = MI.getFirst2Regs();
8975 const unsigned Flags = MI.getFlags();
8976 const LLT Ty = MRI.getType(DstReg);
8977 const LLT CondTy = Ty.changeElementSize(1);
8978
8979 // round(x) =>
8980 // t = trunc(x);
8981 // d = fabs(x - t);
8982 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
8983 // return t + o;
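 // E.g. round(-2.5) (illustrative): t = -2.0, d = |-2.5 - (-2.0)| = 0.5, so
 // o = copysign(1.0, -2.5) = -1.0 and the result is -3.0; halfway cases
 // round away from zero, as G_INTRINSIC_ROUND requires.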
8984
8985 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
8986
8987 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
8988 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
8989
8990 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
8991 auto Cmp =
8992 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
8993
8994 // Could emit G_UITOFP instead
8995 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
8996 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8997 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
8998 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
8999
9000 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
9001
9002 MI.eraseFromParent();
9003 return Legalized;
9004}
9005
9006LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
9007 auto [DstReg, SrcReg] = MI.getFirst2Regs();
9008 unsigned Flags = MI.getFlags();
9009 LLT Ty = MRI.getType(DstReg);
9010 const LLT CondTy = Ty.changeElementSize(1);
9011
9012 // result = trunc(src);
9013 // if (src < 0.0 && src != result)
9014 // result += -1.0.
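 // E.g. floor(-1.25): trunc gives -1.0; the condition is true, and because
 // the i1 is treated as signed by G_SITOFP the added value is -1.0, giving
 // -2.0. For non-negative inputs the condition is false and the trunc is kept.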
9015
9016 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
9017 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
9018
9019 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
9020 SrcReg, Zero, Flags);
9021 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
9022 SrcReg, Trunc, Flags);
9023 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
9024 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
9025
9026 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
9027 MI.eraseFromParent();
9028 return Legalized;
9029}
9030
9031LegalizerHelper::LegalizeResult
9032LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
9033 const unsigned NumOps = MI.getNumOperands();
9034 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
9035 unsigned PartSize = Src0Ty.getSizeInBits();
9036
9037 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
9038 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
9039
9040 for (unsigned I = 2; I != NumOps; ++I) {
9041 const unsigned Offset = (I - 1) * PartSize;
9042
9043 Register SrcReg = MI.getOperand(I).getReg();
9044 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
9045
9046 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
9047 MRI.createGenericVirtualRegister(WideTy);
9048
9049 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
9050 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
9051 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
9052 ResultReg = NextResult;
9053 }
9054
9055 if (DstTy.isPointer()) {
9056 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
9057 DstTy.getAddressSpace())) {
9058 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
9059 return UnableToLegalize;
9060 }
9061
9062 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
9063 }
9064
9065 MI.eraseFromParent();
9066 return Legalized;
9067}
9068
9069LegalizerHelper::LegalizeResult
9070LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
9071 const unsigned NumDst = MI.getNumOperands() - 1;
9072 Register SrcReg = MI.getOperand(NumDst).getReg();
9073 Register Dst0Reg = MI.getOperand(0).getReg();
9074 LLT DstTy = MRI.getType(Dst0Reg);
9075 if (DstTy.isPointer())
9076 return UnableToLegalize; // TODO
9077
9078 SrcReg = coerceToScalar(SrcReg);
9079 if (!SrcReg)
9080 return UnableToLegalize;
9081
9082 // Expand scalarizing unmerge as bitcast to integer and shift.
9083 LLT IntTy = MRI.getType(SrcReg);
9084
9085 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
9086
9087 const unsigned DstSize = DstTy.getSizeInBits();
9088 unsigned Offset = DstSize;
9089 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
9090 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
9091 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
9092 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
9093 }
9094
9095 MI.eraseFromParent();
9096 return Legalized;
9097}
9098
9099/// Lower a vector extract or insert by writing the vector to a stack temporary
9100/// and reloading the element or vector.
9101///
9102/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
9103/// =>
9104/// %stack_temp = G_FRAME_INDEX
9105/// G_STORE %vec, %stack_temp
9106/// %idx = clamp(%idx, %vec.getNumElements())
9107/// %element_ptr = G_PTR_ADD %stack_temp, %idx
9108/// %dst = G_LOAD %element_ptr
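// The G_INSERT_VECTOR_ELT case below follows the same scheme: the new element
// is stored through %element_ptr and the whole vector is reloaded (this is a
// summary of the code that follows, not a separate lowering path).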
9109LegalizerHelper::LegalizeResult
9110LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
9111 Register DstReg = MI.getOperand(0).getReg();
9112 Register SrcVec = MI.getOperand(1).getReg();
9113 Register InsertVal;
9114 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
9115 InsertVal = MI.getOperand(2).getReg();
9116
9117 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
9118
9119 LLT VecTy = MRI.getType(SrcVec);
9120 LLT EltTy = VecTy.getElementType();
9121 unsigned NumElts = VecTy.getNumElements();
9122
9123 int64_t IdxVal;
9124 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
9125 SmallVector<Register, 8> SrcRegs;
9126 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
9127
9128 if (InsertVal) {
9129 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
9130 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
9131 } else {
9132 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
9133 }
9134
9135 MI.eraseFromParent();
9136 return Legalized;
9137 }
9138
9139 if (!EltTy.isByteSized()) { // Not implemented.
9140 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
9141 return UnableToLegalize;
9142 }
9143
9144 unsigned EltBytes = EltTy.getSizeInBytes();
9145 Align VecAlign = getStackTemporaryAlignment(VecTy);
9146 Align EltAlign;
9147
9148 MachinePointerInfo PtrInfo;
9149 auto StackTemp = createStackTemporary(
9150 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
9151 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
9152
9153 // Get the pointer to the element, and be sure not to hit undefined behavior
9154 // if the index is out of bounds.
9155 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
9156
9157 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
9158 int64_t Offset = IdxVal * EltBytes;
9159 PtrInfo = PtrInfo.getWithOffset(Offset);
9160 EltAlign = commonAlignment(VecAlign, Offset);
9161 } else {
9162 // We lose information with a variable offset.
9163 EltAlign = getStackTemporaryAlignment(EltTy);
9164 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
9165 }
9166
9167 if (InsertVal) {
9168 // Write the inserted element
9169 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
9170
9171 // Reload the whole vector.
9172 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
9173 } else {
9174 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
9175 }
9176
9177 MI.eraseFromParent();
9178 return Legalized;
9179}
9180LegalizerHelper::LegalizeResult
9181LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
9182 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
9184 MI.getFirst3RegLLTs();
9185 LLT IdxTy = LLT::scalar(32);
9186
9187 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
9188 Register Undef;
9189 SmallVector<Register, 32> BuildVec;
9190 LLT EltTy = DstTy.getScalarType();
9191
9192 DenseMap<unsigned, Register> CachedExtract;
9193
9194 for (int Idx : Mask) {
9195 if (Idx < 0) {
9196 if (!Undef.isValid())
9197 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
9198 BuildVec.push_back(Undef);
9199 continue;
9200 }
9201
9202 assert(!Src0Ty.isScalar() && "Unexpected scalar G_SHUFFLE_VECTOR");
9203
9204 int NumElts = Src0Ty.getNumElements();
9205 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
9206 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
9207 auto [It, Inserted] = CachedExtract.try_emplace(Idx);
9208 if (Inserted) {
9209 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
9210 It->second =
9211 MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK).getReg(0);
9212 }
9213 BuildVec.push_back(It->second);
9214 }
9215
9216 assert(DstTy.isVector() && "Unexpected scalar G_SHUFFLE_VECTOR");
9217 MIRBuilder.buildBuildVector(DstReg, BuildVec);
9218 MI.eraseFromParent();
9219 return Legalized;
9220}
9221
9222LegalizerHelper::LegalizeResult
9223LegalizerHelper::lowerVECTOR_COMPRESS(MachineInstr &MI) {
9224 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
9225 MI.getFirst4RegLLTs();
9226
9227 if (VecTy.isScalableVector())
9228 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
9229
9230 Align VecAlign = getStackTemporaryAlignment(VecTy);
9231 MachinePointerInfo PtrInfo;
9232 Register StackPtr =
9233 createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
9234 PtrInfo)
9235 .getReg(0);
9236 MachinePointerInfo ValPtrInfo =
9237 MachinePointerInfo::getUnknownStack(*MI.getMF());
9238
9239 LLT IdxTy = LLT::scalar(32);
9240 LLT ValTy = VecTy.getElementType();
9241 Align ValAlign = getStackTemporaryAlignment(ValTy);
9242
9243 auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
9244
9245 bool HasPassthru =
9246 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
9247
9248 if (HasPassthru)
9249 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
9250
9251 Register LastWriteVal;
9252 std::optional<APInt> PassthruSplatVal =
9253 isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
9254
9255 if (PassthruSplatVal.has_value()) {
9256 LastWriteVal =
9257 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9258 } else if (HasPassthru) {
9259 auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9260 Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9261 {LLT::scalar(32)}, {Popcount});
9262
9263 Register LastElmtPtr =
9264 getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
9265 LastWriteVal =
9266 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9267 .getReg(0);
9268 }
9269
9270 unsigned NumElmts = VecTy.getNumElements();
9271 for (unsigned I = 0; I < NumElmts; ++I) {
9272 auto Idx = MIRBuilder.buildConstant(IdxTy, I);
9273 auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
9274 Register ElmtPtr =
9275 getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9276 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9277
9278 LLT MaskITy = MaskTy.getElementType();
9279 auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9280 if (MaskITy.getSizeInBits() > 1)
9281 MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
9282
9283 MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
9284 OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
9285
9286 if (HasPassthru && I == NumElmts - 1) {
9287 auto EndOfVector =
9288 MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
9289 auto AllLanesSelected = MIRBuilder.buildICmp(
9290 CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
9291 OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9292 {OutPos, EndOfVector});
9293 ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9294
9295 LastWriteVal =
9296 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9297 .getReg(0);
9298 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9299 }
9300 }
9301
9302 // TODO: Use StackPtr's FrameIndex alignment.
9303 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9304
9305 MI.eraseFromParent();
9306 return Legalized;
9307}
9308
9309Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
9310 Register AllocSize,
9311 Align Alignment,
9312 LLT PtrTy) {
9313 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
9314
9315 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
9316 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
9317
9318 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
9319 // have to generate an extra instruction to negate the alloc and then use
9320 // G_PTR_ADD to add the negative offset.
9321 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
9322 if (Alignment > Align(1)) {
9323 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
9324 AlignMask.negate();
9325 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
9326 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
9327 }
9328
9329 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
9330}
9331LegalizerHelper::LegalizeResult
9332LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
9333 const auto &MF = *MI.getMF();
9335 const auto &TFI = *MF.getSubtarget().getFrameLowering();
9336 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
9337 return UnableToLegalize;
9338
9339 Register Dst = MI.getOperand(0).getReg();
9340 Register AllocSize = MI.getOperand(1).getReg();
9341 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
9342
9343 LLT PtrTy = MRI.getType(Dst);
9344 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9345 Register SPTmp =
9346 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
9347
9348 MIRBuilder.buildCopy(SPReg, SPTmp);
9349 MIRBuilder.buildCopy(Dst, SPTmp);
9350
9351 MI.eraseFromParent();
9352 return Legalized;
9353}
9354
9355LegalizerHelper::LegalizeResult
9356LegalizerHelper::lowerStackSave(MachineInstr &MI) {
9357 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9358 if (!StackPtr)
9359 return UnableToLegalize;
9360
9361 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
9362 MI.eraseFromParent();
9363 return Legalized;
9364}
9365
9366LegalizerHelper::LegalizeResult
9367LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
9368 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9369 if (!StackPtr)
9370 return UnableToLegalize;
9371
9372 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
9373 MI.eraseFromParent();
9374 return Legalized;
9375}
9376
9377LegalizerHelper::LegalizeResult
9378LegalizerHelper::lowerExtract(MachineInstr &MI) {
9379 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9380 unsigned Offset = MI.getOperand(2).getImm();
9381
9382 // Extract sub-vector or one element
9383 if (SrcTy.isVector()) {
9384 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9385 unsigned DstSize = DstTy.getSizeInBits();
9386
9387 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9388 (Offset + DstSize <= SrcTy.getSizeInBits())) {
9389 // Unmerge and allow access to each Src element for the artifact combiner.
9390 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9391
9392 // Take element(s) we need to extract and copy it (merge them).
9393 SmallVector<Register, 8> SubVectorElts;
9394 for (unsigned Idx = Offset / SrcEltSize;
9395 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
9396 SubVectorElts.push_back(Unmerge.getReg(Idx));
9397 }
9398 if (SubVectorElts.size() == 1)
9399 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9400 else
9401 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9402
9403 MI.eraseFromParent();
9404 return Legalized;
9405 }
9406 }
9407
9408 if (DstTy.isScalar() &&
9409 (SrcTy.isScalar() ||
9410 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9411 LLT SrcIntTy = SrcTy;
9412 if (!SrcTy.isScalar()) {
9413 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
9414 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
9415 }
9416
9417 if (Offset == 0)
9418 MIRBuilder.buildTrunc(DstReg, SrcReg);
9419 else {
9420 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
9421 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9422 MIRBuilder.buildTrunc(DstReg, Shr);
9423 }
9424
9425 MI.eraseFromParent();
9426 return Legalized;
9427 }
9428
9429 return UnableToLegalize;
9430}
9431
9432LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
9433 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
9434 uint64_t Offset = MI.getOperand(3).getImm();
9435
9436 LLT DstTy = MRI.getType(Src);
9437 LLT InsertTy = MRI.getType(InsertSrc);
9438
9439 // Insert sub-vector or one element
9440 if (DstTy.isVector() && !InsertTy.isPointer()) {
9441 LLT EltTy = DstTy.getElementType();
9442 unsigned EltSize = EltTy.getSizeInBits();
9443 unsigned InsertSize = InsertTy.getSizeInBits();
9444
9445 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9446 (Offset + InsertSize <= DstTy.getSizeInBits())) {
9447 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
9449 unsigned Idx = 0;
9450 // Elements from Src before insert start Offset
9451 for (; Idx < Offset / EltSize; ++Idx) {
9452 DstElts.push_back(UnmergeSrc.getReg(Idx));
9453 }
9454
9455 // Replace elements in Src with elements from InsertSrc
9456 if (InsertTy.getSizeInBits() > EltSize) {
9457 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9458 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
9459 ++Idx, ++i) {
9460 DstElts.push_back(UnmergeInsertSrc.getReg(i));
9461 }
9462 } else {
9463 DstElts.push_back(InsertSrc);
9464 ++Idx;
9465 }
9466
9467 // Remaining elements from Src after insert
9468 for (; Idx < DstTy.getNumElements(); ++Idx) {
9469 DstElts.push_back(UnmergeSrc.getReg(Idx));
9470 }
9471
9472 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9473 MI.eraseFromParent();
9474 return Legalized;
9475 }
9476 }
9477
9478 if (InsertTy.isVector() ||
9479 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
9480 return UnableToLegalize;
9481
9482 const DataLayout &DL = MIRBuilder.getDataLayout();
9483 if ((DstTy.isPointer() &&
9484 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
9485 (InsertTy.isPointer() &&
9486 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
9487 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9488 return UnableToLegalize;
9489 }
9490
9491 LLT IntDstTy = DstTy;
9492
9493 if (!DstTy.isScalar()) {
9494 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
9495 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9496 }
9497
9498 if (!InsertTy.isScalar()) {
9499 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
9500 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9501 }
9502
9503 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
9504 if (Offset != 0) {
9505 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
9506 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9507 }
9508
9509 APInt MaskVal = APInt::getBitsSetWithWrap(
9510 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
9511
9512 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
9513 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9514 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9515
9516 MIRBuilder.buildCast(Dst, Or);
9517 MI.eraseFromParent();
9518 return Legalized;
9519}
9520
9521LegalizerHelper::LegalizeResult
9522LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
9523 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9524 MI.getFirst4RegLLTs();
9525 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
9526
9527 LLT Ty = Dst0Ty;
9528 LLT BoolTy = Dst1Ty;
9529
9530 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9531
9532 if (IsAdd)
9533 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
9534 else
9535 MIRBuilder.buildSub(NewDst0, LHS, RHS);
9536
9537 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
9538
9539 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9540
9541 // For an addition, the result should be less than one of the operands (LHS)
9542 // if and only if the other operand (RHS) is negative, otherwise there will
9543 // be overflow.
9544 // For a subtraction, the result should be less than one of the operands
9545 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
9546 // otherwise there will be overflow.
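 // E.g. for an 8-bit G_SADDO (illustrative): 100 + 100 wraps to -56; the
 // result is less than LHS while RHS is positive, so exactly one of the two
 // compares is true and the XOR below reports overflow.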
9547 auto ResultLowerThanLHS =
9548 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
9549 auto ConditionRHS = MIRBuilder.buildICmp(
9550 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
9551
9552 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
9553
9554 MIRBuilder.buildCopy(Dst0, NewDst0);
9555 MI.eraseFromParent();
9556
9557 return Legalized;
9558}
9559
9560LegalizerHelper::LegalizeResult LegalizerHelper::lowerSADDE(MachineInstr &MI) {
9561 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9562 const LLT Ty = MRI.getType(Res);
9563
9564 // sum = LHS + RHS + zext(CarryIn)
9565 auto Tmp = MIRBuilder.buildAdd(Ty, LHS, RHS);
9566 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9567 auto Sum = MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
9568 MIRBuilder.buildCopy(Res, Sum);
9569
9570 // OvOut = icmp slt ((sum ^ lhs) & (sum ^ rhs)), 0
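 // i.e. overflow occurred iff the sum's sign differs from the sign of both
 // inputs, which is what the sign bit of (sum ^ lhs) & (sum ^ rhs) tests.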
9571 auto AX = MIRBuilder.buildXor(Ty, Sum, LHS);
9572 auto BX = MIRBuilder.buildXor(Ty, Sum, RHS);
9573 auto T = MIRBuilder.buildAnd(Ty, AX, BX);
9574
9575 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9576 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9577
9578 MI.eraseFromParent();
9579 return Legalized;
9580}
9581
9582LegalizerHelper::LegalizeResult LegalizerHelper::lowerSSUBE(MachineInstr &MI) {
9583 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9584 const LLT Ty = MRI.getType(Res);
9585
9586 // Diff = LHS - (RHS + zext(CarryIn))
9587 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9588 auto RHSPlusCI = MIRBuilder.buildAdd(Ty, RHS, CarryZ);
9589 auto Diff = MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
9590 MIRBuilder.buildCopy(Res, Diff);
9591
9592 // ov = msb((LHS ^ RHS) & (LHS ^ Diff))
9593 auto X1 = MIRBuilder.buildXor(Ty, LHS, RHS);
9594 auto X2 = MIRBuilder.buildXor(Ty, LHS, Diff);
9595 auto T = MIRBuilder.buildAnd(Ty, X1, X2);
9596 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9597 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9598
9599 MI.eraseFromParent();
9600 return Legalized;
9601}
9602
9603LegalizerHelper::LegalizeResult
9604LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
9605 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9606 LLT Ty = MRI.getType(Res);
9607 bool IsSigned;
9608 bool IsAdd;
9609 unsigned BaseOp;
9610 switch (MI.getOpcode()) {
9611 default:
9612 llvm_unreachable("unexpected addsat/subsat opcode");
9613 case TargetOpcode::G_UADDSAT:
9614 IsSigned = false;
9615 IsAdd = true;
9616 BaseOp = TargetOpcode::G_ADD;
9617 break;
9618 case TargetOpcode::G_SADDSAT:
9619 IsSigned = true;
9620 IsAdd = true;
9621 BaseOp = TargetOpcode::G_ADD;
9622 break;
9623 case TargetOpcode::G_USUBSAT:
9624 IsSigned = false;
9625 IsAdd = false;
9626 BaseOp = TargetOpcode::G_SUB;
9627 break;
9628 case TargetOpcode::G_SSUBSAT:
9629 IsSigned = true;
9630 IsAdd = false;
9631 BaseOp = TargetOpcode::G_SUB;
9632 break;
9633 }
9634
9635 if (IsSigned) {
9636 // sadd.sat(a, b) ->
9637 // hi = 0x7fffffff - smax(a, 0)
9638 // lo = 0x80000000 - smin(a, 0)
9639 // a + smin(smax(lo, b), hi)
9640 // ssub.sat(a, b) ->
9641 // lo = smax(a, -1) - 0x7fffffff
9642 // hi = smin(a, -1) - 0x80000000
9643 // a - smin(smax(lo, b), hi)
9644 // TODO: AMDGPU can use a "median of 3" instruction here:
9645 // a +/- med3(lo, b, hi)
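 // A worked 8-bit example (illustration only): sadd.sat(100, 100) computes
 // hi = 127 - smax(100, 0) = 27 and lo = -128 - smin(100, 0) = -128, clamps
 // b = 100 to smin(smax(-128, 100), 27) = 27, and returns 100 + 27 = 127.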
9646 uint64_t NumBits = Ty.getScalarSizeInBits();
9647 auto MaxVal =
9648 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
9649 auto MinVal =
9650 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9651 MachineInstrBuilder Hi, Lo;
9652 if (IsAdd) {
9653 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9654 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
9655 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
9656 } else {
9657 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
9658 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
9659 MaxVal);
9660 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
9661 MinVal);
9662 }
9663 auto RHSClamped =
9664 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
9665 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
9666 } else {
9667 // uadd.sat(a, b) -> a + umin(~a, b)
9668 // usub.sat(a, b) -> a - umin(a, b)
9669 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
9670 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
9671 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9672 }
9673
9674 MI.eraseFromParent();
9675 return Legalized;
9676}
9677
9678LegalizerHelper::LegalizeResult
9679LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
9680 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9681 LLT Ty = MRI.getType(Res);
9682 LLT BoolTy = Ty.changeElementSize(1);
9683 bool IsSigned;
9684 bool IsAdd;
9685 unsigned OverflowOp;
9686 switch (MI.getOpcode()) {
9687 default:
9688 llvm_unreachable("unexpected addsat/subsat opcode");
9689 case TargetOpcode::G_UADDSAT:
9690 IsSigned = false;
9691 IsAdd = true;
9692 OverflowOp = TargetOpcode::G_UADDO;
9693 break;
9694 case TargetOpcode::G_SADDSAT:
9695 IsSigned = true;
9696 IsAdd = true;
9697 OverflowOp = TargetOpcode::G_SADDO;
9698 break;
9699 case TargetOpcode::G_USUBSAT:
9700 IsSigned = false;
9701 IsAdd = false;
9702 OverflowOp = TargetOpcode::G_USUBO;
9703 break;
9704 case TargetOpcode::G_SSUBSAT:
9705 IsSigned = true;
9706 IsAdd = false;
9707 OverflowOp = TargetOpcode::G_SSUBO;
9708 break;
9709 }
9710
9711 auto OverflowRes =
9712 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9713 Register Tmp = OverflowRes.getReg(0);
9714 Register Ov = OverflowRes.getReg(1);
9715 MachineInstrBuilder Clamp;
9716 if (IsSigned) {
9717 // sadd.sat(a, b) ->
9718 // {tmp, ov} = saddo(a, b)
9719 // ov ? (tmp >>s 31) + 0x80000000 : tmp
9720 // ssub.sat(a, b) ->
9721 // {tmp, ov} = ssubo(a, b)
9722 // ov ? (tmp >>s 31) + 0x80000000 : tmp
9723 uint64_t NumBits = Ty.getScalarSizeInBits();
9724 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
9725 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9726 auto MinVal =
9727 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9728 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
9729 } else {
9730 // uadd.sat(a, b) ->
9731 // {tmp, ov} = uaddo(a, b)
9732 // ov ? 0xffffffff : tmp
9733 // usub.sat(a, b) ->
9734 // {tmp, ov} = usubo(a, b)
9735 // ov ? 0 : tmp
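 // E.g. an 8-bit usub.sat(5, 9) (illustrative): usubo wraps to 252 and sets
 // ov, so the select below returns the clamp value 0 instead of the wrapped
 // result.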
9736 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9737 }
9738 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
9739
9740 MI.eraseFromParent();
9741 return Legalized;
9742}
9743
9744LegalizerHelper::LegalizeResult
9745LegalizerHelper::lowerShlSat(MachineInstr &MI) {
9746 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
9747 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
9748 "Expected shlsat opcode!");
9749 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
9750 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9751 LLT Ty = MRI.getType(Res);
9752 LLT BoolTy = Ty.changeElementSize(1);
9753
9754 unsigned BW = Ty.getScalarSizeInBits();
9755 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
9756 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
9757 : MIRBuilder.buildLShr(Ty, Result, RHS);
9758
9759 MachineInstrBuilder SatVal;
9760 if (IsSigned) {
9761 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
9762 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
9763 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
9764 MIRBuilder.buildConstant(Ty, 0));
9765 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
9766 } else {
9767 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
9768 }
9769 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
9770 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
9771
9772 MI.eraseFromParent();
9773 return Legalized;
9774}
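// --- Illustrative sketch (editorial addition, not part of LegalizerHelper.cpp).
// The shift-back overflow test used above, for an unsigned 16-bit saturating
// shift-left; ushl_sat16 is a hypothetical helper name and Amt is assumed to be
// a legal shift amount (< 16), matching the precondition on G_SHL.
#include <cstdint>

static uint16_t ushl_sat16(uint16_t X, unsigned Amt) {
  uint16_t Shifted = uint16_t(X << Amt);
  // If shifting back does not reproduce X, bits were lost: saturate to all-ones.
  return (uint16_t(Shifted >> Amt) == X) ? Shifted : uint16_t(0xFFFF);
}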
9775
9776LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
9777 auto [Dst, Src] = MI.getFirst2Regs();
9778 const LLT Ty = MRI.getType(Src);
9779 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
9780 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
9781
9782 // Swap most and least significant byte, set remaining bytes in Res to zero.
9783 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
9784 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
9785 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9786 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
9787
9788 // Set i-th high/low byte in Res to i-th low/high byte from Src.
9789 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
9790 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
9791 APInt APMask(SizeInBytes * 8, 0xFFULL << (i * 8));
9792 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
9793 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
9794 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
9795 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
9796 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
9797 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
9798 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
9799 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9800 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
9801 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
9802 }
9803 Res.getInstr()->getOperand(0).setReg(Dst);
9804
9805 MI.eraseFromParent();
9806 return Legalized;
9807}
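// --- Illustrative sketch (editorial addition, not part of LegalizerHelper.cpp).
// The shift-and-mask structure the loop above emits, fully unrolled for a
// 32-bit value; bswap32 is a hypothetical helper name.
#include <cstdint>

static uint32_t bswap32(uint32_t X) {
  uint32_t Res = (X << 24) | (X >> 24); // swap the outermost bytes
  Res |= (X & 0x0000FF00u) << 8;        // low middle byte -> high middle byte
  Res |= (X >> 8) & 0x0000FF00u;        // high middle byte -> low middle byte
  return Res;
}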
9808
9809//{ (Src & Mask) >> N } | { (Src << N) & Mask }
9810static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
9811 MachineInstrBuilder Src, const APInt &Mask) {
9812 const LLT Ty = Dst.getLLTTy(*B.getMRI());
9813 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
9814 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
9815 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
9816 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
9817 return B.buildOr(Dst, LHS, RHS);
9818}
9819
9820LegalizerHelper::LegalizeResult
9821LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
9822 auto [Dst, Src] = MI.getFirst2Regs();
9823 const LLT SrcTy = MRI.getType(Src);
9824 unsigned Size = SrcTy.getScalarSizeInBits();
9825 unsigned VSize = SrcTy.getSizeInBits();
9826
9827 if (Size >= 8) {
9828 if (SrcTy.isVector() && (VSize % 8 == 0) &&
9829 (LI.isLegal({TargetOpcode::G_BITREVERSE,
9830 {LLT::fixed_vector(VSize / 8, 8),
9831 LLT::fixed_vector(VSize / 8, 8)}}))) {
9832 // If bitreverse is legal for i8 vector of the same size, then cast
9833 // to i8 vector type.
9834 // e.g. v4s32 -> v16s8
9835 LLT VTy = LLT::fixed_vector(VSize / 8, 8);
9836 auto BSWAP = MIRBuilder.buildBSwap(SrcTy, Src);
9837 auto Cast = MIRBuilder.buildBitcast(VTy, BSWAP);
9838 auto RBIT = MIRBuilder.buildBitReverse(VTy, Cast);
9839 MIRBuilder.buildBitcast(Dst, RBIT);
9840 } else {
9841 MachineInstrBuilder BSWAP =
9842 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
9843
9844 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
9845 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
9846 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
9847 MachineInstrBuilder Swap4 = SwapN(4, SrcTy, MIRBuilder, BSWAP,
9848 APInt::getSplat(Size, APInt(8, 0xF0)));
9849
9850 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
9851 // [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
9852 // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
9853 MachineInstrBuilder Swap2 = SwapN(2, SrcTy, MIRBuilder, Swap4,
9854 APInt::getSplat(Size, APInt(8, 0xCC)));
9855
9856 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
9857 // 6|7
9858 // [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
9859 // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
9860 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
9861 }
9862 } else {
9863 // Expand bitreverse for types smaller than 8 bits.
9864 MachineInstrBuilder Tmp;
9865 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
9866 MachineInstrBuilder Tmp2;
9867 if (I < J) {
9868 auto ShAmt = MIRBuilder.buildConstant(SrcTy, J - I);
9869 Tmp2 = MIRBuilder.buildShl(SrcTy, Src, ShAmt);
9870 } else {
9871 auto ShAmt = MIRBuilder.buildConstant(SrcTy, I - J);
9872 Tmp2 = MIRBuilder.buildLShr(SrcTy, Src, ShAmt);
9873 }
9874
9875 auto Mask = MIRBuilder.buildConstant(SrcTy, 1ULL << J);
9876 Tmp2 = MIRBuilder.buildAnd(SrcTy, Tmp2, Mask);
9877 if (I == 0)
9878 Tmp = Tmp2;
9879 else
9880 Tmp = MIRBuilder.buildOr(SrcTy, Tmp, Tmp2);
9881 }
9882 MIRBuilder.buildCopy(Dst, Tmp);
9883 }
9884
9885 MI.eraseFromParent();
9886 return Legalized;
9887}
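// --- Illustrative sketch (editorial addition, not part of LegalizerHelper.cpp).
// The three SwapN stages above, specialized to a single byte (for i8 the
// leading G_BSWAP stage is a no-op); bitreverse8 is a hypothetical helper name.
#include <cstdint>

static uint8_t bitreverse8(uint8_t V) {
  V = uint8_t(((V & 0xF0) >> 4) | ((V << 4) & 0xF0)); // swap nibbles
  V = uint8_t(((V & 0xCC) >> 2) | ((V << 2) & 0xCC)); // swap 2-bit pairs
  V = uint8_t(((V & 0xAA) >> 1) | ((V << 1) & 0xAA)); // swap adjacent bits
  return V;
}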
9888
9889LegalizerHelper::LegalizeResult
9890LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
9891 MachineFunction &MF = MIRBuilder.getMF();
9892
9893 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
9894 int NameOpIdx = IsRead ? 1 : 0;
9895 int ValRegIndex = IsRead ? 0 : 1;
9896
9897 Register ValReg = MI.getOperand(ValRegIndex).getReg();
9898 const LLT Ty = MRI.getType(ValReg);
9899 const MDString *RegStr = cast<MDString>(
9900 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
9901
9902 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
9903 if (!PhysReg) {
9904 const Function &Fn = MF.getFunction();
9906 "invalid register \"" + Twine(RegStr->getString().data()) + "\" for " +
9907 (IsRead ? "llvm.read_register" : "llvm.write_register"),
9908 Fn, MI.getDebugLoc()));
9909 if (IsRead)
9910 MIRBuilder.buildUndef(ValReg);
9911
9912 MI.eraseFromParent();
9913 return Legalized;
9914 }
9915
9916 if (IsRead)
9917 MIRBuilder.buildCopy(ValReg, PhysReg);
9918 else
9919 MIRBuilder.buildCopy(PhysReg, ValReg);
9920
9921 MI.eraseFromParent();
9922 return Legalized;
9923}
9924
9925LegalizerHelper::LegalizeResult
9926LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
9927 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
9928 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
9929 Register Result = MI.getOperand(0).getReg();
9930 LLT OrigTy = MRI.getType(Result);
9931 auto SizeInBits = OrigTy.getScalarSizeInBits();
9932 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
9933
9934 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
9935 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
9936 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
9937 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
9938
9939 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
9940 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
9941 MIRBuilder.buildTrunc(Result, Shifted);
9942
9943 MI.eraseFromParent();
9944 return Legalized;
9945}
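// --- Illustrative sketch (editorial addition, not part of LegalizerHelper.cpp).
// The widen-multiply-then-shift pattern above, for an unsigned 32-bit high
// multiply; umulh32 is a hypothetical helper name.
#include <cstdint>

static uint32_t umulh32(uint32_t A, uint32_t B) {
  return uint32_t((uint64_t(A) * uint64_t(B)) >> 32);
}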
9946
9947LegalizerHelper::LegalizeResult
9948LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
9949 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9950 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
9951
9952 if (Mask == fcNone) {
9953 MIRBuilder.buildConstant(DstReg, 0);
9954 MI.eraseFromParent();
9955 return Legalized;
9956 }
9957 if (Mask == fcAllFlags) {
9958 MIRBuilder.buildConstant(DstReg, 1);
9959 MI.eraseFromParent();
9960 return Legalized;
9961 }
9962
9963 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
9964 // version
9965
9966 unsigned BitSize = SrcTy.getScalarSizeInBits();
9967 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
9968
9969 LLT IntTy = SrcTy.changeElementType(LLT::scalar(BitSize));
9970 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
9971
9972 // Various masks.
9973 APInt SignBit = APInt::getSignMask(BitSize);
9974 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9975 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9976 APInt ExpMask = Inf;
9977 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9978 APInt QNaNBitMask =
9979 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9980 APInt InversionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
9981
9982 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
9983 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
9984 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
9985 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
9986 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
9987
9988 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
9989 auto Sign =
9990 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
9991
9992 auto Res = MIRBuilder.buildConstant(DstTy, 0);
9993 // Clang doesn't support capture of structured bindings:
9994 LLT DstTyCopy = DstTy;
9995 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
9996 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
9997 };
9998
9999 // Tests that involve more than one class should be processed first.
10000 if ((Mask & fcFinite) == fcFinite) {
10001 // finite(V) ==> abs(V) u< exp_mask
10002 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
10003 ExpMaskC));
10004 Mask &= ~fcFinite;
10005 } else if ((Mask & fcFinite) == fcPosFinite) {
10006 // finite(V) && V > 0 ==> V u< exp_mask
10007 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
10008 ExpMaskC));
10009 Mask &= ~fcPosFinite;
10010 } else if ((Mask & fcFinite) == fcNegFinite) {
10011 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
10012 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
10013 ExpMaskC);
10014 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
10015 appendToRes(And);
10016 Mask &= ~fcNegFinite;
10017 }
10018
10019 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
10020 // fcZero | fcSubnormal => test all exponent bits are 0
10021 // TODO: Handle sign bit specific cases
10022 // TODO: Handle inverted case
10023 if (PartialCheck == (fcZero | fcSubnormal)) {
10024 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
10025 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10026 ExpBits, ZeroC));
10027 Mask &= ~PartialCheck;
10028 }
10029 }
10030
10031 // Check for individual classes.
10032 if (FPClassTest PartialCheck = Mask & fcZero) {
10033 if (PartialCheck == fcPosZero)
10034 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10035 AsInt, ZeroC));
10036 else if (PartialCheck == fcZero)
10037 appendToRes(
10038 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
10039 else // fcNegZero
10040 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10041 AsInt, SignBitC));
10042 }
10043
10044 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
10045 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
10046 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
10047 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
10048 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
10049 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
10050 auto SubnormalRes =
10051 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
10052 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
10053 if (PartialCheck == fcNegSubnormal)
10054 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
10055 appendToRes(SubnormalRes);
10056 }
10057
10058 if (FPClassTest PartialCheck = Mask & fcInf) {
10059 if (PartialCheck == fcPosInf)
10060 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10061 AsInt, InfC));
10062 else if (PartialCheck == fcInf)
10063 appendToRes(
10064 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
10065 else { // fcNegInf
10066 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
10067 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
10068 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10069 AsInt, NegInfC));
10070 }
10071 }
10072
10073 if (FPClassTest PartialCheck = Mask & fcNan) {
10074 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
10075 if (PartialCheck == fcNan) {
10076 // isnan(V) ==> abs(V) u> int(inf)
10077 appendToRes(
10078 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
10079 } else if (PartialCheck == fcQNan) {
10080 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
10081 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
10082 InfWithQnanBitC));
10083 } else { // fcSNan
10084 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
10085 // abs(V) u< (unsigned(Inf) | quiet_bit)
10086 auto IsNan =
10087 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
10088 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
10089 Abs, InfWithQnanBitC);
10090 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
10091 }
10092 }
10093
10094 if (FPClassTest PartialCheck = Mask & fcNormal) {
10095 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
10096 // (max_exp-1))
10097 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
10098 auto ExpMinusOne = MIRBuilder.buildSub(
10099 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
10100 APInt MaxExpMinusOne = ExpMask - ExpLSB;
10101 auto NormalRes =
10102 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
10103 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
10104 if (PartialCheck == fcNegNormal)
10105 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
10106 else if (PartialCheck == fcPosNormal) {
10107 auto PosSign = MIRBuilder.buildXor(
10108 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InversionMask));
10109 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
10110 }
10111 appendToRes(NormalRes);
10112 }
10113
10114 MIRBuilder.buildCopy(DstReg, Res);
10115 MI.eraseFromParent();
10116 return Legalized;
10117}
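// --- Illustrative sketch (editorial addition, not part of LegalizerHelper.cpp).
// The fcInf test above expressed for a single IEEE float: clear the sign bit of
// the bit pattern and compare against the encoding of +infinity. Assumes C++20
// for std::bit_cast; is_inf_f32 is a hypothetical helper name.
#include <bit>
#include <cstdint>

static bool is_inf_f32(float F) {
  uint32_t Bits = std::bit_cast<uint32_t>(F);
  uint32_t Abs = Bits & 0x7FFFFFFFu; // ValueMask: everything but the sign bit
  return Abs == 0x7F800000u;         // Inf: exponent all ones, mantissa zero
}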
10118
10119LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
10120 // Implement G_SELECT in terms of XOR, AND, OR.
10121 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
10122 MI.getFirst4RegLLTs();
10123
10124 bool IsEltPtr = DstTy.isPointerOrPointerVector();
10125 if (IsEltPtr) {
10126 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
10127 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
10128 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
10129 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
10130 DstTy = NewTy;
10131 }
10132
10133 if (MaskTy.isScalar()) {
10134 // Turn the scalar condition into a vector condition mask if needed.
10135
10136 Register MaskElt = MaskReg;
10137
10138 // The condition was potentially zero extended before, but we want a sign
10139 // extended boolean.
10140 if (MaskTy != LLT::scalar(1))
10141 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
10142
10143 // Continue the sign extension (or truncate) to match the data type.
10144 MaskElt =
10145 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
10146
10147 if (DstTy.isVector()) {
10148 // Generate a vector splat idiom.
10149 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
10150 MaskReg = ShufSplat.getReg(0);
10151 } else {
10152 MaskReg = MaskElt;
10153 }
10154 MaskTy = DstTy;
10155 } else if (!DstTy.isVector()) {
10156 // Cannot handle the case that mask is a vector and dst is a scalar.
10157 return UnableToLegalize;
10158 }
10159
10160 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
10161 return UnableToLegalize;
10162 }
10163
10164 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
10165 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
10166 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
10167 if (IsEltPtr) {
10168 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
10169 MIRBuilder.buildIntToPtr(DstReg, Or);
10170 } else {
10171 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
10172 }
10173 MI.eraseFromParent();
10174 return Legalized;
10175}
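// --- Illustrative sketch (editorial addition, not part of LegalizerHelper.cpp).
// The AND/OR select emitted above, for one 32-bit lane; select_by_mask is a
// hypothetical helper name. Mask must be all-ones (pick A) or all-zeros
// (pick B), which is why the scalar condition is sign-extended first.
#include <cstdint>

static uint32_t select_by_mask(uint32_t Mask, uint32_t A, uint32_t B) {
  return (A & Mask) | (B & ~Mask);
}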
10176
10177LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
10178 // Split DIVREM into individual instructions.
10179 unsigned Opcode = MI.getOpcode();
10180
10181 MIRBuilder.buildInstr(
10182 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
10183 : TargetOpcode::G_UDIV,
10184 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10185 MIRBuilder.buildInstr(
10186 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
10187 : TargetOpcode::G_UREM,
10188 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10189 MI.eraseFromParent();
10190 return Legalized;
10191}
10192
10193LegalizerHelper::LegalizeResult
10194LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
10195 // Expand %res = G_ABS %a into:
10196 // %v1 = G_ASHR %a, scalar_size-1
10197 // %v2 = G_ADD %a, %v1
10198 // %res = G_XOR %v2, %v1
10199 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
10200 Register OpReg = MI.getOperand(1).getReg();
10201 auto ShiftAmt =
10202 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
10203 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
10204 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
10205 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
10206 MI.eraseFromParent();
10207 return Legalized;
10208}
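// --- Illustrative sketch (editorial addition, not part of LegalizerHelper.cpp).
// The ashr/add/xor expansion above on a 32-bit value, with the add and xor done
// in unsigned arithmetic so INT32_MIN does not trigger signed-overflow UB;
// abs_addxor32 is a hypothetical helper name.
#include <cstdint>

static uint32_t abs_addxor32(int32_t A) {
  uint32_t Sign = uint32_t(A >> 31);  // 0 for non-negative, 0xFFFFFFFF for negative
  return (uint32_t(A) + Sign) ^ Sign; // conditional two's-complement negation
}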
10209
10210LegalizerHelper::LegalizeResult
10211LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
10212 // Expand %res = G_ABS %a into:
10213 // %v1 = G_CONSTANT 0
10214 // %v2 = G_SUB %v1, %a
10215 // %res = G_SMAX %a, %v2
10216 Register SrcReg = MI.getOperand(1).getReg();
10217 LLT Ty = MRI.getType(SrcReg);
10218 auto Zero = MIRBuilder.buildConstant(Ty, 0);
10219 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
10220 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
10221 MI.eraseFromParent();
10222 return Legalized;
10223}
10224
10225LegalizerHelper::LegalizeResult
10226LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
10227 Register SrcReg = MI.getOperand(1).getReg();
10228 Register DestReg = MI.getOperand(0).getReg();
10229 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
10230 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
10231 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
10232 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
10233 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
10234 MI.eraseFromParent();
10235 return Legalized;
10236}
10237
10240 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10241 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10242 "Expected G_ABDS or G_ABDU instruction");
10243
10244 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10245 LLT Ty = MRI.getType(LHS);
10246
10247 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10248 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10249 Register LHSSub = MIRBuilder.buildSub(Ty, LHS, RHS).getReg(0);
10250 Register RHSSub = MIRBuilder.buildSub(Ty, RHS, LHS).getReg(0);
10251 CmpInst::Predicate Pred = (MI.getOpcode() == TargetOpcode::G_ABDS)
10252 ? CmpInst::ICMP_SGT
10253 : CmpInst::ICMP_UGT;
10254 auto ICmp = MIRBuilder.buildICmp(Pred, LLT::scalar(1), LHS, RHS);
10255 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
10256
10257 MI.eraseFromParent();
10258 return Legalized;
10259}
10260
10263 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10264 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10265 "Expected G_ABDS or G_ABDU instruction");
10266
10267 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10268 LLT Ty = MRI.getType(LHS);
10269
10270 // abds(lhs, rhs) -> sub(smax(lhs, rhs), smin(lhs, rhs))
10271 // abdu(lhs, rhs) -> sub(umax(lhs, rhs), umin(lhs, rhs))
10272 Register MaxReg, MinReg;
10273 if (MI.getOpcode() == TargetOpcode::G_ABDS) {
10274 MaxReg = MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10275 MinReg = MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10276 } else {
10277 MaxReg = MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10278 MinReg = MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10279 }
10280 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10281
10282 MI.eraseFromParent();
10283 return Legalized;
10284}
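// --- Illustrative sketch (editorial addition, not part of LegalizerHelper.cpp).
// The max-minus-min form above for an unsigned 32-bit absolute difference;
// abdu32 is a hypothetical helper name.
#include <algorithm>
#include <cstdint>

static uint32_t abdu32(uint32_t A, uint32_t B) {
  return std::max(A, B) - std::min(A, B);
}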
10285
10286LegalizerHelper::LegalizeResult LegalizerHelper::lowerFAbs(MachineInstr &MI) {
10287 Register SrcReg = MI.getOperand(1).getReg();
10288 Register DstReg = MI.getOperand(0).getReg();
10289
10290 LLT Ty = MRI.getType(DstReg);
10291
10292 // Reset sign bit
10293 MIRBuilder.buildAnd(
10294 DstReg, SrcReg,
10295 MIRBuilder.buildConstant(
10296 Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits())));
10297
10298 MI.eraseFromParent();
10299 return Legalized;
10300}
10301
10302LegalizerHelper::LegalizeResult
10303LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
10304 Register SrcReg = MI.getOperand(1).getReg();
10305 LLT SrcTy = MRI.getType(SrcReg);
10306 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
10307
10308 // The source could be a scalar if the IR type was <1 x sN>.
10309 if (SrcTy.isScalar()) {
10310 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
10311 return UnableToLegalize; // FIXME: handle extension.
10312 // This can be just a plain copy.
10313 Observer.changingInstr(MI);
10314 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
10315 Observer.changedInstr(MI);
10316 return Legalized;
10317 }
10318 return UnableToLegalize;
10319}
10320
10321LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
10322 MachineFunction &MF = *MI.getMF();
10323 const DataLayout &DL = MIRBuilder.getDataLayout();
10324 LLVMContext &Ctx = MF.getFunction().getContext();
10325 Register ListPtr = MI.getOperand(1).getReg();
10326 LLT PtrTy = MRI.getType(ListPtr);
10327
10328 // ListPtr is a pointer to the head of the list. Get the address
10329 // of the head of the list.
10330 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
10331 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
10332 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
10333 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10334
10335 const Align A(MI.getOperand(2).getImm());
10336 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
10337 if (A > TLI.getMinStackArgumentAlignment()) {
10338 Register AlignAmt =
10339 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
10340 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10341 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
10342 VAList = AndDst.getReg(0);
10343 }
10344
10345 // Increment the pointer, VAList, to the next vaarg
10346 // The list should be bumped by the size of the element in the current head
10347 // of the list.
10348 Register Dst = MI.getOperand(0).getReg();
10349 LLT LLTTy = MRI.getType(Dst);
10350 Type *Ty = getTypeForLLT(LLTTy, Ctx);
10351 auto IncAmt =
10352 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
10353 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10354
10355 // Store the incremented VAList to the legalized pointer
10356 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
10357 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
10358 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10359 // Load the actual argument out of the pointer VAList
10360 Align EltAlignment = DL.getABITypeAlign(Ty);
10361 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
10362 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
10363 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10364
10365 MI.eraseFromParent();
10366 return Legalized;
10367}
10368
10369static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
10370 // On Darwin, -Os means optimize for size without hurting performance, so
10371 // only really optimize for size when -Oz (MinSize) is used.
10372 if (MF.getTarget().getTargetTriple().isOSDarwin())
10373 return MF.getFunction().hasMinSize();
10374 return MF.getFunction().hasOptSize();
10375}
10376
10377// Returns a list of types to use for memory op lowering in MemOps. A partial
10378// port of findOptimalMemOpLowering in TargetLowering.
10379static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
10380 unsigned Limit, const MemOp &Op,
10381 unsigned DstAS, unsigned SrcAS,
10382 const AttributeList &FuncAttributes,
10383 const TargetLowering &TLI) {
10384 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
10385 return false;
10386
10387 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
10388
10389 if (Ty == LLT()) {
10390 // Use the largest scalar type whose alignment constraints are satisfied.
10391 // We only need to check DstAlign here as SrcAlign is always greater or
10392 // equal to DstAlign (or zero).
10393 Ty = LLT::scalar(64);
10394 if (Op.isFixedDstAlign())
10395 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
10396 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
10397 Ty = LLT::scalar(Ty.getSizeInBytes());
10398 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
10399 // FIXME: check for the largest legal type we can load/store to.
10400 }
10401
10402 unsigned NumMemOps = 0;
10403 uint64_t Size = Op.size();
10404 while (Size) {
10405 unsigned TySize = Ty.getSizeInBytes();
10406 while (TySize > Size) {
10407 // For now, only use non-vector loads / stores for the left-over pieces.
10408 LLT NewTy = Ty;
10409 // FIXME: check for mem op safety and legality of the types. Not all of
10410 // SDAGisms map cleanly to GISel concepts.
10411 if (NewTy.isVector())
10412 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
10413 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
10414 unsigned NewTySize = NewTy.getSizeInBytes();
10415 assert(NewTySize > 0 && "Could not find appropriate type");
10416
10417 // If the new LLT cannot cover all of the remaining bits, then consider
10418 // issuing a (or a pair of) unaligned and overlapping load / store.
10419 unsigned Fast;
10420 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
10421 MVT VT = getMVTForLLT(Ty);
10422 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
10423 TLI.allowsMisalignedMemoryAccesses(
10424 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
10425 MachineMemOperand::MONone, &Fast) &&
10426 Fast)
10427 TySize = Size;
10428 else {
10429 Ty = NewTy;
10430 TySize = NewTySize;
10431 }
10432 }
10433
10434 if (++NumMemOps > Limit)
10435 return false;
10436
10437 MemOps.push_back(Ty);
10438 Size -= TySize;
10439 }
10440
10441 return true;
10442}
10443
10444// Get a vectorized representation of the memset value operand, GISel edition.
10445static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
10446 MachineRegisterInfo &MRI = *MIB.getMRI();
10447 unsigned NumBits = Ty.getScalarSizeInBits();
10448 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10449 if (!Ty.isVector() && ValVRegAndVal) {
10450 APInt Scalar = ValVRegAndVal->Value.trunc(8);
10451 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
10452 return MIB.buildConstant(Ty, SplatVal).getReg(0);
10453 }
10454
10455 // Extend the byte value to the larger type, and then multiply by a magic
10456 // value 0x010101... in order to replicate it across every byte.
10457 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
10458 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10459 return MIB.buildConstant(Ty, 0).getReg(0);
10460 }
10461
10462 LLT ExtType = Ty.getScalarType();
10463 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
10464 if (NumBits > 8) {
10465 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
10466 auto MagicMI = MIB.buildConstant(ExtType, Magic);
10467 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
10468 }
10469
10470 // For vector types create a G_BUILD_VECTOR.
10471 if (Ty.isVector())
10472 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
10473
10474 return Val;
10475}
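// --- Illustrative sketch (editorial addition, not part of LegalizerHelper.cpp).
// The magic-multiply byte splat used above, replicating an 8-bit memset value
// across a 32-bit store type; splat_byte32 is a hypothetical helper name.
#include <cstdint>

static uint32_t splat_byte32(uint8_t Byte) {
  return uint32_t(Byte) * 0x01010101u; // e.g. 0xAB -> 0xABABABAB
}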
10476
10477LegalizerHelper::LegalizeResult
10478LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
10479 uint64_t KnownLen, Align Alignment,
10480 bool IsVolatile) {
10481 auto &MF = *MI.getParent()->getParent();
10482 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10483 auto &DL = MF.getDataLayout();
10484 LLVMContext &C = MF.getFunction().getContext();
10485
10486 assert(KnownLen != 0 && "Have a zero length memset length!");
10487
10488 bool DstAlignCanChange = false;
10489 MachineFrameInfo &MFI = MF.getFrameInfo();
10490 bool OptSize = shouldLowerMemFuncForSize(MF);
10491
10492 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10493 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10494 DstAlignCanChange = true;
10495
10496 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
10497 std::vector<LLT> MemOps;
10498
10499 const auto &DstMMO = **MI.memoperands_begin();
10500 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10501
10502 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10503 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
10504
10505 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
10506 MemOp::Set(KnownLen, DstAlignCanChange,
10507 Alignment,
10508 /*IsZeroMemset=*/IsZeroVal,
10509 /*IsVolatile=*/IsVolatile),
10510 DstPtrInfo.getAddrSpace(), ~0u,
10511 MF.getFunction().getAttributes(), TLI))
10512 return UnableToLegalize;
10513
10514 if (DstAlignCanChange) {
10515 // Get an estimate of the type from the LLT.
10516 Type *IRTy = getTypeForLLT(MemOps[0], C);
10517 Align NewAlign = DL.getABITypeAlign(IRTy);
10518 if (NewAlign > Alignment) {
10519 Alignment = NewAlign;
10520 unsigned FI = FIDef->getOperand(1).getIndex();
10521 // Give the stack frame object a larger alignment if needed.
10522 if (MFI.getObjectAlign(FI) < Alignment)
10523 MFI.setObjectAlignment(FI, Alignment);
10524 }
10525 }
10526
10527 MachineIRBuilder MIB(MI);
10528 // Find the largest store and generate the bit pattern for it.
10529 LLT LargestTy = MemOps[0];
10530 for (unsigned i = 1; i < MemOps.size(); i++)
10531 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
10532 LargestTy = MemOps[i];
10533
10534 // The memset stored value is always defined as an s8, so in order to make it
10535 // work with larger store types we need to repeat the bit pattern across the
10536 // wider type.
10537 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
10538
10539 if (!MemSetValue)
10540 return UnableToLegalize;
10541
10542 // Generate the stores. For each store type in the list, we generate the
10543 // matching store of that type to the destination address.
10544 LLT PtrTy = MRI.getType(Dst);
10545 unsigned DstOff = 0;
10546 unsigned Size = KnownLen;
10547 for (unsigned I = 0; I < MemOps.size(); I++) {
10548 LLT Ty = MemOps[I];
10549 unsigned TySize = Ty.getSizeInBytes();
10550 if (TySize > Size) {
10551 // Issuing an unaligned load / store pair that overlaps with the previous
10552 // pair. Adjust the offset accordingly.
10553 assert(I == MemOps.size() - 1 && I != 0);
10554 DstOff -= TySize - Size;
10555 }
10556
10557 // If this store is smaller than the largest store, see whether we can get
10558 // the smaller value for free with a truncate.
10559 Register Value = MemSetValue;
10560 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
10561 MVT VT = getMVTForLLT(Ty);
10562 MVT LargestVT = getMVTForLLT(LargestTy);
10563 if (!LargestTy.isVector() && !Ty.isVector() &&
10564 TLI.isTruncateFree(LargestVT, VT))
10565 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10566 else
10567 Value = getMemsetValue(Val, Ty, MIB);
10568 if (!Value)
10569 return UnableToLegalize;
10570 }
10571
10572 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
10573
10574 Register Ptr = Dst;
10575 if (DstOff != 0) {
10576 auto Offset =
10577 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
10578 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst, Offset).getReg(0);
10579 }
10580
10581 MIB.buildStore(Value, Ptr, *StoreMMO);
10582 DstOff += Ty.getSizeInBytes();
10583 Size -= TySize;
10584 }
10585
10586 MI.eraseFromParent();
10587 return Legalized;
10588}
10589
10590LegalizerHelper::LegalizeResult
10591LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
10592 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10593
10594 auto [Dst, Src, Len] = MI.getFirst3Regs();
10595
10596 const auto *MMOIt = MI.memoperands_begin();
10597 const MachineMemOperand *MemOp = *MMOIt;
10598 bool IsVolatile = MemOp->isVolatile();
10599
10600 // See if this is a constant length copy
10601 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10602 // FIXME: support dynamically sized G_MEMCPY_INLINE
10603 assert(LenVRegAndVal &&
10604 "inline memcpy with dynamic size is not yet supported");
10605 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10606 if (KnownLen == 0) {
10607 MI.eraseFromParent();
10608 return Legalized;
10609 }
10610
10611 const auto &DstMMO = **MI.memoperands_begin();
10612 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10613 Align DstAlign = DstMMO.getBaseAlign();
10614 Align SrcAlign = SrcMMO.getBaseAlign();
10615
10616 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
10617 IsVolatile);
10618}
10619
10620LegalizerHelper::LegalizeResult
10621LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
10622 uint64_t KnownLen, Align DstAlign,
10623 Align SrcAlign, bool IsVolatile) {
10624 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10625 return lowerMemcpy(MI, Dst, Src, KnownLen,
10626 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
10627 IsVolatile);
10628}
10629
10630LegalizerHelper::LegalizeResult
10631LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
10632 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
10633 Align SrcAlign, bool IsVolatile) {
10634 auto &MF = *MI.getParent()->getParent();
10635 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10636 auto &DL = MF.getDataLayout();
10637 LLVMContext &C = MF.getFunction().getContext();
10638
10639 assert(KnownLen != 0 && "Have a zero length memcpy length!");
10640
10641 bool DstAlignCanChange = false;
10642 MachineFrameInfo &MFI = MF.getFrameInfo();
10643 Align Alignment = std::min(DstAlign, SrcAlign);
10644
10645 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10646 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10647 DstAlignCanChange = true;
10648
10649 // FIXME: infer better src pointer alignment like SelectionDAG does here.
10650 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
10651 // if the memcpy is in a tail call position.
10652
10653 std::vector<LLT> MemOps;
10654
10655 const auto &DstMMO = **MI.memoperands_begin();
10656 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10657 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10658 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10659
10660 if (!findGISelOptimalMemOpLowering(
10661 MemOps, Limit,
10662 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10663 IsVolatile),
10664 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10665 MF.getFunction().getAttributes(), TLI))
10666 return UnableToLegalize;
10667
10668 if (DstAlignCanChange) {
10669 // Get an estimate of the type from the LLT.
10670 Type *IRTy = getTypeForLLT(MemOps[0], C);
10671 Align NewAlign = DL.getABITypeAlign(IRTy);
10672
10673 // Don't promote to an alignment that would require dynamic stack
10674 // realignment.
10675 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10676 if (!TRI->hasStackRealignment(MF))
10677 if (MaybeAlign StackAlign = DL.getStackAlignment())
10678 NewAlign = std::min(NewAlign, *StackAlign);
10679
10680 if (NewAlign > Alignment) {
10681 Alignment = NewAlign;
10682 unsigned FI = FIDef->getOperand(1).getIndex();
10683 // Give the stack frame object a larger alignment if needed.
10684 if (MFI.getObjectAlign(FI) < Alignment)
10685 MFI.setObjectAlignment(FI, Alignment);
10686 }
10687 }
10688
10689 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
10690
10691 MachineIRBuilder MIB(MI);
10692 // Now we need to emit a pair of loads and stores for each of the types we've
10693 // collected. I.e. for each type, generate a load from the source pointer of
10694 // that type width, and then generate a corresponding store to the dest buffer
10695 // of that value loaded. This can result in a sequence of loads and stores
10696 // of mixed types, depending on what the target specifies as good types to use.
10697 unsigned CurrOffset = 0;
10698 unsigned Size = KnownLen;
10699 for (auto CopyTy : MemOps) {
10700 // Issuing an unaligned load / store pair that overlaps with the previous
10701 // pair. Adjust the offset accordingly.
10702 if (CopyTy.getSizeInBytes() > Size)
10703 CurrOffset -= CopyTy.getSizeInBytes() - Size;
10704
10705 // Construct MMOs for the accesses.
10706 auto *LoadMMO =
10707 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10708 auto *StoreMMO =
10709 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10710
10711 // Create the load.
10712 Register LoadPtr = Src;
10713 Register Offset;
10714 if (CurrOffset != 0) {
10715 LLT SrcTy = MRI.getType(Src);
10716 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
10717 .getReg(0);
10718 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10719 }
10720 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
10721
10722 // Create the store.
10723 Register StorePtr = Dst;
10724 if (CurrOffset != 0) {
10725 LLT DstTy = MRI.getType(Dst);
10726 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10727 }
10728 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
10729 CurrOffset += CopyTy.getSizeInBytes();
10730 Size -= CopyTy.getSizeInBytes();
10731 }
10732
10733 MI.eraseFromParent();
10734 return Legalized;
10735}
10736
10737LegalizerHelper::LegalizeResult
10738LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
10739 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
10740 bool IsVolatile) {
10741 auto &MF = *MI.getParent()->getParent();
10742 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10743 auto &DL = MF.getDataLayout();
10744 LLVMContext &C = MF.getFunction().getContext();
10745
10746 assert(KnownLen != 0 && "Have a zero length memmove length!");
10747
10748 bool DstAlignCanChange = false;
10749 MachineFrameInfo &MFI = MF.getFrameInfo();
10750 bool OptSize = shouldLowerMemFuncForSize(MF);
10751 Align Alignment = std::min(DstAlign, SrcAlign);
10752
10753 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10754 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10755 DstAlignCanChange = true;
10756
10757 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
10758 std::vector<LLT> MemOps;
10759
10760 const auto &DstMMO = **MI.memoperands_begin();
10761 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10762 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10763 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10764
10765 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
10766 // to a bug in its findOptimalMemOpLowering implementation. For now do the
10767 // same thing here.
10768 if (!findGISelOptimalMemOpLowering(
10769 MemOps, Limit,
10770 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10771 /*IsVolatile*/ true),
10772 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10773 MF.getFunction().getAttributes(), TLI))
10774 return UnableToLegalize;
10775
10776 if (DstAlignCanChange) {
10777 // Get an estimate of the type from the LLT.
10778 Type *IRTy = getTypeForLLT(MemOps[0], C);
10779 Align NewAlign = DL.getABITypeAlign(IRTy);
10780
10781 // Don't promote to an alignment that would require dynamic stack
10782 // realignment.
10783 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10784 if (!TRI->hasStackRealignment(MF))
10785 if (MaybeAlign StackAlign = DL.getStackAlignment())
10786 NewAlign = std::min(NewAlign, *StackAlign);
10787
10788 if (NewAlign > Alignment) {
10789 Alignment = NewAlign;
10790 unsigned FI = FIDef->getOperand(1).getIndex();
10791 // Give the stack frame object a larger alignment if needed.
10792 if (MFI.getObjectAlign(FI) < Alignment)
10793 MFI.setObjectAlignment(FI, Alignment);
10794 }
10795 }
10796
10797 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
10798
10799 MachineIRBuilder MIB(MI);
10800 // Memmove requires that we perform the loads first before issuing the stores.
10801 // Apart from that, this loop is pretty much doing the same thing as the
10802 // memcpy codegen function.
10803 unsigned CurrOffset = 0;
10804 SmallVector<Register, 16> LoadVals;
10805 for (auto CopyTy : MemOps) {
10806 // Construct MMO for the load.
10807 auto *LoadMMO =
10808 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10809
10810 // Create the load.
10811 Register LoadPtr = Src;
10812 if (CurrOffset != 0) {
10813 LLT SrcTy = MRI.getType(Src);
10814 auto Offset =
10815 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
10816 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10817 }
10818 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
10819 CurrOffset += CopyTy.getSizeInBytes();
10820 }
10821
10822 CurrOffset = 0;
10823 for (unsigned I = 0; I < MemOps.size(); ++I) {
10824 LLT CopyTy = MemOps[I];
10825 // Now store the values loaded.
10826 auto *StoreMMO =
10827 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10828
10829 Register StorePtr = Dst;
10830 if (CurrOffset != 0) {
10831 LLT DstTy = MRI.getType(Dst);
10832 auto Offset =
10833 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
10834 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10835 }
10836 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
10837 CurrOffset += CopyTy.getSizeInBytes();
10838 }
10839 MI.eraseFromParent();
10840 return Legalized;
10841}
10842
10843LegalizerHelper::LegalizeResult
10844LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
10845 const unsigned Opc = MI.getOpcode();
10846 // This combine is fairly complex so it's not written with a separate
10847 // matcher function.
10848 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
10849 Opc == TargetOpcode::G_MEMSET) &&
10850 "Expected memcpy like instruction");
10851
10852 auto MMOIt = MI.memoperands_begin();
10853 const MachineMemOperand *MemOp = *MMOIt;
10854
10855 Align DstAlign = MemOp->getBaseAlign();
10856 Align SrcAlign;
10857 auto [Dst, Src, Len] = MI.getFirst3Regs();
10858
10859 if (Opc != TargetOpcode::G_MEMSET) {
10860 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
10861 MemOp = *(++MMOIt);
10862 SrcAlign = MemOp->getBaseAlign();
10863 }
10864
10865 // See if this is a constant length copy
10866 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10867 if (!LenVRegAndVal)
10868 return UnableToLegalize;
10869 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10870
10871 if (KnownLen == 0) {
10872 MI.eraseFromParent();
10873 return Legalized;
10874 }
10875
10876 if (MaxLen && KnownLen > MaxLen)
10877 return UnableToLegalize;
10878
10879 bool IsVolatile = MemOp->isVolatile();
10880 if (Opc == TargetOpcode::G_MEMCPY) {
10881 auto &MF = *MI.getParent()->getParent();
10882 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10883 bool OptSize = shouldLowerMemFuncForSize(MF);
10884 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
10885 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
10886 IsVolatile);
10887 }
10888 if (Opc == TargetOpcode::G_MEMMOVE)
10889 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
10890 if (Opc == TargetOpcode::G_MEMSET)
10891 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
10892 return UnableToLegalize;
10893}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S16
constexpr LLT S1
constexpr LLT S32
constexpr LLT S64
AMDGPU Register Bank Select
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition Utils.h:75
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t High
R600 Clause Merge
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1410
APInt bitcastToAPInt() const
Definition APFloat.h:1416
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1201
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1161
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1172
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1549
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1521
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1183
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1677
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
void negate()
Negate this APInt in place.
Definition APInt.h:1477
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:996
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:874
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition APInt.h:271
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:685
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
bool isSigned() const
Definition InstrTypes.h:930
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
const APFloat & getValueAPF() const
Definition Constants.h:325
This is the shared class of boolean and integer constants.
Definition Constants.h:87
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool isBigEndian() const
Definition DataLayout.h:215
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a threeway compare.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
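As a rough illustration of the generic-instruction accessors above (the helper name is hypothetical), a store can be inspected like this:
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
using namespace llvm;

void describeStore(GStore &St) {
  Register Val = St.getValueReg();     // stored value register
  Register Ptr = St.getPointerReg();   // pointer operand
  MachineMemOperand &MMO = St.getMMO();
  bool Atomic = St.isAtomic();         // MMO has the atomic flag set?
  LocationSize Sz = St.getMemSize();   // access size in bytes
  (void)Val; (void)Ptr; (void)MMO; (void)Atomic; (void)Sz;
}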
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr LLT changeVectorElementType(LLT NewEltTy) const
Returns a vector with the same number of elements but the new element type.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
constexpr LLT changeVectorElementCount(ElementCount EC) const
Return a vector with the same element type and the new element count.
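The LLT queries above are easy to mix up; a small sketch (the header path is assumed for recent trees, values are illustrative):
#include "llvm/CodeGenTypes/LowLevelType.h"
#include <cassert>
using namespace llvm;

void lltExamples() {
  LLT S32 = LLT::scalar(32);                 // 32-bit "bag of bits"
  LLT V4S32 = LLT::fixed_vector(4, 32);      // <4 x s32>
  LLT P0 = LLT::pointer(0, 64);              // pointer in address space 0

  assert(V4S32.isVector() && V4S32.getNumElements() == 4);
  assert(V4S32.getScalarType() == S32);
  LLT V4S16 = V4S32.changeElementSize(16);   // same count, 16-bit elements
  LLT V2S32 = V4S32.changeElementCount(ElementCount::getFixed(2));
  (void)P0; (void)V4S16; (void)V2S32;
}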
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSSUBE(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizerHelper::LegalizeResult createAtomicLibcall(MachineInstr &MI) const
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTLS(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B, const LibcallLoweringInfo *Libcalls=nullptr)
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI LegalizeResult conversionLibcall(MachineInstr &MI, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, bool IsSigned=false) const
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult createMemLibcall(MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Create a libcall to memcpy et al.
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult createLibcall(const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr) const
Helper function that creates a libcall to the given Name using the given calling convention CC.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LLVM_ABI LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver) const
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI)
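To show how the LegalizerHelper entry points above are typically driven, here is a hedged sketch of a target hook; MyTargetLegalizerInfo is a made-up subclass of LegalizerInfo and the opcode choices are arbitrary, not this file's behavior.
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
using namespace llvm;

bool MyTargetLegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
                                           MachineInstr &MI,
                                           LostDebugLocObserver &LocObserver) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_CTPOP:
    // Perform the count on s32; generic code inserts the needed ext/trunc.
    return Helper.widenScalar(MI, /*TypeIdx=*/0, LLT::scalar(32)) ==
           LegalizerHelper::Legalized;
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
    // Expand through the generic lowering declared above.
    return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized;
  default:
    return false;
  }
}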
Tracks which library functions to use for a particular subtarget.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
A single uniqued string.
Definition Metadata.h:721
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:624
Machine Value Type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
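A short sketch of the MachineIRBuilder calls listed above; the rotate-by-one expansion is purely illustrative, not a lowering taken from this file.
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

// Emit Dst = (Src << 1) | (Src >> 31) for a 32-bit value, i.e. rotl by one.
void buildRotlByOne(MachineIRBuilder &B, Register Dst, Register Src) {
  LLT S32 = LLT::scalar(32);
  auto One = B.buildConstant(S32, 1);
  auto ThirtyOne = B.buildConstant(S32, 31);
  auto Hi = B.buildShl(S32, Src, One);
  auto Lo = B.buildLShr(S32, Src, ThirtyOne);
  B.buildOr(Dst, Hi, Lo);
}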
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:413
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Definition Triple.h:632
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Definition Type.cpp:289
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:285
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
Definition Type.cpp:288
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:284
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:282
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
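These actions are requested by target rule sets; a hedged sketch of a hypothetical target's constructor (MyTargetLegalizerInfo and MyTargetSubtarget are made-up names, the builder API is from LegalizerInfo.h):
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
using namespace llvm;

MyTargetLegalizerInfo::MyTargetLegalizerInfo(const MyTargetSubtarget &ST) {
  const LLT S32 = LLT::scalar(32);
  const LLT S64 = LLT::scalar(64);
  getActionDefinitionsBuilder(TargetOpcode::G_ADD)
      .legalFor({S32, S64})      // Legal
      .clampScalar(0, S32, S64)  // WidenScalar below s32, NarrowScalar above s64
      .widenScalarToNextPow2(0); // WidenScalar to the next power of two
  getLegacyLegalizerInfo().computeTables();
}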
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
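A small sketch of how these RTLIB lookups pick a conversion routine, here for an f64 to f32 round; the header path is an assumption for recent trees.
#include "llvm/CodeGen/RuntimeLibcallUtil.h"    // assumed location of getFPROUND/getFPEXT
#include "llvm/CodeGenTypes/MachineValueType.h"
using namespace llvm;

RTLIB::Libcall pickF64ToF32Round() {
  // Yields FPROUND_F64_F32, or UNKNOWN_LIBCALL if the pair is unsupported.
  return RTLIB::getFPROUND(MVT::f64, MVT::f32);
}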
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:829
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:2042
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:654
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1667
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:295
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition MathExtras.h:223
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2198
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1571
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1628
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1150
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition Utils.cpp:1195
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Success
The lock was released successfully.
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition Utils.cpp:509
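For example, extractParts can split a wide register into equally sized pieces; the helper name and widths below are illustrative only.
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
using namespace llvm;

void splitS96IntoS32(Register Wide, MachineIRBuilder &B,
                     MachineRegisterInfo &MRI) {
  SmallVector<Register, 4> Parts;
  extractParts(Wide, LLT::scalar(32), /*NumParts=*/3, Parts, B, MRI);
  // Parts now holds three s32 registers covering bits [0,32), [32,64), [64,96).
}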
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1883
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition MathExtras.h:232
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:434
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition Utils.h:349
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1945
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition Utils.cpp:1283
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:330
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition Utils.cpp:612
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
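The arithmetic helpers referenced in this index behave as follows; the values shown are worked examples, not taken from this file.
#include "llvm/Support/Alignment.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;

void mathHelperExamples() {
  bool P = isPowerOf2_32(48);               // false
  unsigned L = Log2_32(48);                 // 5, the floor of log2(48)
  uint64_t Ceil = PowerOf2Ceil(48);         // 64
  uint64_t Next = NextPowerOf2(64);         // 128, strictly greater than 64
  int64_t Lo = minIntN(8), Hi = maxIntN(8); // -128 and 127
  uint64_t Aligned = alignTo(13, Align(8)); // 16
  (void)P; (void)L; (void)Ceil; (void)Next; (void)Lo; (void)Hi; (void)Aligned;
}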
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
bool isKnownNeverZero() const
Return true if it's known this can never be a zero.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
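Putting the MachinePointerInfo and MachineFunction pieces above together, a stack-slot memory operand can be allocated roughly like this (the function name is illustrative):
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
using namespace llvm;

MachineMemOperand *makeFixedStackStoreMMO(MachineFunction &MF, int FrameIdx,
                                          LLT MemTy, Align A) {
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
  return MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, MemTy, A);
}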
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.