LLVM 23.0.0git
LegalizerHelper.cpp
1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
36#include "llvm/Support/Debug.h"
40#include <numeric>
41#include <optional>
42
43#define DEBUG_TYPE "legalizer"
44
45using namespace llvm;
46using namespace LegalizeActions;
47using namespace MIPatternMatch;
48
49/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
50///
51/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
52/// with any leftover piece as type \p LeftoverTy
53///
54/// Returns -1 in the first element of the pair if the breakdown is not
55/// satisfiable.
56static std::pair<int, int>
57getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
58 assert(!LeftoverTy.isValid() && "this is an out argument");
59
60 unsigned Size = OrigTy.getSizeInBits();
61 unsigned NarrowSize = NarrowTy.getSizeInBits();
62 unsigned NumParts = Size / NarrowSize;
63 unsigned LeftoverSize = Size - NumParts * NarrowSize;
64 assert(Size > NarrowSize);
65
66 if (LeftoverSize == 0)
67 return {NumParts, 0};
68
69 if (NarrowTy.isVector()) {
70 unsigned EltSize = OrigTy.getScalarSizeInBits();
71 if (LeftoverSize % EltSize != 0)
72 return {-1, -1};
73 LeftoverTy = OrigTy.changeElementCount(
74 ElementCount::getFixed(LeftoverSize / EltSize));
75 } else {
76 LeftoverTy = LLT::scalar(LeftoverSize);
77 }
78
79 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
80 return std::make_pair(NumParts, NumLeftover);
81}
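A worked example of the breakdown (hypothetical caller, not part of the source): splitting an s70 value into s32 pieces yields two full parts plus an s6 leftover.

    // Hypothetical usage sketch: break an s70 scalar into s32 pieces.
    LLT Leftover;                        // invalid LLT, acts as the out argument
    auto [NumParts, NumLeftover] =
        getNarrowTypeBreakDown(LLT::scalar(70), LLT::scalar(32), Leftover);
    // NumParts == 2, NumLeftover == 1, Leftover == LLT::scalar(6)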
82
83static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
84
85 if (!Ty.isScalar())
86 return nullptr;
87
88 switch (Ty.getSizeInBits()) {
89 case 16:
90 return Type::getHalfTy(Ctx);
91 case 32:
92 return Type::getFloatTy(Ctx);
93 case 64:
94 return Type::getDoubleTy(Ctx);
95 case 80:
96 return Type::getX86_FP80Ty(Ctx);
97 case 128:
98 return Type::getFP128Ty(Ctx);
99 default:
100 return nullptr;
101 }
102}
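Sample mappings (illustrative only; Ctx stands for some LLVMContext in scope):

    Type *F32 = getFloatTypeForLLT(Ctx, LLT::scalar(32)); // float
    Type *F80 = getFloatTypeForLLT(Ctx, LLT::scalar(80)); // x86_fp80
    Type *Bad = getFloatTypeForLLT(Ctx, LLT::scalar(37)); // nullptr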
103
104LegalizerHelper::LegalizerHelper(MachineFunction &MF,
105                                 GISelChangeObserver &Observer,
106                                 MachineIRBuilder &Builder,
107 const LibcallLoweringInfo *Libcalls)
108 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
109 LI(*MF.getSubtarget().getLegalizerInfo()),
110 TLI(*MF.getSubtarget().getTargetLowering()), Libcalls(Libcalls) {}
111
112LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
113                                 GISelChangeObserver &Observer,
114                                 MachineIRBuilder &B,
115                                 const LibcallLoweringInfo *Libcalls,
116                                 GISelValueTracking *VT)
117 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
118 TLI(*MF.getSubtarget().getTargetLowering()), Libcalls(Libcalls), VT(VT) {}
119
120LegalizerHelper::LegalizeResult
121LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
122                                   LostDebugLocObserver &LocObserver) {
123 LLVM_DEBUG(dbgs() << "\nLegalizing: " << MI);
124
125 MIRBuilder.setInstrAndDebugLoc(MI);
126
127 if (isa<GIntrinsic>(MI))
128 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
129 auto Step = LI.getAction(MI, MRI);
130 switch (Step.Action) {
131 case Legal:
132 LLVM_DEBUG(dbgs() << ".. Already legal\n");
133 return AlreadyLegal;
134 case Libcall:
135 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
136 return libcall(MI, LocObserver);
137 case NarrowScalar:
138 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
139 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
140 case WidenScalar:
141 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
142 return widenScalar(MI, Step.TypeIdx, Step.NewType);
143 case Bitcast:
144 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
145 return bitcast(MI, Step.TypeIdx, Step.NewType);
146 case Lower:
147 LLVM_DEBUG(dbgs() << ".. Lower\n");
148 return lower(MI, Step.TypeIdx, Step.NewType);
149 case FewerElements:
150 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
151 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
152 case MoreElements:
153 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
154 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
155 case Custom:
156 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
157 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
158                                                     : UnableToLegalize;
159  default:
160 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
161 return UnableToLegalize;
162 }
163}
164
165void LegalizerHelper::insertParts(Register DstReg,
166 LLT ResultTy, LLT PartTy,
167 ArrayRef<Register> PartRegs,
168 LLT LeftoverTy,
169 ArrayRef<Register> LeftoverRegs) {
170 if (!LeftoverTy.isValid()) {
171 assert(LeftoverRegs.empty());
172
173 if (!ResultTy.isVector()) {
174 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
175 return;
176 }
177
178 if (PartTy.isVector())
179 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
180 else
181 MIRBuilder.buildBuildVector(DstReg, PartRegs);
182 return;
183 }
184
185  // Merge sub-vectors with different numbers of elements and insert into DstReg.
186 if (ResultTy.isVector()) {
187 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
188 SmallVector<Register, 8> AllRegs(PartRegs);
189 AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
190 return mergeMixedSubvectors(DstReg, AllRegs);
191 }
192
193 SmallVector<Register> GCDRegs;
194 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
195 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
196 extractGCDType(GCDRegs, GCDTy, PartReg);
197 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
198 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
199}
200
201void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
202 Register Reg) {
203 LLT Ty = MRI.getType(Reg);
204  SmallVector<Register, 0> RegElts;
205  extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
206 MIRBuilder, MRI);
207 Elts.append(RegElts);
208}
209
210/// Merge \p PartRegs with different types into \p DstReg.
211void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
212 ArrayRef<Register> PartRegs) {
213  SmallVector<Register> AllElts;
214  for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
215 appendVectorElts(AllElts, PartRegs[i]);
216
217 Register Leftover = PartRegs[PartRegs.size() - 1];
218 if (!MRI.getType(Leftover).isVector())
219 AllElts.push_back(Leftover);
220 else
221 appendVectorElts(AllElts, Leftover);
222
223 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
224}
225
226/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
227static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
228                              const MachineInstr &MI) {
229 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
230
231 const int StartIdx = Regs.size();
232 const int NumResults = MI.getNumOperands() - 1;
233 Regs.resize(Regs.size() + NumResults);
234 for (int I = 0; I != NumResults; ++I)
235 Regs[StartIdx + I] = MI.getOperand(I).getReg();
236}
237
238void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
239 LLT GCDTy, Register SrcReg) {
240 LLT SrcTy = MRI.getType(SrcReg);
241 if (SrcTy == GCDTy) {
242 // If the source already evenly divides the result type, we don't need to do
243 // anything.
244 Parts.push_back(SrcReg);
245 } else {
246 // Need to split into common type sized pieces.
247 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
248 getUnmergeResults(Parts, *Unmerge);
249 }
250}
251
252LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
253 LLT NarrowTy, Register SrcReg) {
254 LLT SrcTy = MRI.getType(SrcReg);
255 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
256 extractGCDType(Parts, GCDTy, SrcReg);
257 return GCDTy;
258}
259
260LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
261                                         SmallVectorImpl<Register> &VRegs,
262                                         unsigned PadStrategy) {
263 LLT LCMTy = getLCMType(DstTy, NarrowTy);
264
265 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
266 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
267 int NumOrigSrc = VRegs.size();
268
269 Register PadReg;
270
271 // Get a value we can use to pad the source value if the sources won't evenly
272 // cover the result type.
273 if (NumOrigSrc < NumParts * NumSubParts) {
274 if (PadStrategy == TargetOpcode::G_ZEXT)
275 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
276 else if (PadStrategy == TargetOpcode::G_ANYEXT)
277 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
278 else {
279 assert(PadStrategy == TargetOpcode::G_SEXT);
280
281 // Shift the sign bit of the low register through the high register.
282 auto ShiftAmt =
283 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
284 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
285 }
286 }
287
288 // Registers for the final merge to be produced.
289 SmallVector<Register, 4> Remerge(NumParts);
290
291 // Registers needed for intermediate merges, which will be merged into a
292 // source for Remerge.
293 SmallVector<Register, 4> SubMerge(NumSubParts);
294
295 // Once we've fully read off the end of the original source bits, we can reuse
296 // the same high bits for remaining padding elements.
297 Register AllPadReg;
298
299 // Build merges to the LCM type to cover the original result type.
300 for (int I = 0; I != NumParts; ++I) {
301 bool AllMergePartsArePadding = true;
302
303 // Build the requested merges to the requested type.
304 for (int J = 0; J != NumSubParts; ++J) {
305 int Idx = I * NumSubParts + J;
306 if (Idx >= NumOrigSrc) {
307 SubMerge[J] = PadReg;
308 continue;
309 }
310
311 SubMerge[J] = VRegs[Idx];
312
313 // There are meaningful bits here we can't reuse later.
314 AllMergePartsArePadding = false;
315 }
316
317 // If we've filled up a complete piece with padding bits, we can directly
318 // emit the natural sized constant if applicable, rather than a merge of
319 // smaller constants.
320 if (AllMergePartsArePadding && !AllPadReg) {
321 if (PadStrategy == TargetOpcode::G_ANYEXT)
322 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
323 else if (PadStrategy == TargetOpcode::G_ZEXT)
324 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
325
326 // If this is a sign extension, we can't materialize a trivial constant
327 // with the right type and have to produce a merge.
328 }
329
330 if (AllPadReg) {
331 // Avoid creating additional instructions if we're just adding additional
332 // copies of padding bits.
333 Remerge[I] = AllPadReg;
334 continue;
335 }
336
337 if (NumSubParts == 1)
338 Remerge[I] = SubMerge[0];
339 else
340 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
341
342 // In the sign extend padding case, re-use the first all-signbit merge.
343 if (AllMergePartsArePadding && !AllPadReg)
344 AllPadReg = Remerge[I];
345 }
346
347 VRegs = std::move(Remerge);
348 return LCMTy;
349}
350
351void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
352 ArrayRef<Register> RemergeRegs) {
353 LLT DstTy = MRI.getType(DstReg);
354
355 // Create the merge to the widened source, and extract the relevant bits into
356 // the result.
357
358 if (DstTy == LCMTy) {
359 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
360 return;
361 }
362
363 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
364 if (DstTy.isScalar() && LCMTy.isScalar()) {
365 MIRBuilder.buildTrunc(DstReg, Remerge);
366 return;
367 }
368
369 if (LCMTy.isVector()) {
370 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
371 SmallVector<Register, 8> UnmergeDefs(NumDefs);
372 UnmergeDefs[0] = DstReg;
373 for (unsigned I = 1; I != NumDefs; ++I)
374 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
375
376 MIRBuilder.buildUnmerge(UnmergeDefs,
377 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
378 return;
379 }
380
381 llvm_unreachable("unhandled case");
382}
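The three helpers above are normally used as a pipeline. A minimal sketch of that pattern, assuming a LegalizerHelper member context where DstReg, SrcReg, DstTy and NarrowTy are already defined (the names are placeholders, not from the source):

    SmallVector<Register, 8> Parts;
    // Split SrcReg into pieces of the common GCD type of the three types.
    LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
    // Pad and merge the pieces up to the LCM of DstTy and NarrowTy.
    LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
                                    TargetOpcode::G_ANYEXT);
    // Re-merge to the widened type and extract DstReg from it.
    buildWidenedRemergeToDst(DstReg, LCMTy, Parts);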
383
384static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
385#define RTLIBCASE_INT(LibcallPrefix) \
386 do { \
387 switch (Size) { \
388 case 32: \
389 return RTLIB::LibcallPrefix##32; \
390 case 64: \
391 return RTLIB::LibcallPrefix##64; \
392 case 128: \
393 return RTLIB::LibcallPrefix##128; \
394 default: \
395 llvm_unreachable("unexpected size"); \
396 } \
397 } while (0)
398
399#define RTLIBCASE(LibcallPrefix) \
400 do { \
401 switch (Size) { \
402 case 32: \
403 return RTLIB::LibcallPrefix##32; \
404 case 64: \
405 return RTLIB::LibcallPrefix##64; \
406 case 80: \
407 return RTLIB::LibcallPrefix##80; \
408 case 128: \
409 return RTLIB::LibcallPrefix##128; \
410 default: \
411 llvm_unreachable("unexpected size"); \
412 } \
413 } while (0)
414
415 switch (Opcode) {
416 case TargetOpcode::G_LROUND:
417 RTLIBCASE(LROUND_F);
418 case TargetOpcode::G_LLROUND:
419 RTLIBCASE(LLROUND_F);
420 case TargetOpcode::G_MUL:
421 RTLIBCASE_INT(MUL_I);
422 case TargetOpcode::G_SDIV:
423 RTLIBCASE_INT(SDIV_I);
424 case TargetOpcode::G_UDIV:
425 RTLIBCASE_INT(UDIV_I);
426 case TargetOpcode::G_SREM:
427 RTLIBCASE_INT(SREM_I);
428 case TargetOpcode::G_UREM:
429 RTLIBCASE_INT(UREM_I);
430 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
431 RTLIBCASE_INT(CTLZ_I);
432 case TargetOpcode::G_FADD:
433 RTLIBCASE(ADD_F);
434 case TargetOpcode::G_FSUB:
435 RTLIBCASE(SUB_F);
436 case TargetOpcode::G_FMUL:
437 RTLIBCASE(MUL_F);
438 case TargetOpcode::G_FDIV:
439 RTLIBCASE(DIV_F);
440 case TargetOpcode::G_FEXP:
441 RTLIBCASE(EXP_F);
442 case TargetOpcode::G_FEXP2:
443 RTLIBCASE(EXP2_F);
444 case TargetOpcode::G_FEXP10:
445 RTLIBCASE(EXP10_F);
446 case TargetOpcode::G_FREM:
447 RTLIBCASE(REM_F);
448 case TargetOpcode::G_FPOW:
449 RTLIBCASE(POW_F);
450 case TargetOpcode::G_FPOWI:
451 RTLIBCASE(POWI_F);
452 case TargetOpcode::G_FMA:
453 RTLIBCASE(FMA_F);
454 case TargetOpcode::G_FSIN:
455 RTLIBCASE(SIN_F);
456 case TargetOpcode::G_FCOS:
457 RTLIBCASE(COS_F);
458 case TargetOpcode::G_FTAN:
459 RTLIBCASE(TAN_F);
460 case TargetOpcode::G_FASIN:
461 RTLIBCASE(ASIN_F);
462 case TargetOpcode::G_FACOS:
463 RTLIBCASE(ACOS_F);
464 case TargetOpcode::G_FATAN:
465 RTLIBCASE(ATAN_F);
466 case TargetOpcode::G_FATAN2:
467 RTLIBCASE(ATAN2_F);
468 case TargetOpcode::G_FSINH:
469 RTLIBCASE(SINH_F);
470 case TargetOpcode::G_FCOSH:
471 RTLIBCASE(COSH_F);
472 case TargetOpcode::G_FTANH:
473 RTLIBCASE(TANH_F);
474 case TargetOpcode::G_FSINCOS:
475 RTLIBCASE(SINCOS_F);
476 case TargetOpcode::G_FMODF:
477 RTLIBCASE(MODF_F);
478 case TargetOpcode::G_FLOG10:
479 RTLIBCASE(LOG10_F);
480 case TargetOpcode::G_FLOG:
481 RTLIBCASE(LOG_F);
482 case TargetOpcode::G_FLOG2:
483 RTLIBCASE(LOG2_F);
484 case TargetOpcode::G_FLDEXP:
485 RTLIBCASE(LDEXP_F);
486 case TargetOpcode::G_FCEIL:
487 RTLIBCASE(CEIL_F);
488 case TargetOpcode::G_FFLOOR:
489 RTLIBCASE(FLOOR_F);
490 case TargetOpcode::G_FMINNUM:
491 RTLIBCASE(FMIN_F);
492 case TargetOpcode::G_FMAXNUM:
493 RTLIBCASE(FMAX_F);
494 case TargetOpcode::G_FMINIMUMNUM:
495 RTLIBCASE(FMINIMUM_NUM_F);
496 case TargetOpcode::G_FMAXIMUMNUM:
497 RTLIBCASE(FMAXIMUM_NUM_F);
498 case TargetOpcode::G_FSQRT:
499 RTLIBCASE(SQRT_F);
500 case TargetOpcode::G_FRINT:
501 RTLIBCASE(RINT_F);
502 case TargetOpcode::G_FNEARBYINT:
503 RTLIBCASE(NEARBYINT_F);
504 case TargetOpcode::G_INTRINSIC_TRUNC:
505 RTLIBCASE(TRUNC_F);
506 case TargetOpcode::G_INTRINSIC_ROUND:
507 RTLIBCASE(ROUND_F);
508 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
509 RTLIBCASE(ROUNDEVEN_F);
510 case TargetOpcode::G_INTRINSIC_LRINT:
511 RTLIBCASE(LRINT_F);
512 case TargetOpcode::G_INTRINSIC_LLRINT:
513 RTLIBCASE(LLRINT_F);
514 }
515 llvm_unreachable("Unknown libcall function");
516#undef RTLIBCASE_INT
517#undef RTLIBCASE
518}
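For example, the macros above expand so that:

    getRTLibDesc(TargetOpcode::G_FADD, 32)   == RTLIB::ADD_F32
    getRTLibDesc(TargetOpcode::G_SDIV, 64)   == RTLIB::SDIV_I64
    getRTLibDesc(TargetOpcode::G_FSQRT, 128) == RTLIB::SQRT_F128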
519
520/// True if an instruction is in tail position in its caller. Intended for
521/// legalizing libcalls as tail calls when possible.
522static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
523                                    MachineInstr &MI,
524                                    const TargetInstrInfo &TII,
525                                    MachineRegisterInfo &MRI) {
526  MachineBasicBlock &MBB = *MI.getParent();
527 const Function &F = MBB.getParent()->getFunction();
528
529 // Conservatively require the attributes of the call to match those of
530 // the return. Ignore NoAlias and NonNull because they don't affect the
531 // call sequence.
532 AttributeList CallerAttrs = F.getAttributes();
533 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
534 .removeAttribute(Attribute::NoAlias)
535 .removeAttribute(Attribute::NonNull)
536 .hasAttributes())
537 return false;
538
539 // It's not safe to eliminate the sign / zero extension of the return value.
540 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
541 CallerAttrs.hasRetAttr(Attribute::SExt))
542 return false;
543
544 // Only tail call if the following instruction is a standard return or if we
545 // have a `thisreturn` callee, and a sequence like:
546 //
547 // G_MEMCPY %0, %1, %2
548 // $x0 = COPY %0
549 // RET_ReallyLR implicit $x0
550 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
551 if (Next != MBB.instr_end() && Next->isCopy()) {
552 if (MI.getOpcode() == TargetOpcode::G_BZERO)
553 return false;
554
555    // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
556    // memcpy/etc routines return the same parameter. For others it will be the
557    // returned value.
558 Register VReg = MI.getOperand(0).getReg();
559 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
560 return false;
561
562 Register PReg = Next->getOperand(0).getReg();
563 if (!PReg.isPhysical())
564 return false;
565
566 auto Ret = next_nodbg(Next, MBB.instr_end());
567 if (Ret == MBB.instr_end() || !Ret->isReturn())
568 return false;
569
570 if (Ret->getNumImplicitOperands() != 1)
571 return false;
572
573 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
574 return false;
575
576 // Skip over the COPY that we just validated.
577 Next = Ret;
578 }
579
580 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
581 return false;
582
583 return true;
584}
585
586LegalizerHelper::LegalizeResult LegalizerHelper::createLibcall(
587    const char *Name, const CallLowering::ArgInfo &Result,
588    ArrayRef<CallLowering::ArgInfo> Args, CallingConv::ID CC,
589    LostDebugLocObserver &LocObserver, MachineInstr *MI) const {
590  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
591
592  CallLowering::CallLoweringInfo Info;
593  Info.CallConv = CC;
594 Info.Callee = MachineOperand::CreateES(Name);
595 Info.OrigRet = Result;
596 if (MI)
597 Info.IsTailCall =
598 (Result.Ty->isVoidTy() ||
599 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
600 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
601 *MIRBuilder.getMRI());
602
603 llvm::append_range(Info.OrigArgs, Args);
604 if (!CLI.lowerCall(MIRBuilder, Info))
605    return LegalizerHelper::UnableToLegalize;
606
607 if (MI && Info.LoweredTailCall) {
608 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
609
610 // Check debug locations before removing the return.
611 LocObserver.checkpoint(true);
612
613 // We must have a return following the call (or debug insts) to get past
614 // isLibCallInTailPosition.
615 do {
616 MachineInstr *Next = MI->getNextNode();
617 assert(Next &&
618 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
619 "Expected instr following MI to be return or debug inst?");
620 // We lowered a tail call, so the call is now the return from the block.
621 // Delete the old return.
622 Next->eraseFromParent();
623 } while (MI->getNextNode());
624
625 // We expect to lose the debug location from the return.
626 LocObserver.checkpoint(false);
627 }
628  return LegalizerHelper::Legalized;
629}
630
631LegalizerHelper::LegalizeResult LegalizerHelper::createLibcall(
632    RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result,
633    ArrayRef<CallLowering::ArgInfo> Args, LostDebugLocObserver &LocObserver,
634    MachineInstr *MI) const {
635 if (!Libcalls)
636    return LegalizerHelper::UnableToLegalize;
637
638 RTLIB::LibcallImpl LibcallImpl = Libcalls->getLibcallImpl(Libcall);
639 if (LibcallImpl == RTLIB::Unsupported)
640    return LegalizerHelper::UnableToLegalize;
641
642 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
643
645 const CallingConv::ID CC = TLI.getLibcallImplCallingConv(LibcallImpl);
646 return createLibcall(Name.data(), Result, Args, CC, LocObserver, MI);
647}
648
649// Useful for libcalls where all operands have the same type.
650LegalizerHelper::LegalizeResult
651LegalizerHelper::simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
652 unsigned Size, Type *OpType,
653 LostDebugLocObserver &LocObserver) const {
654 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
655
656 // FIXME: What does the original arg index mean here?
657  SmallVector<CallLowering::ArgInfo, 3> Args;
658  for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
659 Args.push_back({MO.getReg(), OpType, 0});
660 return createLibcall(Libcall, {MI.getOperand(0).getReg(), OpType, 0}, Args,
661 LocObserver, &MI);
662}
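For instance, an s64 G_FREM is mapped by getRTLibDesc to REM_F64 (typically the fmod routine), and both source operands as well as the result are described to call lowering with the IR type double before the original instruction is erased by the caller.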
663
664LegalizerHelper::LegalizeResult LegalizerHelper::emitSincosLibcall(
665 MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType,
666 LostDebugLocObserver &LocObserver) {
667 MachineFunction &MF = *MI.getMF();
669
670 Register DstSin = MI.getOperand(0).getReg();
671 Register DstCos = MI.getOperand(1).getReg();
672 Register Src = MI.getOperand(2).getReg();
673 LLT DstTy = MRI.getType(DstSin);
674
675 int MemSize = DstTy.getSizeInBytes();
676 Align Alignment = getStackTemporaryAlignment(DstTy);
677  const DataLayout &DL = MIRBuilder.getDataLayout();
678  unsigned AddrSpace = DL.getAllocaAddrSpace();
679 MachinePointerInfo PtrInfo;
680
681 Register StackPtrSin =
682 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
683 .getReg(0);
684 Register StackPtrCos =
685 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
686 .getReg(0);
687
688 auto &Ctx = MF.getFunction().getContext();
689 auto LibcallResult = createLibcall(
690 getRTLibDesc(MI.getOpcode(), Size), {{0}, Type::getVoidTy(Ctx), 0},
691 {{Src, OpType, 0},
692 {StackPtrSin, PointerType::get(Ctx, AddrSpace), 1},
693 {StackPtrCos, PointerType::get(Ctx, AddrSpace), 2}},
694 LocObserver, &MI);
695
696 if (LibcallResult != LegalizeResult::Legalized)
697    return LibcallResult;
698
699  MachineMemOperand *LoadMMOSin = MF.getMachineMemOperand(
700      PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
701  MachineMemOperand *LoadMMOCos = MF.getMachineMemOperand(
702      PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
703
704 MIRBuilder.buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
705 MIRBuilder.buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
706 MI.eraseFromParent();
707
708  return LegalizeResult::Legalized;
709}
710
711LegalizerHelper::LegalizeResult
712LegalizerHelper::emitModfLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
713 unsigned Size, Type *OpType,
714 LostDebugLocObserver &LocObserver) {
715 MachineFunction &MF = MIRBuilder.getMF();
716 MachineRegisterInfo &MRI = MF.getRegInfo();
717
718 Register DstFrac = MI.getOperand(0).getReg();
719 Register DstInt = MI.getOperand(1).getReg();
720 Register Src = MI.getOperand(2).getReg();
721 LLT DstTy = MRI.getType(DstFrac);
722
723 int MemSize = DstTy.getSizeInBytes();
724 Align Alignment = getStackTemporaryAlignment(DstTy);
725 const DataLayout &DL = MIRBuilder.getDataLayout();
726 unsigned AddrSpace = DL.getAllocaAddrSpace();
727 MachinePointerInfo PtrInfo;
728
729 Register StackPtrInt =
730 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
731 .getReg(0);
732
733 auto &Ctx = MF.getFunction().getContext();
734 auto LibcallResult = createLibcall(
735 getRTLibDesc(MI.getOpcode(), Size), {DstFrac, OpType, 0},
736 {{Src, OpType, 0}, {StackPtrInt, PointerType::get(Ctx, AddrSpace), 1}},
737 LocObserver, &MI);
738
739 if (LibcallResult != LegalizeResult::Legalized)
740    return LibcallResult;
741
742  MachineMemOperand *LoadMMOInt = MF.getMachineMemOperand(
743      PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
744
745 MIRBuilder.buildLoad(DstInt, StackPtrInt, *LoadMMOInt);
746 MI.eraseFromParent();
747
748  return LegalizeResult::Legalized;
749}
750
751static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
752 Type *FromType) {
753 auto ToMVT = MVT::getVT(ToType);
754 auto FromMVT = MVT::getVT(FromType);
755
756 switch (Opcode) {
757 case TargetOpcode::G_FPEXT:
758 return RTLIB::getFPEXT(FromMVT, ToMVT);
759 case TargetOpcode::G_FPTRUNC:
760 return RTLIB::getFPROUND(FromMVT, ToMVT);
761 case TargetOpcode::G_FPTOSI:
762 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
763 case TargetOpcode::G_FPTOUI:
764 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
765 case TargetOpcode::G_SITOFP:
766 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
767 case TargetOpcode::G_UITOFP:
768 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
769 }
770 llvm_unreachable("Unsupported libcall function");
771}
772
773LegalizerHelper::LegalizeResult LegalizerHelper::conversionLibcall(
774    MachineInstr &MI, Type *ToType, Type *FromType,
775 LostDebugLocObserver &LocObserver, bool IsSigned) const {
776 CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
777 if (FromType->isIntegerTy()) {
778 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
779 Arg.Flags[0].setSExt();
780 else
781 Arg.Flags[0].setZExt();
782 }
783
784 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
785 return createLibcall(Libcall, {MI.getOperand(0).getReg(), ToType, 0}, Arg,
786 LocObserver, &MI);
787}
788
789LegalizerHelper::LegalizeResult
790LegalizerHelper::createMemLibcall(MachineRegisterInfo &MRI, MachineInstr &MI,
791                                  LostDebugLocObserver &LocObserver) const {
792 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
793
794  SmallVector<CallLowering::ArgInfo, 3> Args;
795  // Add all the args, except for the last which is an imm denoting 'tail'.
796 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
797 Register Reg = MI.getOperand(i).getReg();
798
799    // Need to derive an IR type for call lowering.
800 LLT OpLLT = MRI.getType(Reg);
801 Type *OpTy = nullptr;
802 if (OpLLT.isPointer())
803 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
804 else
805 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
806 Args.push_back({Reg, OpTy, 0});
807 }
808
809 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
810 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
811 RTLIB::Libcall RTLibcall;
812 unsigned Opc = MI.getOpcode();
813 const char *Name;
814 switch (Opc) {
815 case TargetOpcode::G_BZERO:
816 RTLibcall = RTLIB::BZERO;
817 Name = TLI.getLibcallName(RTLibcall);
818 break;
819 case TargetOpcode::G_MEMCPY:
820 RTLibcall = RTLIB::MEMCPY;
821 Name = TLI.getLibcallImplName(TLI.getMemcpyImpl()).data();
822 Args[0].Flags[0].setReturned();
823 break;
824 case TargetOpcode::G_MEMMOVE:
825 RTLibcall = RTLIB::MEMMOVE;
826 Name = TLI.getLibcallName(RTLibcall);
827 Args[0].Flags[0].setReturned();
828 break;
829 case TargetOpcode::G_MEMSET:
830 RTLibcall = RTLIB::MEMSET;
831 Name = TLI.getLibcallName(RTLibcall);
832 Args[0].Flags[0].setReturned();
833 break;
834 default:
835 llvm_unreachable("unsupported opcode");
836 }
837
838 // Unsupported libcall on the target.
839 if (!Name) {
840 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
841 << MIRBuilder.getTII().getName(Opc) << "\n");
842    return UnableToLegalize;
843  }
844
846 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
847 Info.Callee = MachineOperand::CreateES(Name);
848 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
849 Info.IsTailCall =
850 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
851 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
852
853 llvm::append_range(Info.OrigArgs, Args);
854 if (!CLI.lowerCall(MIRBuilder, Info))
855    return UnableToLegalize;
856
857 if (Info.LoweredTailCall) {
858 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
859
860 // Check debug locations before removing the return.
861 LocObserver.checkpoint(true);
862
863 // We must have a return following the call (or debug insts) to get past
864 // isLibCallInTailPosition.
865 do {
866 MachineInstr *Next = MI.getNextNode();
867 assert(Next &&
868 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
869 "Expected instr following MI to be return or debug inst?");
870 // We lowered a tail call, so the call is now the return from the block.
871 // Delete the old return.
872 Next->eraseFromParent();
873 } while (MI.getNextNode());
874
875 // We expect to lose the debug location from the return.
876 LocObserver.checkpoint(false);
877 }
878
879  return Legalized;
880}
881
882static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
883 unsigned Opc = MI.getOpcode();
884 auto &AtomicMI = cast<GMemOperation>(MI);
885 auto &MMO = AtomicMI.getMMO();
886 auto Ordering = MMO.getMergedOrdering();
887 LLT MemType = MMO.getMemoryType();
888 uint64_t MemSize = MemType.getSizeInBytes();
889 if (MemType.isVector())
890 return RTLIB::UNKNOWN_LIBCALL;
891
892#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
893#define LCALL5(A) \
894 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
895 switch (Opc) {
896 case TargetOpcode::G_ATOMIC_CMPXCHG:
897 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
898 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
899 return getOutlineAtomicHelper(LC, Ordering, MemSize);
900 }
901 case TargetOpcode::G_ATOMICRMW_XCHG: {
902 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
903 return getOutlineAtomicHelper(LC, Ordering, MemSize);
904 }
905 case TargetOpcode::G_ATOMICRMW_ADD:
906 case TargetOpcode::G_ATOMICRMW_SUB: {
907 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
908 return getOutlineAtomicHelper(LC, Ordering, MemSize);
909 }
910 case TargetOpcode::G_ATOMICRMW_AND: {
911 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
912 return getOutlineAtomicHelper(LC, Ordering, MemSize);
913 }
914 case TargetOpcode::G_ATOMICRMW_OR: {
915 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
916 return getOutlineAtomicHelper(LC, Ordering, MemSize);
917 }
918 case TargetOpcode::G_ATOMICRMW_XOR: {
919 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
920 return getOutlineAtomicHelper(LC, Ordering, MemSize);
921 }
922 default:
923 return RTLIB::UNKNOWN_LIBCALL;
924 }
925#undef LCALLS
926#undef LCALL5
927}
928
929LegalizerHelper::LegalizeResult
930LegalizerHelper::createAtomicLibcall(MachineInstr &MI) {
931  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
932
933 Type *RetTy;
934 SmallVector<Register> RetRegs;
935  SmallVector<CallLowering::ArgInfo, 3> Args;
936  unsigned Opc = MI.getOpcode();
937 switch (Opc) {
938 case TargetOpcode::G_ATOMIC_CMPXCHG:
939 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
940    Register Success;
941    LLT SuccessLLT;
942 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
943 MI.getFirst4RegLLTs();
944 RetRegs.push_back(Ret);
945 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
946 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
947 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
948 NewLLT) = MI.getFirst5RegLLTs();
949 RetRegs.push_back(Success);
950 RetTy = StructType::get(
951 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
952 }
953 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
954 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
955 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
956 break;
957 }
958 case TargetOpcode::G_ATOMICRMW_XCHG:
959 case TargetOpcode::G_ATOMICRMW_ADD:
960 case TargetOpcode::G_ATOMICRMW_SUB:
961 case TargetOpcode::G_ATOMICRMW_AND:
962 case TargetOpcode::G_ATOMICRMW_OR:
963 case TargetOpcode::G_ATOMICRMW_XOR: {
964 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
965 RetRegs.push_back(Ret);
966 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
967 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
968 Val =
969 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
970 .getReg(0);
971 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
972 Val =
973 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
974 .getReg(0);
975 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
976 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
977 break;
978 }
979 default:
980 llvm_unreachable("unsupported opcode");
981 }
982
983 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
984 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
985 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
986 const char *Name = TLI.getLibcallName(RTLibcall);
987
988 // Unsupported libcall on the target.
989 if (!Name) {
990 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
991 << MIRBuilder.getTII().getName(Opc) << "\n");
992    return UnableToLegalize;
993  }
994
995  CallLowering::CallLoweringInfo Info;
996  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
997 Info.Callee = MachineOperand::CreateES(Name);
998 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
999
1000 llvm::append_range(Info.OrigArgs, Args);
1001 if (!CLI.lowerCall(MIRBuilder, Info))
1002    return UnableToLegalize;
1003
1004  return Legalized;
1005}
1006
1007static RTLIB::Libcall
1008getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
1009  RTLIB::Libcall RTLibcall;
1010 switch (MI.getOpcode()) {
1011 case TargetOpcode::G_GET_FPENV:
1012 RTLibcall = RTLIB::FEGETENV;
1013 break;
1014 case TargetOpcode::G_SET_FPENV:
1015 case TargetOpcode::G_RESET_FPENV:
1016 RTLibcall = RTLIB::FESETENV;
1017 break;
1018 case TargetOpcode::G_GET_FPMODE:
1019 RTLibcall = RTLIB::FEGETMODE;
1020 break;
1021 case TargetOpcode::G_SET_FPMODE:
1022 case TargetOpcode::G_RESET_FPMODE:
1023 RTLibcall = RTLIB::FESETMODE;
1024 break;
1025 default:
1026 llvm_unreachable("Unexpected opcode");
1027 }
1028 return RTLibcall;
1029}
1030
1031// Some library functions that read FP state (fegetmode, fegetenv) write the
1032// state into a region in memory. IR intrinsics that do the same operations
1033// (get_fpmode, get_fpenv) return the state as an integer value. To implement
1034// these intrinsics via the library functions, we need to use a temporary
1035// variable, for example:
1036//
1037// %0:_(s32) = G_GET_FPMODE
1038//
1039// is transformed to:
1040//
1041// %1:_(p0) = G_FRAME_INDEX %stack.0
1042// BL &fegetmode
1043// %0:_(s32) = G_LOAD %1
1044//
1045LegalizerHelper::LegalizeResult
1046LegalizerHelper::createGetStateLibcall(MachineInstr &MI,
1047 LostDebugLocObserver &LocObserver) {
1048 const DataLayout &DL = MIRBuilder.getDataLayout();
1049 auto &MF = MIRBuilder.getMF();
1050 auto &MRI = *MIRBuilder.getMRI();
1051 auto &Ctx = MF.getFunction().getContext();
1052
1053 // Create temporary, where library function will put the read state.
1054 Register Dst = MI.getOperand(0).getReg();
1055 LLT StateTy = MRI.getType(Dst);
1056 TypeSize StateSize = StateTy.getSizeInBytes();
1057 Align TempAlign = getStackTemporaryAlignment(StateTy);
1058 MachinePointerInfo TempPtrInfo;
1059 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1060
1061 // Create a call to library function, with the temporary as an argument.
1062 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1063 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1064 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1065 auto Res = createLibcall(
1066 RTLibcall, CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1067 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}), LocObserver,
1068 nullptr);
1069 if (Res != LegalizerHelper::Legalized)
1070 return Res;
1071
1072 // Create a load from the temporary.
1073 MachineMemOperand *MMO = MF.getMachineMemOperand(
1074 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
1075 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1076
1077  return LegalizerHelper::Legalized;
1078}
1079
1080// Similar to `createGetStateLibcall` the function calls a library function
1081// using transient space on the stack. In this case the library function
1082// reads the content of the memory region.
1083LegalizerHelper::LegalizeResult
1084LegalizerHelper::createSetStateLibcall(MachineInstr &MI,
1085 LostDebugLocObserver &LocObserver) {
1086 const DataLayout &DL = MIRBuilder.getDataLayout();
1087 auto &MF = MIRBuilder.getMF();
1088 auto &MRI = *MIRBuilder.getMRI();
1089 auto &Ctx = MF.getFunction().getContext();
1090
1091 // Create temporary, where library function will get the new state.
1092 Register Src = MI.getOperand(0).getReg();
1093 LLT StateTy = MRI.getType(Src);
1094 TypeSize StateSize = StateTy.getSizeInBytes();
1095 Align TempAlign = getStackTemporaryAlignment(StateTy);
1096 MachinePointerInfo TempPtrInfo;
1097 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1098
1099 // Put the new state into the temporary.
1100 MachineMemOperand *MMO = MF.getMachineMemOperand(
1101 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
1102 MIRBuilder.buildStore(Src, Temp, *MMO);
1103
1104 // Create a call to library function, with the temporary as an argument.
1105 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1106 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1107 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1108 return createLibcall(RTLibcall,
1109 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1110 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1111 LocObserver, nullptr);
1112}
1113
1114/// Returns the corresponding libcall for the given Pred and
1115/// the ICMP predicate that should be generated to compare with #0
1116/// after the libcall.
1117static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1118getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {
1119#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1120 do { \
1121 switch (Size) { \
1122 case 32: \
1123 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1124 case 64: \
1125 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1126 case 128: \
1127 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1128 default: \
1129 llvm_unreachable("unexpected size"); \
1130 } \
1131 } while (0)
1132
1133 switch (Pred) {
1134  case CmpInst::FCMP_OEQ:
1135    RTLIBCASE_CMP(OEQ_F, CmpInst::ICMP_EQ);
1136  case CmpInst::FCMP_UNE:
1137    RTLIBCASE_CMP(UNE_F, CmpInst::ICMP_NE);
1138  case CmpInst::FCMP_OGE:
1139    RTLIBCASE_CMP(OGE_F, CmpInst::ICMP_SGE);
1140  case CmpInst::FCMP_OLT:
1141    RTLIBCASE_CMP(OLT_F, CmpInst::ICMP_SLT);
1142  case CmpInst::FCMP_OLE:
1143    RTLIBCASE_CMP(OLE_F, CmpInst::ICMP_SLE);
1144  case CmpInst::FCMP_OGT:
1145    RTLIBCASE_CMP(OGT_F, CmpInst::ICMP_SGT);
1146  case CmpInst::FCMP_UNO:
1147    RTLIBCASE_CMP(UO_F, CmpInst::ICMP_NE);
1148  default:
1149 return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
1150 }
1151}
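For example, with the table above an ordered-equal compare of two s32 values maps to {RTLIB::OEQ_F32, CmpInst::ICMP_EQ}: the comparison libcall (commonly __eqsf2) is emitted first, and its i32 result is then compared equal to zero to produce the final predicate value.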
1152
1153LegalizerHelper::LegalizeResult
1154LegalizerHelper::createFCMPLibcall(MachineInstr &MI,
1155 LostDebugLocObserver &LocObserver) {
1156 auto &MF = MIRBuilder.getMF();
1157 auto &Ctx = MF.getFunction().getContext();
1158 const GFCmp *Cmp = cast<GFCmp>(&MI);
1159
1160 LLT OpLLT = MRI.getType(Cmp->getLHSReg());
1161 unsigned Size = OpLLT.getSizeInBits();
1162 if ((Size != 32 && Size != 64 && Size != 128) ||
1163 OpLLT != MRI.getType(Cmp->getRHSReg()))
1164 return UnableToLegalize;
1165
1166 Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
1167
1168 // DstReg type is s32
1169 const Register DstReg = Cmp->getReg(0);
1170 LLT DstTy = MRI.getType(DstReg);
1171 const auto Cond = Cmp->getCond();
1172
1173 // Reference:
1174 // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
1175 // Generates a libcall followed by ICMP.
1176 const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
1177 const CmpInst::Predicate ICmpPred,
1178 const DstOp &Res) -> Register {
1179 // FCMP libcall always returns an i32, and needs an ICMP with #0.
1180 constexpr LLT TempLLT = LLT::scalar(32);
1181 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1182 // Generate libcall, holding result in Temp
1183 const auto Status = createLibcall(
1184 Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
1185 {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
1186 LocObserver, &MI);
1187 if (!Status)
1188 return {};
1189
1190 // Compare temp with #0 to get the final result.
1191 return MIRBuilder
1192 .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
1193 .getReg(0);
1194 };
1195
1196 // Simple case if we have a direct mapping from predicate to libcall
1197 if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
1198 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1199 ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
1200 if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
1201 return Legalized;
1202 }
1203 return UnableToLegalize;
1204 }
1205
1206 // No direct mapping found, should be generated as combination of libcalls.
1207
1208 switch (Cond) {
1209 case CmpInst::FCMP_UEQ: {
1210 // FCMP_UEQ: unordered or equal
1211 // Convert into (FCMP_OEQ || FCMP_UNO).
1212
1213 const auto [OeqLibcall, OeqPred] =
1214        getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1215    const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1216
1217 const auto [UnoLibcall, UnoPred] =
1218        getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1219    const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1220 if (Oeq && Uno)
1221 MIRBuilder.buildOr(DstReg, Oeq, Uno);
1222 else
1223 return UnableToLegalize;
1224
1225 break;
1226 }
1227 case CmpInst::FCMP_ONE: {
1228 // FCMP_ONE: ordered and operands are unequal
1229 // Convert into (!FCMP_OEQ && !FCMP_UNO).
1230
1231    // We invert the predicate instead of generating a NOT
1232    // to save one instruction.
1233    // On AArch64 isel can even select two cmps into a single ccmp.
1234 const auto [OeqLibcall, OeqPred] =
1235        getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1236    const auto NotOeq =
1237 BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
1238
1239 const auto [UnoLibcall, UnoPred] =
1240        getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1241    const auto NotUno =
1242 BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
1243
1244 if (NotOeq && NotUno)
1245 MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
1246 else
1247 return UnableToLegalize;
1248
1249 break;
1250 }
1251 case CmpInst::FCMP_ULT:
1252 case CmpInst::FCMP_UGE:
1253 case CmpInst::FCMP_UGT:
1254 case CmpInst::FCMP_ULE:
1255 case CmpInst::FCMP_ORD: {
1256 // Convert into: !(inverse(Pred))
1257 // E.g. FCMP_ULT becomes !FCMP_OGE
1258 // This is equivalent to the following, but saves some instructions.
1259 // MIRBuilder.buildNot(
1260 // PredTy,
1261 // MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
1262 // Op1, Op2));
1263 const auto [InversedLibcall, InversedPred] =
1264        getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond), Size);
1265    if (!BuildLibcall(InversedLibcall,
1266 CmpInst::getInversePredicate(InversedPred), DstReg))
1267 return UnableToLegalize;
1268 break;
1269 }
1270 default:
1271 return UnableToLegalize;
1272 }
1273
1274 return Legalized;
1275}
1276
1277// The function is used to legalize operations that set the default
1278// environment state. In the C library a call like `fesetmode(FE_DFL_MODE)` is
1279// used for that. On most targets supported by glibc, FE_DFL_MODE is defined as
1280// `((const femode_t *) -1)`; that assumption is used here. If it does not hold
1281// for some target, the target must provide custom lowering.
1282LegalizerHelper::LegalizeResult
1283LegalizerHelper::createResetStateLibcall(MachineInstr &MI,
1284 LostDebugLocObserver &LocObserver) {
1285 const DataLayout &DL = MIRBuilder.getDataLayout();
1286 auto &MF = MIRBuilder.getMF();
1287 auto &Ctx = MF.getFunction().getContext();
1288
1289 // Create an argument for the library function.
1290 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1291 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1292 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1293 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1294 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1295 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1296 MIRBuilder.buildIntToPtr(Dest, DefValue);
1297
1298 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1299 return createLibcall(
1300 RTLibcall, CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1301 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}), LocObserver, &MI);
1302}
1303
1304LegalizerHelper::LegalizeResult
1305LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1306  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1307
1308 switch (MI.getOpcode()) {
1309 default:
1310 return UnableToLegalize;
1311 case TargetOpcode::G_MUL:
1312 case TargetOpcode::G_SDIV:
1313 case TargetOpcode::G_UDIV:
1314 case TargetOpcode::G_SREM:
1315 case TargetOpcode::G_UREM:
1316 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1317 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1318 unsigned Size = LLTy.getSizeInBits();
1319 Type *HLTy = IntegerType::get(Ctx, Size);
1320 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1321 if (Status != Legalized)
1322 return Status;
1323 break;
1324 }
1325 case TargetOpcode::G_FADD:
1326 case TargetOpcode::G_FSUB:
1327 case TargetOpcode::G_FMUL:
1328 case TargetOpcode::G_FDIV:
1329 case TargetOpcode::G_FMA:
1330 case TargetOpcode::G_FPOW:
1331 case TargetOpcode::G_FREM:
1332 case TargetOpcode::G_FCOS:
1333 case TargetOpcode::G_FSIN:
1334 case TargetOpcode::G_FTAN:
1335 case TargetOpcode::G_FACOS:
1336 case TargetOpcode::G_FASIN:
1337 case TargetOpcode::G_FATAN:
1338 case TargetOpcode::G_FATAN2:
1339 case TargetOpcode::G_FCOSH:
1340 case TargetOpcode::G_FSINH:
1341 case TargetOpcode::G_FTANH:
1342 case TargetOpcode::G_FLOG10:
1343 case TargetOpcode::G_FLOG:
1344 case TargetOpcode::G_FLOG2:
1345 case TargetOpcode::G_FEXP:
1346 case TargetOpcode::G_FEXP2:
1347 case TargetOpcode::G_FEXP10:
1348 case TargetOpcode::G_FCEIL:
1349 case TargetOpcode::G_FFLOOR:
1350 case TargetOpcode::G_FMINNUM:
1351 case TargetOpcode::G_FMAXNUM:
1352 case TargetOpcode::G_FMINIMUMNUM:
1353 case TargetOpcode::G_FMAXIMUMNUM:
1354 case TargetOpcode::G_FSQRT:
1355 case TargetOpcode::G_FRINT:
1356 case TargetOpcode::G_FNEARBYINT:
1357 case TargetOpcode::G_INTRINSIC_TRUNC:
1358 case TargetOpcode::G_INTRINSIC_ROUND:
1359 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1360 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1361 unsigned Size = LLTy.getSizeInBits();
1362 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1363 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1364 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1365 return UnableToLegalize;
1366 }
1367 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1368 if (Status != Legalized)
1369 return Status;
1370 break;
1371 }
1372 case TargetOpcode::G_FSINCOS: {
1373 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1374 unsigned Size = LLTy.getSizeInBits();
1375 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1376 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1377 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1378 return UnableToLegalize;
1379 }
1380 return emitSincosLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1381 }
1382 case TargetOpcode::G_FMODF: {
1383 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1384 unsigned Size = LLTy.getSizeInBits();
1385 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1386 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1387 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1388 return UnableToLegalize;
1389 }
1390 return emitModfLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1391 }
1392 case TargetOpcode::G_LROUND:
1393 case TargetOpcode::G_LLROUND:
1394 case TargetOpcode::G_INTRINSIC_LRINT:
1395 case TargetOpcode::G_INTRINSIC_LLRINT: {
1396 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1397 unsigned Size = LLTy.getSizeInBits();
1398 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1399 Type *ITy = IntegerType::get(
1400 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1401 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1402 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1403 return UnableToLegalize;
1404 }
1405 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1406    LegalizeResult Status =
1407        createLibcall(Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1408 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1409 if (Status != Legalized)
1410 return Status;
1411 MI.eraseFromParent();
1412 return Legalized;
1413 }
1414 case TargetOpcode::G_FPOWI:
1415 case TargetOpcode::G_FLDEXP: {
1416 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1417 unsigned Size = LLTy.getSizeInBits();
1418 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1419 Type *ITy = IntegerType::get(
1420 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1421 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1422 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1423 return UnableToLegalize;
1424 }
1425 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1426    SmallVector<CallLowering::ArgInfo, 2> Args = {
1427        {MI.getOperand(1).getReg(), HLTy, 0},
1428 {MI.getOperand(2).getReg(), ITy, 1}};
1429 Args[1].Flags[0].setSExt();
1430    LegalizeResult Status = createLibcall(
1431        Libcall, {MI.getOperand(0).getReg(), HLTy, 0}, Args, LocObserver, &MI);
1432 if (Status != Legalized)
1433 return Status;
1434 break;
1435 }
1436 case TargetOpcode::G_FPEXT:
1437 case TargetOpcode::G_FPTRUNC: {
1438 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1439 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1440 if (!FromTy || !ToTy)
1441 return UnableToLegalize;
1442 LegalizeResult Status = conversionLibcall(MI, ToTy, FromTy, LocObserver);
1443 if (Status != Legalized)
1444 return Status;
1445 break;
1446 }
1447 case TargetOpcode::G_FCMP: {
1448 LegalizeResult Status = createFCMPLibcall(MI, LocObserver);
1449 if (Status != Legalized)
1450 return Status;
1451 MI.eraseFromParent();
1452 return Status;
1453 }
1454 case TargetOpcode::G_FPTOSI:
1455 case TargetOpcode::G_FPTOUI: {
1456 // FIXME: Support other types
1457 Type *FromTy =
1458 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1459 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1460 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1461 return UnableToLegalize;
1462    LegalizeResult Status = conversionLibcall(MI, Type::getIntNTy(Ctx, ToSize),
1463                                              FromTy, LocObserver);
1464 if (Status != Legalized)
1465 return Status;
1466 break;
1467 }
1468 case TargetOpcode::G_SITOFP:
1469 case TargetOpcode::G_UITOFP: {
1470 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1471 Type *ToTy =
1472 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1473 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1474 return UnableToLegalize;
1475 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
1476    LegalizeResult Status = conversionLibcall(
1477        MI, ToTy, Type::getIntNTy(Ctx, FromSize), LocObserver, IsSigned);
1478 if (Status != Legalized)
1479 return Status;
1480 break;
1481 }
1482 case TargetOpcode::G_ATOMICRMW_XCHG:
1483 case TargetOpcode::G_ATOMICRMW_ADD:
1484 case TargetOpcode::G_ATOMICRMW_SUB:
1485 case TargetOpcode::G_ATOMICRMW_AND:
1486 case TargetOpcode::G_ATOMICRMW_OR:
1487 case TargetOpcode::G_ATOMICRMW_XOR:
1488 case TargetOpcode::G_ATOMIC_CMPXCHG:
1489 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1490    auto Status = createAtomicLibcall(MI);
1491    if (Status != Legalized)
1492 return Status;
1493 break;
1494 }
1495 case TargetOpcode::G_BZERO:
1496 case TargetOpcode::G_MEMCPY:
1497 case TargetOpcode::G_MEMMOVE:
1498 case TargetOpcode::G_MEMSET: {
1499 LegalizeResult Result =
1500 createMemLibcall(*MIRBuilder.getMRI(), MI, LocObserver);
1501 if (Result != Legalized)
1502 return Result;
1503 MI.eraseFromParent();
1504 return Result;
1505 }
1506 case TargetOpcode::G_GET_FPENV:
1507 case TargetOpcode::G_GET_FPMODE: {
1508 LegalizeResult Result = createGetStateLibcall(MI, LocObserver);
1509 if (Result != Legalized)
1510 return Result;
1511 break;
1512 }
1513 case TargetOpcode::G_SET_FPENV:
1514 case TargetOpcode::G_SET_FPMODE: {
1515 LegalizeResult Result = createSetStateLibcall(MI, LocObserver);
1516 if (Result != Legalized)
1517 return Result;
1518 break;
1519 }
1520 case TargetOpcode::G_RESET_FPENV:
1521 case TargetOpcode::G_RESET_FPMODE: {
1522 LegalizeResult Result = createResetStateLibcall(MI, LocObserver);
1523 if (Result != Legalized)
1524 return Result;
1525 break;
1526 }
1527 }
1528
1529 MI.eraseFromParent();
1530 return Legalized;
1531}
1532
1533LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1534                                                              unsigned TypeIdx,
1535 LLT NarrowTy) {
1536 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1537 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1538
1539 switch (MI.getOpcode()) {
1540 default:
1541 return UnableToLegalize;
1542 case TargetOpcode::G_IMPLICIT_DEF: {
1543 Register DstReg = MI.getOperand(0).getReg();
1544 LLT DstTy = MRI.getType(DstReg);
1545
1546 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1547 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1548 // FIXME: Although this would also be legal for the general case, it causes
1549 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1550 // combines not being hit). This seems to be a problem related to the
1551 // artifact combiner.
1552 if (SizeOp0 % NarrowSize != 0) {
1553 LLT ImplicitTy = DstTy.changeElementType(NarrowTy);
1554 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1555 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1556
1557 MI.eraseFromParent();
1558 return Legalized;
1559 }
1560
1561 int NumParts = SizeOp0 / NarrowSize;
1562
1563    SmallVector<Register, 2> DstRegs;
1564    for (int i = 0; i < NumParts; ++i)
1565 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1566
1567 if (DstTy.isVector())
1568 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1569 else
1570 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1571 MI.eraseFromParent();
1572 return Legalized;
1573 }
1574 case TargetOpcode::G_CONSTANT: {
1575 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1576 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1577 unsigned TotalSize = Ty.getSizeInBits();
1578 unsigned NarrowSize = NarrowTy.getSizeInBits();
1579 int NumParts = TotalSize / NarrowSize;
1580
1581 SmallVector<Register, 4> PartRegs;
1582 for (int I = 0; I != NumParts; ++I) {
1583 unsigned Offset = I * NarrowSize;
1584 auto K = MIRBuilder.buildConstant(NarrowTy,
1585 Val.lshr(Offset).trunc(NarrowSize));
1586 PartRegs.push_back(K.getReg(0));
1587 }
1588
1589 LLT LeftoverTy;
1590 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1591 SmallVector<Register, 1> LeftoverRegs;
1592 if (LeftoverBits != 0) {
1593 LeftoverTy = LLT::scalar(LeftoverBits);
1594 auto K = MIRBuilder.buildConstant(
1595 LeftoverTy,
1596 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1597 LeftoverRegs.push_back(K.getReg(0));
1598 }
1599
1600 insertParts(MI.getOperand(0).getReg(),
1601 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1602
1603 MI.eraseFromParent();
1604 return Legalized;
1605 }
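  // Illustration (not from the source): narrowing %x:_(s70) = G_CONSTANT to
  // NarrowTy = s32 emits two s32 G_CONSTANTs holding bits [0,32) and [32,64),
  // plus an s6 G_CONSTANT for the leftover bits, and insertParts re-merges
  // them into the original s70 definition.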
1606 case TargetOpcode::G_SEXT:
1607 case TargetOpcode::G_ZEXT:
1608 case TargetOpcode::G_ANYEXT:
1609 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1610 case TargetOpcode::G_TRUNC: {
1611 if (TypeIdx != 1)
1612 return UnableToLegalize;
1613
1614 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1615 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1616 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1617 return UnableToLegalize;
1618 }
1619
1620 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1621 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1622 MI.eraseFromParent();
1623 return Legalized;
1624 }
1625 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1626 case TargetOpcode::G_FREEZE: {
1627 if (TypeIdx != 0)
1628 return UnableToLegalize;
1629
1630 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1631 // Should widen scalar first
1632 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1633 return UnableToLegalize;
1634
1635 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1636    SmallVector<Register, 8> Parts;
1637    for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1638 Parts.push_back(
1639 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1640 .getReg(0));
1641 }
1642
1643 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1644 MI.eraseFromParent();
1645 return Legalized;
1646 }
1647 case TargetOpcode::G_ADD:
1648 case TargetOpcode::G_SUB:
1649 case TargetOpcode::G_SADDO:
1650 case TargetOpcode::G_SSUBO:
1651 case TargetOpcode::G_SADDE:
1652 case TargetOpcode::G_SSUBE:
1653 case TargetOpcode::G_UADDO:
1654 case TargetOpcode::G_USUBO:
1655 case TargetOpcode::G_UADDE:
1656 case TargetOpcode::G_USUBE:
1657 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1658 case TargetOpcode::G_MUL:
1659 case TargetOpcode::G_UMULH:
1660 return narrowScalarMul(MI, NarrowTy);
1661 case TargetOpcode::G_EXTRACT:
1662 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1663 case TargetOpcode::G_INSERT:
1664 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1665 case TargetOpcode::G_LOAD: {
1666 auto &LoadMI = cast<GLoad>(MI);
1667 Register DstReg = LoadMI.getDstReg();
1668 LLT DstTy = MRI.getType(DstReg);
1669 if (DstTy.isVector())
1670 return UnableToLegalize;
1671
1672 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1673 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1674 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1675 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1676 LoadMI.eraseFromParent();
1677 return Legalized;
1678 }
1679
1680 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1681 }
1682 case TargetOpcode::G_ZEXTLOAD:
1683 case TargetOpcode::G_SEXTLOAD: {
1684 auto &LoadMI = cast<GExtLoad>(MI);
1685 Register DstReg = LoadMI.getDstReg();
1686 Register PtrReg = LoadMI.getPointerReg();
1687
1688 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1689 auto &MMO = LoadMI.getMMO();
1690 unsigned MemSize = MMO.getSizeInBits().getValue();
1691
1692 if (MemSize == NarrowSize) {
1693 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1694 } else if (MemSize < NarrowSize) {
1695 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1696 } else if (MemSize > NarrowSize) {
1697 // FIXME: Need to split the load.
1698 return UnableToLegalize;
1699 }
1700
1701 if (isa<GZExtLoad>(LoadMI))
1702 MIRBuilder.buildZExt(DstReg, TmpReg);
1703 else
1704 MIRBuilder.buildSExt(DstReg, TmpReg);
1705
1706 LoadMI.eraseFromParent();
1707 return Legalized;
1708 }
1709 case TargetOpcode::G_STORE: {
1710 auto &StoreMI = cast<GStore>(MI);
1711
1712 Register SrcReg = StoreMI.getValueReg();
1713 LLT SrcTy = MRI.getType(SrcReg);
1714 if (SrcTy.isVector())
1715 return UnableToLegalize;
1716
1717 int NumParts = SizeOp0 / NarrowSize;
1718 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1719 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1720 if (SrcTy.isVector() && LeftoverBits != 0)
1721 return UnableToLegalize;
1722
1723 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1724 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1725 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1726 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1727 StoreMI.eraseFromParent();
1728 return Legalized;
1729 }
1730
1731 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1732 }
1733 case TargetOpcode::G_SELECT:
1734 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1735 case TargetOpcode::G_AND:
1736 case TargetOpcode::G_OR:
1737 case TargetOpcode::G_XOR: {
1738 // Legalize bitwise operation:
1739 // A = BinOp<Ty> B, C
1740 // into:
1741 // B1, ..., BN = G_UNMERGE_VALUES B
1742 // C1, ..., CN = G_UNMERGE_VALUES C
1743 // A1 = BinOp<Ty/N> B1, C1
1744 // ...
1745 // AN = BinOp<Ty/N> BN, CN
1746 // A = G_MERGE_VALUES A1, ..., AN
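// Illustrative example (not taken from the source): narrowing a 64-bit
// G_AND to 32-bit pieces, with made-up register names:
//   %b0:_(s32), %b1:_(s32) = G_UNMERGE_VALUES %b:_(s64)
//   %c0:_(s32), %c1:_(s32) = G_UNMERGE_VALUES %c:_(s64)
//   %a0:_(s32) = G_AND %b0, %c0
//   %a1:_(s32) = G_AND %b1, %c1
//   %a:_(s64) = G_MERGE_VALUES %a0, %a1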
1747 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1748 }
1749 case TargetOpcode::G_SHL:
1750 case TargetOpcode::G_LSHR:
1751 case TargetOpcode::G_ASHR:
1752 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1753 case TargetOpcode::G_CTLZ:
1754 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1755 case TargetOpcode::G_CTTZ:
1756 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1757 case TargetOpcode::G_CTLS:
1758 case TargetOpcode::G_CTPOP:
1759 if (TypeIdx == 1)
1760 switch (MI.getOpcode()) {
1761 case TargetOpcode::G_CTLZ:
1762 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1763 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1764 case TargetOpcode::G_CTTZ:
1765 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1766 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1767 case TargetOpcode::G_CTPOP:
1768 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1769 case TargetOpcode::G_CTLS:
1770 return narrowScalarCTLS(MI, TypeIdx, NarrowTy);
1771 default:
1772 return UnableToLegalize;
1773 }
1774
1775 Observer.changingInstr(MI);
1776 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1777 Observer.changedInstr(MI);
1778 return Legalized;
1779 case TargetOpcode::G_INTTOPTR:
1780 if (TypeIdx != 1)
1781 return UnableToLegalize;
1782
1783 Observer.changingInstr(MI);
1784 narrowScalarSrc(MI, NarrowTy, 1);
1785 Observer.changedInstr(MI);
1786 return Legalized;
1787 case TargetOpcode::G_PTRTOINT:
1788 if (TypeIdx != 0)
1789 return UnableToLegalize;
1790
1791 Observer.changingInstr(MI);
1792 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1793 Observer.changedInstr(MI);
1794 return Legalized;
1795 case TargetOpcode::G_PHI: {
1796 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1797 // NarrowSize.
1798 if (SizeOp0 % NarrowSize != 0)
1799 return UnableToLegalize;
1800
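// Each incoming value is split into NarrowTy pieces in its predecessor block
// (just before the terminator), one G_PHI is built per piece in this block,
// and the pieces are merged back together after the PHIs.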
1801 unsigned NumParts = SizeOp0 / NarrowSize;
1802 SmallVector<Register, 2> DstRegs(NumParts);
1803 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1804 Observer.changingInstr(MI);
1805 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1806 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1807 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1808 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1809 SrcRegs[i / 2], MIRBuilder, MRI);
1810 }
1811 MachineBasicBlock &MBB = *MI.getParent();
1812 MIRBuilder.setInsertPt(MBB, MI);
1813 for (unsigned i = 0; i < NumParts; ++i) {
1814 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1815 MachineInstrBuilder MIB =
1816 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1817 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1818 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1819 }
1820 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1821 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1822 Observer.changedInstr(MI);
1823 MI.eraseFromParent();
1824 return Legalized;
1825 }
1826 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1827 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1828 if (TypeIdx != 2)
1829 return UnableToLegalize;
1830
1831 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1832 Observer.changingInstr(MI);
1833 narrowScalarSrc(MI, NarrowTy, OpIdx);
1834 Observer.changedInstr(MI);
1835 return Legalized;
1836 }
1837 case TargetOpcode::G_ICMP: {
1838 Register LHS = MI.getOperand(2).getReg();
1839 LLT SrcTy = MRI.getType(LHS);
1840 CmpInst::Predicate Pred =
1841 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1842
1843 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1844 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1845 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1846 LHSLeftoverRegs, MIRBuilder, MRI))
1847 return UnableToLegalize;
1848
1849 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1850 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1851 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1852 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1853 return UnableToLegalize;
1854
1855 // We now have the LHS and RHS of the compare split into narrow-type
1856 // registers, plus potentially some leftover type.
1857 Register Dst = MI.getOperand(0).getReg();
1858 LLT ResTy = MRI.getType(Dst);
1859 if (ICmpInst::isEquality(Pred)) {
1860 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1861 // them together. For each equal part, the result should be all 0s. For
1862 // each non-equal part, we'll get at least one 1.
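// Illustrative example (not taken from the source): an s64 equality compare
// narrowed to s32 pieces becomes
//   %x0:_(s32) = G_XOR %lhs0, %rhs0
//   %x1:_(s32) = G_XOR %lhs1, %rhs1
//   %or:_(s32) = G_OR %x0, %x1
//   %dst:_(s1) = G_ICMP intpred(eq/ne), %or, 0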
1863 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1864 SmallVector<Register, 4> Xors;
1865 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1866 auto LHS = std::get<0>(LHSAndRHS);
1867 auto RHS = std::get<1>(LHSAndRHS);
1868 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1869 Xors.push_back(Xor);
1870 }
1871
1872 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1873 // to the desired narrow type so that we can OR them together later.
1874 SmallVector<Register, 4> WidenedXors;
1875 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1876 auto LHS = std::get<0>(LHSAndRHS);
1877 auto RHS = std::get<1>(LHSAndRHS);
1878 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1879 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1880 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1881 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1882 llvm::append_range(Xors, WidenedXors);
1883 }
1884
1885 // Now, for each part we broke up, we know if they are equal/not equal
1886 // based off the G_XOR. We can OR these all together and compare against
1887 // 0 to get the result.
1888 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1889 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1890 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1891 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1892 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1893 } else {
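// For ordering predicates, walk the pieces from least to most significant:
// if the two pieces at this position are equal, the verdict from the lower
// pieces (CmpIn) carries through; otherwise the unsigned comparison of this
// piece decides. Only the most significant piece uses the original (possibly
// signed) predicate.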
1894 Register CmpIn;
1895 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1896 Register CmpOut;
1897 CmpInst::Predicate PartPred;
1898
1899 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1900 PartPred = Pred;
1901 CmpOut = Dst;
1902 } else {
1903 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1904 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1905 }
1906
1907 if (!CmpIn) {
1908 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
1909 RHSPartRegs[I]);
1910 } else {
1911 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
1912 RHSPartRegs[I]);
1913 auto CmpEq = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1914 LHSPartRegs[I], RHSPartRegs[I]);
1915 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1916 }
1917
1918 CmpIn = CmpOut;
1919 }
1920
1921 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1922 Register CmpOut;
1923 CmpInst::Predicate PartPred;
1924
1925 if (I == E - 1) {
1926 PartPred = Pred;
1927 CmpOut = Dst;
1928 } else {
1929 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1930 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1931 }
1932
1933 if (!CmpIn) {
1934 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
1935 RHSLeftoverRegs[I]);
1936 } else {
1937 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
1938 RHSLeftoverRegs[I]);
1939 auto CmpEq =
1940 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1941 LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
1942 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1943 }
1944
1945 CmpIn = CmpOut;
1946 }
1947 }
1948 MI.eraseFromParent();
1949 return Legalized;
1950 }
1951 case TargetOpcode::G_FCMP:
1952 if (TypeIdx != 0)
1953 return UnableToLegalize;
1954
1955 Observer.changingInstr(MI);
1956 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1957 Observer.changedInstr(MI);
1958 return Legalized;
1959
1960 case TargetOpcode::G_SEXT_INREG: {
1961 if (TypeIdx != 0)
1962 return UnableToLegalize;
1963
1964 int64_t SizeInBits = MI.getOperand(2).getImm();
1965
1966 // So long as the new type has more bits than the bits we're extending, we
1967 // don't need to break it apart.
1968 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1969 Observer.changingInstr(MI);
1970 // We don't lose any non-extension bits by truncating the src and
1971 // sign-extending the dst.
1972 MachineOperand &MO1 = MI.getOperand(1);
1973 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1974 MO1.setReg(TruncMIB.getReg(0));
1975
1976 MachineOperand &MO2 = MI.getOperand(0);
1977 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1978 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1979 MIRBuilder.buildSExt(MO2, DstExt);
1980 MO2.setReg(DstExt);
1981 Observer.changedInstr(MI);
1982 return Legalized;
1983 }
1984
1985 // Break it apart. Components below the extension point are unmodified. The
1986 // component containing the extension point becomes a narrower SEXT_INREG.
1987 // Components above it are ashr'd from the component containing the
1988 // extension point.
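// Illustrative example (not taken from the source): G_SEXT_INREG %x:_(s96), 40
// with NarrowTy s32 splits into three pieces: piece 0 (bits 0-31) is kept
// as-is, piece 1 holds the extension point and becomes G_SEXT_INREG piece1, 8,
// and piece 2 becomes a G_ASHR of that result by 31.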
1989 if (SizeOp0 % NarrowSize != 0)
1990 return UnableToLegalize;
1991 int NumParts = SizeOp0 / NarrowSize;
1992
1993 // List the registers where the destination will be scattered.
1994 SmallVector<Register, 2> DstRegs;
1995 // List the registers where the source will be split.
1996 SmallVector<Register, 2> SrcRegs;
1997
1998 // Create all the temporary registers.
1999 for (int i = 0; i < NumParts; ++i) {
2000 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
2001
2002 SrcRegs.push_back(SrcReg);
2003 }
2004
2005 // Explode the big arguments into smaller chunks.
2006 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
2007
2008 Register AshrCstReg =
2009 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
2010 .getReg(0);
2011 Register FullExtensionReg;
2012 Register PartialExtensionReg;
2013
2014 // Do the operation on each small part.
2015 for (int i = 0; i < NumParts; ++i) {
2016 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
2017 DstRegs.push_back(SrcRegs[i]);
2018 PartialExtensionReg = DstRegs.back();
2019 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
2020 assert(PartialExtensionReg &&
2021 "Expected to visit partial extension before full");
2022 if (FullExtensionReg) {
2023 DstRegs.push_back(FullExtensionReg);
2024 continue;
2025 }
2026 DstRegs.push_back(
2027 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
2028 .getReg(0));
2029 FullExtensionReg = DstRegs.back();
2030 } else {
2031 DstRegs.push_back(
2032 MIRBuilder
2033 .buildInstr(
2034 TargetOpcode::G_SEXT_INREG, {NarrowTy},
2035 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
2036 .getReg(0));
2037 PartialExtensionReg = DstRegs.back();
2038 }
2039 }
2040
2041 // Gather the destination registers into the final destination.
2042 Register DstReg = MI.getOperand(0).getReg();
2043 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2044 MI.eraseFromParent();
2045 return Legalized;
2046 }
2047 case TargetOpcode::G_BSWAP:
2048 case TargetOpcode::G_BITREVERSE: {
2049 if (SizeOp0 % NarrowSize != 0)
2050 return UnableToLegalize;
2051
2052 Observer.changingInstr(MI);
2053 SmallVector<Register, 2> SrcRegs, DstRegs;
2054 unsigned NumParts = SizeOp0 / NarrowSize;
2055 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2056 MIRBuilder, MRI);
2057
2058 for (unsigned i = 0; i < NumParts; ++i) {
2059 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
2060 {SrcRegs[NumParts - 1 - i]});
2061 DstRegs.push_back(DstPart.getReg(0));
2062 }
2063
2064 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
2065
2066 Observer.changedInstr(MI);
2067 MI.eraseFromParent();
2068 return Legalized;
2069 }
2070 case TargetOpcode::G_PTR_ADD:
2071 case TargetOpcode::G_PTRMASK: {
2072 if (TypeIdx != 1)
2073 return UnableToLegalize;
2074 Observer.changingInstr(MI);
2075 narrowScalarSrc(MI, NarrowTy, 2);
2076 Observer.changedInstr(MI);
2077 return Legalized;
2078 }
2079 case TargetOpcode::G_FPTOUI:
2080 case TargetOpcode::G_FPTOSI:
2081 case TargetOpcode::G_FPTOUI_SAT:
2082 case TargetOpcode::G_FPTOSI_SAT:
2083 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
2084 case TargetOpcode::G_FPEXT:
2085 if (TypeIdx != 0)
2086 return UnableToLegalize;
2087 Observer.changingInstr(MI);
2088 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
2089 Observer.changedInstr(MI);
2090 return Legalized;
2091 case TargetOpcode::G_FLDEXP:
2092 case TargetOpcode::G_STRICT_FLDEXP:
2093 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
2094 case TargetOpcode::G_VSCALE: {
2095 Register Dst = MI.getOperand(0).getReg();
2096 LLT Ty = MRI.getType(Dst);
2097
2098 // Assume VSCALE(1) fits into a legal integer
2099 const APInt One(NarrowTy.getSizeInBits(), 1);
2100 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
2101 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
2102 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
2103 MIRBuilder.buildMul(Dst, ZExt, C);
2104
2105 MI.eraseFromParent();
2106 return Legalized;
2107 }
2108 }
2109}
2110
2111 Register LegalizerHelper::coerceToScalar(Register Val) {
2112 LLT Ty = MRI.getType(Val);
2113 if (Ty.isScalar())
2114 return Val;
2115
2116 const DataLayout &DL = MIRBuilder.getDataLayout();
2117 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
2118 if (Ty.isPointer()) {
2119 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2120 return Register();
2121 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2122 }
2123
2124 Register NewVal = Val;
2125
2126 assert(Ty.isVector());
2127 if (Ty.isPointerVector())
2128 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2129 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2130}
2131
2132 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
2133 unsigned OpIdx, unsigned ExtOpcode) {
2134 MachineOperand &MO = MI.getOperand(OpIdx);
2135 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2136 MO.setReg(ExtB.getReg(0));
2137}
2138
2139 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
2140 unsigned OpIdx) {
2141 MachineOperand &MO = MI.getOperand(OpIdx);
2142 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
2143 MO.setReg(ExtB.getReg(0));
2144}
2145
2146 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
2147 unsigned OpIdx, unsigned TruncOpcode) {
2148 MachineOperand &MO = MI.getOperand(OpIdx);
2149 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2150 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2151 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2152 MO.setReg(DstExt);
2153}
2154
2155 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
2156 unsigned OpIdx, unsigned ExtOpcode) {
2157 MachineOperand &MO = MI.getOperand(OpIdx);
2158 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2159 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2160 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2161 MO.setReg(DstTrunc);
2162}
2163
2164 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
2165 unsigned OpIdx) {
2166 MachineOperand &MO = MI.getOperand(OpIdx);
2167 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2168 Register Dst = MO.getReg();
2169 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2170 MO.setReg(DstExt);
2171 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2172}
2173
2174 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
2175 unsigned OpIdx) {
2176 MachineOperand &MO = MI.getOperand(OpIdx);
2177 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2178}
2179
2180 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2181 MachineOperand &Op = MI.getOperand(OpIdx);
2182 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
2183}
2184
2185 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2186 MachineOperand &MO = MI.getOperand(OpIdx);
2187 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2188 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2189 MIRBuilder.buildBitcast(MO, CastDst);
2190 MO.setReg(CastDst);
2191}
2192
2193 LegalizerHelper::LegalizeResult
2194 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
2195 LLT WideTy) {
2196 if (TypeIdx != 1)
2197 return UnableToLegalize;
2198
2199 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
2200 if (DstTy.isVector())
2201 return UnableToLegalize;
2202
2203 LLT SrcTy = MRI.getType(Src1Reg);
2204 const int DstSize = DstTy.getSizeInBits();
2205 const int SrcSize = SrcTy.getSizeInBits();
2206 const int WideSize = WideTy.getSizeInBits();
2207 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2208
2209 unsigned NumOps = MI.getNumOperands();
2210 unsigned NumSrc = MI.getNumOperands() - 1;
2211 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2212
2213 if (WideSize >= DstSize) {
2214 // Directly pack the bits in the target type.
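// Illustrative example (not taken from the source): %d:_(s24) = G_MERGE_VALUES
// %a:_(s8), %b:_(s8), %c:_(s8) widened to s32 zero-extends each source, shifts
// it to its bit offset (0, 8, 16), ORs the pieces together, and truncates the
// s32 result back to s24.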
2215 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
2216
2217 for (unsigned I = 2; I != NumOps; ++I) {
2218 const unsigned Offset = (I - 1) * PartSize;
2219
2220 Register SrcReg = MI.getOperand(I).getReg();
2221 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
2222
2223 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
2224
2225 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
2226 MRI.createGenericVirtualRegister(WideTy);
2227
2228 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
2229 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
2230 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
2231 ResultReg = NextResult;
2232 }
2233
2234 if (WideSize > DstSize)
2235 MIRBuilder.buildTrunc(DstReg, ResultReg);
2236 else if (DstTy.isPointer())
2237 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
2238
2239 MI.eraseFromParent();
2240 return Legalized;
2241 }
2242
2243 // Unmerge the original values to the GCD type, and recombine to the next
2244 // multiple greater than the original type.
2245 //
2246 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2247 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2248 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2249 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2250 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2251 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2252 // %12:_(s12) = G_MERGE_VALUES %10, %11
2253 //
2254 // Padding with undef if necessary:
2255 //
2256 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2257 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2258 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2259 // %7:_(s2) = G_IMPLICIT_DEF
2260 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2261 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2262 // %10:_(s12) = G_MERGE_VALUES %8, %9
2263
2264 const int GCD = std::gcd(SrcSize, WideSize);
2265 LLT GCDTy = LLT::scalar(GCD);
2266
2267 SmallVector<Register, 8> NewMergeRegs;
2268 SmallVector<Register, 8> Unmerges;
2269 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
2270
2271 // Decompose the original operands if they don't evenly divide.
2272 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
2273 Register SrcReg = MO.getReg();
2274 if (GCD == SrcSize) {
2275 Unmerges.push_back(SrcReg);
2276 } else {
2277 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2278 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2279 Unmerges.push_back(Unmerge.getReg(J));
2280 }
2281 }
2282
2283 // Pad with undef to the next size that is a multiple of the requested size.
2284 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
2285 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
2286 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
2287 Unmerges.push_back(UndefReg);
2288 }
2289
2290 const int PartsPerGCD = WideSize / GCD;
2291
2292 // Build merges of each piece.
2293 ArrayRef<Register> Slicer(Unmerges);
2294 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2295 auto Merge =
2296 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2297 NewMergeRegs.push_back(Merge.getReg(0));
2298 }
2299
2300 // A truncate may be necessary if the requested type doesn't evenly divide the
2301 // original result type.
2302 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
2303 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2304 } else {
2305 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2306 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2307 }
2308
2309 MI.eraseFromParent();
2310 return Legalized;
2311}
2312
2313 LegalizerHelper::LegalizeResult
2314 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
2315 LLT WideTy) {
2316 if (TypeIdx != 0)
2317 return UnableToLegalize;
2318
2319 int NumDst = MI.getNumOperands() - 1;
2320 Register SrcReg = MI.getOperand(NumDst).getReg();
2321 LLT SrcTy = MRI.getType(SrcReg);
2322 if (SrcTy.isVector())
2323 return UnableToLegalize;
2324
2325 Register Dst0Reg = MI.getOperand(0).getReg();
2326 LLT DstTy = MRI.getType(Dst0Reg);
2327 if (!DstTy.isScalar())
2328 return UnableToLegalize;
2329
2330 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
2331 if (SrcTy.isPointer()) {
2332 const DataLayout &DL = MIRBuilder.getDataLayout();
2333 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
2334 LLVM_DEBUG(
2335 dbgs() << "Not casting non-integral address space integer\n");
2336 return UnableToLegalize;
2337 }
2338
2339 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
2340 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2341 }
2342
2343 // Widen SrcTy to WideTy. This does not affect the result, but since the
2344 // user requested this size, it is probably better handled than SrcTy and
2345 // should reduce the total number of legalization artifacts.
2346 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2347 SrcTy = WideTy;
2348 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2349 }
2350
2351 // There's no unmerge type to target. Directly extract the bits from the
2352 // source type.
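// Illustrative example (not taken from the source):
//   %a:_(s8), %b:_(s8) = G_UNMERGE_VALUES %x:_(s16)
// widened with WideTy s32 any-extends %x to s32, truncates that for %a, and
// truncates a logical shift right by 8 of it for %b.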
2353 unsigned DstSize = DstTy.getSizeInBits();
2354
2355 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2356 for (int I = 1; I != NumDst; ++I) {
2357 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2358 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2359 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2360 }
2361
2362 MI.eraseFromParent();
2363 return Legalized;
2364 }
2365
2366 // Extend the source to a wider type.
2367 LLT LCMTy = getLCMType(SrcTy, WideTy);
2368
2369 Register WideSrc = SrcReg;
2370 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2371 // TODO: If this is an integral address space, cast to integer and anyext.
2372 if (SrcTy.isPointer()) {
2373 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2374 return UnableToLegalize;
2375 }
2376
2377 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2378 }
2379
2380 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2381
2382 // Create a sequence of unmerges and merges to the original results. Since we
2383 // may have widened the source, we will need to pad the results with dead defs
2384 // to cover the source register.
2385 // e.g. widen s48 to s64:
2386 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2387 //
2388 // =>
2389 // %4:_(s192) = G_ANYEXT %0:_(s96)
2390 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2391 // ; unpack to GCD type, with extra dead defs
2392 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2393 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2394 // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
2395 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2396 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2397 const LLT GCDTy = getGCDType(WideTy, DstTy);
2398 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2399 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2400
2401 // Directly unmerge to the destination without going through a GCD type
2402 // if possible
2403 if (PartsPerRemerge == 1) {
2404 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2405
2406 for (int I = 0; I != NumUnmerge; ++I) {
2407 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2408
2409 for (int J = 0; J != PartsPerUnmerge; ++J) {
2410 int Idx = I * PartsPerUnmerge + J;
2411 if (Idx < NumDst)
2412 MIB.addDef(MI.getOperand(Idx).getReg());
2413 else {
2414 // Create dead def for excess components.
2415 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2416 }
2417 }
2418
2419 MIB.addUse(Unmerge.getReg(I));
2420 }
2421 } else {
2422 SmallVector<Register, 16> Parts;
2423 for (int J = 0; J != NumUnmerge; ++J)
2424 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2425
2426 SmallVector<Register, 8> RemergeParts;
2427 for (int I = 0; I != NumDst; ++I) {
2428 for (int J = 0; J < PartsPerRemerge; ++J) {
2429 const int Idx = I * PartsPerRemerge + J;
2430 RemergeParts.emplace_back(Parts[Idx]);
2431 }
2432
2433 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2434 RemergeParts.clear();
2435 }
2436 }
2437
2438 MI.eraseFromParent();
2439 return Legalized;
2440}
2441
2442 LegalizerHelper::LegalizeResult
2443 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2444 LLT WideTy) {
2445 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2446 unsigned Offset = MI.getOperand(2).getImm();
2447
2448 if (TypeIdx == 0) {
2449 if (SrcTy.isVector() || DstTy.isVector())
2450 return UnableToLegalize;
2451
2452 SrcOp Src(SrcReg);
2453 if (SrcTy.isPointer()) {
2454 // Extracts from pointers can be handled only if they are really just
2455 // simple integers.
2456 const DataLayout &DL = MIRBuilder.getDataLayout();
2457 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2458 return UnableToLegalize;
2459
2460 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2461 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2462 SrcTy = SrcAsIntTy;
2463 }
2464
2465 if (DstTy.isPointer())
2466 return UnableToLegalize;
2467
2468 if (Offset == 0) {
2469 // Avoid a shift in the degenerate case.
2470 MIRBuilder.buildTrunc(DstReg,
2471 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2472 MI.eraseFromParent();
2473 return Legalized;
2474 }
2475
2476 // Do a shift in the source type.
2477 LLT ShiftTy = SrcTy;
2478 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2479 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2480 ShiftTy = WideTy;
2481 }
2482
2483 auto LShr = MIRBuilder.buildLShr(
2484 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2485 MIRBuilder.buildTrunc(DstReg, LShr);
2486 MI.eraseFromParent();
2487 return Legalized;
2488 }
2489
2490 if (SrcTy.isScalar()) {
2491 Observer.changingInstr(MI);
2492 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2493 Observer.changedInstr(MI);
2494 return Legalized;
2495 }
2496
2497 if (!SrcTy.isVector())
2498 return UnableToLegalize;
2499
2500 if (DstTy != SrcTy.getElementType())
2501 return UnableToLegalize;
2502
2503 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2504 return UnableToLegalize;
2505
2506 Observer.changingInstr(MI);
2507 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2508
2509 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2510 Offset);
2511 widenScalarDst(MI, WideTy.getScalarType(), 0);
2512 Observer.changedInstr(MI);
2513 return Legalized;
2514}
2515
2516 LegalizerHelper::LegalizeResult
2517 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2518 LLT WideTy) {
2519 if (TypeIdx != 0 || WideTy.isVector())
2520 return UnableToLegalize;
2521 Observer.changingInstr(MI);
2522 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2523 widenScalarDst(MI, WideTy);
2524 Observer.changedInstr(MI);
2525 return Legalized;
2526}
2527
2528 LegalizerHelper::LegalizeResult
2529 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2530 LLT WideTy) {
2531 unsigned Opcode;
2532 unsigned ExtOpcode;
2533 std::optional<Register> CarryIn;
2534 switch (MI.getOpcode()) {
2535 default:
2536 llvm_unreachable("Unexpected opcode!");
2537 case TargetOpcode::G_SADDO:
2538 Opcode = TargetOpcode::G_ADD;
2539 ExtOpcode = TargetOpcode::G_SEXT;
2540 break;
2541 case TargetOpcode::G_SSUBO:
2542 Opcode = TargetOpcode::G_SUB;
2543 ExtOpcode = TargetOpcode::G_SEXT;
2544 break;
2545 case TargetOpcode::G_UADDO:
2546 Opcode = TargetOpcode::G_ADD;
2547 ExtOpcode = TargetOpcode::G_ZEXT;
2548 break;
2549 case TargetOpcode::G_USUBO:
2550 Opcode = TargetOpcode::G_SUB;
2551 ExtOpcode = TargetOpcode::G_ZEXT;
2552 break;
2553 case TargetOpcode::G_SADDE:
2554 Opcode = TargetOpcode::G_UADDE;
2555 ExtOpcode = TargetOpcode::G_SEXT;
2556 CarryIn = MI.getOperand(4).getReg();
2557 break;
2558 case TargetOpcode::G_SSUBE:
2559 Opcode = TargetOpcode::G_USUBE;
2560 ExtOpcode = TargetOpcode::G_SEXT;
2561 CarryIn = MI.getOperand(4).getReg();
2562 break;
2563 case TargetOpcode::G_UADDE:
2564 Opcode = TargetOpcode::G_UADDE;
2565 ExtOpcode = TargetOpcode::G_ZEXT;
2566 CarryIn = MI.getOperand(4).getReg();
2567 break;
2568 case TargetOpcode::G_USUBE:
2569 Opcode = TargetOpcode::G_USUBE;
2570 ExtOpcode = TargetOpcode::G_ZEXT;
2571 CarryIn = MI.getOperand(4).getReg();
2572 break;
2573 }
2574
2575 if (TypeIdx == 1) {
2576 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2577
2578 Observer.changingInstr(MI);
2579 if (CarryIn)
2580 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2581 widenScalarDst(MI, WideTy, 1);
2582
2583 Observer.changedInstr(MI);
2584 return Legalized;
2585 }
2586
2587 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2588 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2589 // Do the arithmetic in the larger type.
2590 Register NewOp;
2591 if (CarryIn) {
2592 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2593 NewOp = MIRBuilder
2594 .buildInstr(Opcode, {WideTy, CarryOutTy},
2595 {LHSExt, RHSExt, *CarryIn})
2596 .getReg(0);
2597 } else {
2598 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2599 }
2600 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2601 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2602 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2603 // There is no overflow if the ExtOp is the same as NewOp.
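// Illustrative example (not taken from the source): for an s8 G_SADDO widened
// to s32, overflow is reported exactly when sext(trunc<s8>(sum)) != sum.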
2604 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2605 // Now trunc the NewOp to the original result.
2606 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2607 MI.eraseFromParent();
2608 return Legalized;
2609}
2610
2611 LegalizerHelper::LegalizeResult
2612 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2613 LLT WideTy) {
2614 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2615 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2616 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2617 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2618 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2619 // We can convert this to:
2620 // 1. Any extend iN to iM
2621 // 2. SHL by M-N
2622 // 3. [US][ADD|SUB|SHL]SAT
2623 // 4. L/ASHR by M-N
2624 //
2625 // It may be more efficient to lower this to a min and a max operation in
2626 // the higher precision arithmetic if the promoted operation isn't legal,
2627 // but this decision is up to the target's lowering request.
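// Illustrative example (not taken from the source): G_SADDSAT on s8 widened to
// s32 any-extends both operands, shifts them left by 24 so they occupy the top
// byte, performs the s32 G_SADDSAT (whose saturation points now coincide with
// the s8 ones), then arithmetic-shifts right by 24 and truncates back to s8.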
2628 Register DstReg = MI.getOperand(0).getReg();
2629
2630 unsigned NewBits = WideTy.getScalarSizeInBits();
2631 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2632
2633 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2634 // must not left shift the RHS to preserve the shift amount.
2635 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2636 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2637 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2638 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2639 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2640 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2641
2642 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2643 {ShiftL, ShiftR}, MI.getFlags());
2644
2645 // Use a shift that will preserve the number of sign bits when the trunc is
2646 // folded away.
2647 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2648 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2649
2650 MIRBuilder.buildTrunc(DstReg, Result);
2651 MI.eraseFromParent();
2652 return Legalized;
2653}
2654
2655 LegalizerHelper::LegalizeResult
2656 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2657 LLT WideTy) {
2658 if (TypeIdx == 1) {
2659 Observer.changingInstr(MI);
2660 widenScalarDst(MI, WideTy, 1);
2661 Observer.changedInstr(MI);
2662 return Legalized;
2663 }
2664
2665 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2666 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2667 LLT SrcTy = MRI.getType(LHS);
2668 LLT OverflowTy = MRI.getType(OriginalOverflow);
2669 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2670
2671 // To determine if the result overflowed in the larger type, we extend the
2672 // input to the larger type, do the multiply (checking if it overflows),
2673 // then also check the high bits of the result to see if overflow happened
2674 // there.
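// Illustrative example (not taken from the source): an s8 G_SMULO widened to
// s16 sign-extends both operands and uses a plain G_MUL, since the 16-bit
// product of two sign-extended 8-bit values cannot overflow; the overflow bit
// is then whether the high byte fails to sign-extend the low byte.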
2675 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2676 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2677 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2678
2679 // Multiplication cannot overflow if WideTy is >= 2 * the original width,
2680 // so we don't need to check the overflow result of the larger-type Mulo.
2681 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2682
2683 unsigned MulOpc =
2684 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2685
2686 MachineInstrBuilder Mulo;
2687 if (WideMulCanOverflow)
2688 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2689 {LeftOperand, RightOperand});
2690 else
2691 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2692
2693 auto Mul = Mulo->getOperand(0);
2694 MIRBuilder.buildTrunc(Result, Mul);
2695
2696 MachineInstrBuilder ExtResult;
2697 // Overflow occurred if it occurred in the larger type, or if the high part
2698 // of the result does not zero/sign-extend the low part. Check this second
2699 // possibility first.
2700 if (IsSigned) {
2701 // For signed, overflow occurred when the high part does not sign-extend
2702 // the low part.
2703 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2704 } else {
2705 // Unsigned overflow occurred when the high part does not zero-extend the
2706 // low part.
2707 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2708 }
2709
2710 if (WideMulCanOverflow) {
2711 auto Overflow =
2712 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2713 // Finally check if the multiplication in the larger type itself overflowed.
2714 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2715 } else {
2716 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2717 }
2718 MI.eraseFromParent();
2719 return Legalized;
2720}
2721
2722 LegalizerHelper::LegalizeResult
2723 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2724 unsigned Opcode = MI.getOpcode();
2725 switch (Opcode) {
2726 default:
2727 return UnableToLegalize;
2728 case TargetOpcode::G_ATOMICRMW_XCHG:
2729 case TargetOpcode::G_ATOMICRMW_ADD:
2730 case TargetOpcode::G_ATOMICRMW_SUB:
2731 case TargetOpcode::G_ATOMICRMW_AND:
2732 case TargetOpcode::G_ATOMICRMW_OR:
2733 case TargetOpcode::G_ATOMICRMW_XOR:
2734 case TargetOpcode::G_ATOMICRMW_MIN:
2735 case TargetOpcode::G_ATOMICRMW_MAX:
2736 case TargetOpcode::G_ATOMICRMW_UMIN:
2737 case TargetOpcode::G_ATOMICRMW_UMAX:
2738 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2739 Observer.changingInstr(MI);
2740 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2741 widenScalarDst(MI, WideTy, 0);
2742 Observer.changedInstr(MI);
2743 return Legalized;
2744 case TargetOpcode::G_ATOMIC_CMPXCHG:
2745 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2746 Observer.changingInstr(MI);
2747 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2748 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2749 widenScalarDst(MI, WideTy, 0);
2750 Observer.changedInstr(MI);
2751 return Legalized;
2752 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2753 if (TypeIdx == 0) {
2754 Observer.changingInstr(MI);
2755 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2756 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2757 widenScalarDst(MI, WideTy, 0);
2758 Observer.changedInstr(MI);
2759 return Legalized;
2760 }
2761 assert(TypeIdx == 1 &&
2762 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2763 Observer.changingInstr(MI);
2764 widenScalarDst(MI, WideTy, 1);
2765 Observer.changedInstr(MI);
2766 return Legalized;
2767 case TargetOpcode::G_EXTRACT:
2768 return widenScalarExtract(MI, TypeIdx, WideTy);
2769 case TargetOpcode::G_INSERT:
2770 return widenScalarInsert(MI, TypeIdx, WideTy);
2771 case TargetOpcode::G_MERGE_VALUES:
2772 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2773 case TargetOpcode::G_UNMERGE_VALUES:
2774 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2775 case TargetOpcode::G_SADDO:
2776 case TargetOpcode::G_SSUBO:
2777 case TargetOpcode::G_UADDO:
2778 case TargetOpcode::G_USUBO:
2779 case TargetOpcode::G_SADDE:
2780 case TargetOpcode::G_SSUBE:
2781 case TargetOpcode::G_UADDE:
2782 case TargetOpcode::G_USUBE:
2783 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2784 case TargetOpcode::G_UMULO:
2785 case TargetOpcode::G_SMULO:
2786 return widenScalarMulo(MI, TypeIdx, WideTy);
2787 case TargetOpcode::G_SADDSAT:
2788 case TargetOpcode::G_SSUBSAT:
2789 case TargetOpcode::G_SSHLSAT:
2790 case TargetOpcode::G_UADDSAT:
2791 case TargetOpcode::G_USUBSAT:
2792 case TargetOpcode::G_USHLSAT:
2793 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2794 case TargetOpcode::G_CTTZ:
2795 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2796 case TargetOpcode::G_CTLZ:
2797 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2798 case TargetOpcode::G_CTLS:
2799 case TargetOpcode::G_CTPOP: {
2800 if (TypeIdx == 0) {
2801 Observer.changingInstr(MI);
2802 widenScalarDst(MI, WideTy, 0);
2803 Observer.changedInstr(MI);
2804 return Legalized;
2805 }
2806
2807 Register SrcReg = MI.getOperand(1).getReg();
2808
2809 // First extend the input.
2810 unsigned ExtOpc;
2811 switch (Opcode) {
2812 case TargetOpcode::G_CTTZ:
2813 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2814 ExtOpc = TargetOpcode::G_ANYEXT;
2815 break;
2816 case TargetOpcode::G_CTLS:
2817 ExtOpc = TargetOpcode::G_SEXT;
2818 break;
2819 default:
2820 ExtOpc = TargetOpcode::G_ZEXT;
2821 }
2822
2823 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2824 LLT CurTy = MRI.getType(SrcReg);
2825 unsigned NewOpc = Opcode;
2826 if (NewOpc == TargetOpcode::G_CTTZ) {
2827 // The count is the same in the larger type except if the original
2828 // value was zero. This can be handled by setting the bit just off
2829 // the top of the original type.
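// Illustrative example (not taken from the source): for an s8 G_CTTZ widened
// to s32, the widened source is ORed with 1 << 8, so an all-zero input still
// yields the correct count of 8 and G_CTTZ_ZERO_UNDEF can be used.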
2830 auto TopBit =
2831 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2832 MIBSrc = MIRBuilder.buildOr(
2833 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2834 // Now we know the operand is non-zero, use the more relaxed opcode.
2835 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2836 }
2837
2838 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2839
2840 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2841 // An optimization where the result is the CTLZ after shifting left by the
2842 // size difference between WideTy and CurTy, that is,
2843 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2844 // Result = ctlz MIBSrc
2845 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2846 MIRBuilder.buildConstant(WideTy, SizeDiff));
2847 }
2848
2849 // Perform the operation at the larger size.
2850 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2851 // This is already the correct result for CTPOP and the CTTZ variants.
2852 if (Opcode == TargetOpcode::G_CTLZ || Opcode == TargetOpcode::G_CTLS) {
2853 // The correct result is NewOp - (size difference between WideTy and CurTy).
2854 MIBNewOp = MIRBuilder.buildSub(
2855 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2856 }
2857
2858 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2859 MI.eraseFromParent();
2860 return Legalized;
2861 }
2862 case TargetOpcode::G_BSWAP: {
2863 Observer.changingInstr(MI);
2864 Register DstReg = MI.getOperand(0).getReg();
2865
2866 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2867 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2868 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2869 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2870
2871 MI.getOperand(0).setReg(DstExt);
2872
2873 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2874
2875 LLT Ty = MRI.getType(DstReg);
2876 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
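// Byte-swapping the any-extended value leaves the original bytes in the high
// part of the wide register, so shift them back down by the size difference
// before truncating.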
2877 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2878 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2879
2880 MIRBuilder.buildTrunc(DstReg, ShrReg);
2881 Observer.changedInstr(MI);
2882 return Legalized;
2883 }
2884 case TargetOpcode::G_BITREVERSE: {
2885 Observer.changingInstr(MI);
2886
2887 Register DstReg = MI.getOperand(0).getReg();
2888 LLT Ty = MRI.getType(DstReg);
2889 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2890
2891 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2892 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2893 MI.getOperand(0).setReg(DstExt);
2894 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2895
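// Reversing the any-extended value puts the original bits at the top of the
// wide register; shift them back down by DiffBits before truncating.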
2896 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2897 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2898 MIRBuilder.buildTrunc(DstReg, Shift);
2899 Observer.changedInstr(MI);
2900 return Legalized;
2901 }
2902 case TargetOpcode::G_FREEZE:
2903 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2904 Observer.changingInstr(MI);
2905 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2906 widenScalarDst(MI, WideTy);
2907 Observer.changedInstr(MI);
2908 return Legalized;
2909
2910 case TargetOpcode::G_ABS:
2911 Observer.changingInstr(MI);
2912 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2913 widenScalarDst(MI, WideTy);
2914 Observer.changedInstr(MI);
2915 return Legalized;
2916
2917 case TargetOpcode::G_ADD:
2918 case TargetOpcode::G_AND:
2919 case TargetOpcode::G_MUL:
2920 case TargetOpcode::G_OR:
2921 case TargetOpcode::G_XOR:
2922 case TargetOpcode::G_SUB:
2923 case TargetOpcode::G_SHUFFLE_VECTOR:
2924 // Perform operation at larger width (any extension is fine here, high bits
2925 // don't affect the result) and then truncate the result back to the
2926 // original type.
2927 Observer.changingInstr(MI);
2928 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2929 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2930 widenScalarDst(MI, WideTy);
2931 Observer.changedInstr(MI);
2932 return Legalized;
2933
2934 case TargetOpcode::G_SBFX:
2935 case TargetOpcode::G_UBFX:
2936 Observer.changingInstr(MI);
2937
2938 if (TypeIdx == 0) {
2939 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2940 widenScalarDst(MI, WideTy);
2941 } else {
2942 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2943 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2944 }
2945
2946 Observer.changedInstr(MI);
2947 return Legalized;
2948
2949 case TargetOpcode::G_SHL:
2950 Observer.changingInstr(MI);
2951
2952 if (TypeIdx == 0) {
2953 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2954 widenScalarDst(MI, WideTy);
2955 } else {
2956 assert(TypeIdx == 1);
2957 // The "number of bits to shift" operand must preserve its value as an
2958 // unsigned integer:
2959 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2960 }
2961
2962 Observer.changedInstr(MI);
2963 return Legalized;
2964
2965 case TargetOpcode::G_ROTR:
2966 case TargetOpcode::G_ROTL:
2967 if (TypeIdx != 1)
2968 return UnableToLegalize;
2969
2970 Observer.changingInstr(MI);
2971 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2972 Observer.changedInstr(MI);
2973 return Legalized;
2974
2975 case TargetOpcode::G_SDIV:
2976 case TargetOpcode::G_SREM:
2977 case TargetOpcode::G_SMIN:
2978 case TargetOpcode::G_SMAX:
2979 case TargetOpcode::G_ABDS:
2980 Observer.changingInstr(MI);
2981 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2982 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2983 widenScalarDst(MI, WideTy);
2984 Observer.changedInstr(MI);
2985 return Legalized;
2986
2987 case TargetOpcode::G_SDIVREM:
2988 Observer.changingInstr(MI);
2989 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2990 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2991 widenScalarDst(MI, WideTy);
2992 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
2993 widenScalarDst(MI, WideTy, 1);
2994 Observer.changedInstr(MI);
2995 return Legalized;
2996
2997 case TargetOpcode::G_ASHR:
2998 case TargetOpcode::G_LSHR:
2999 Observer.changingInstr(MI);
3000
3001 if (TypeIdx == 0) {
3002 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
3003 : TargetOpcode::G_ZEXT;
3004
3005 widenScalarSrc(MI, WideTy, 1, CvtOp);
3006 widenScalarDst(MI, WideTy);
3007 } else {
3008 assert(TypeIdx == 1);
3009 // The "number of bits to shift" operand must preserve its value as an
3010 // unsigned integer:
3011 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3012 }
3013
3014 Observer.changedInstr(MI);
3015 return Legalized;
3016 case TargetOpcode::G_UDIV:
3017 case TargetOpcode::G_UREM:
3018 case TargetOpcode::G_ABDU:
3019 Observer.changingInstr(MI);
3020 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3021 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3022 widenScalarDst(MI, WideTy);
3023 Observer.changedInstr(MI);
3024 return Legalized;
3025 case TargetOpcode::G_UDIVREM:
3026 Observer.changingInstr(MI);
3027 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3028 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3029 widenScalarDst(MI, WideTy);
3030 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3031 widenScalarDst(MI, WideTy, 1);
3032 Observer.changedInstr(MI);
3033 return Legalized;
3034 case TargetOpcode::G_UMIN:
3035 case TargetOpcode::G_UMAX: {
3036 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3037
3038 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3039 unsigned ExtOpc =
3040 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty, Ctx),
3041 getApproximateEVTForLLT(WideTy, Ctx))
3042 ? TargetOpcode::G_SEXT
3043 : TargetOpcode::G_ZEXT;
3044
3045 Observer.changingInstr(MI);
3046 widenScalarSrc(MI, WideTy, 1, ExtOpc);
3047 widenScalarSrc(MI, WideTy, 2, ExtOpc);
3048 widenScalarDst(MI, WideTy);
3049 Observer.changedInstr(MI);
3050 return Legalized;
3051 }
3052
3053 case TargetOpcode::G_SELECT:
3054 Observer.changingInstr(MI);
3055 if (TypeIdx == 0) {
3056 // Perform operation at larger width (any extension is fine here, high
3057 // bits don't affect the result) and then truncate the result back to the
3058 // original type.
3059 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3060 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
3061 widenScalarDst(MI, WideTy);
3062 } else {
3063 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
3064 // Explicit extension is required here since high bits affect the result.
3065 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
3066 }
3067 Observer.changedInstr(MI);
3068 return Legalized;
3069
3070 case TargetOpcode::G_FPEXT:
3071 if (TypeIdx != 1)
3072 return UnableToLegalize;
3073
3074 Observer.changingInstr(MI);
3075 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3076 Observer.changedInstr(MI);
3077 return Legalized;
3078 case TargetOpcode::G_FPTOSI:
3079 case TargetOpcode::G_FPTOUI:
3080 case TargetOpcode::G_INTRINSIC_LRINT:
3081 case TargetOpcode::G_INTRINSIC_LLRINT:
3082 case TargetOpcode::G_IS_FPCLASS:
3083 Observer.changingInstr(MI);
3084
3085 if (TypeIdx == 0)
3086 widenScalarDst(MI, WideTy);
3087 else
3088 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3089
3090 Observer.changedInstr(MI);
3091 return Legalized;
3092 case TargetOpcode::G_SITOFP:
3093 Observer.changingInstr(MI);
3094
3095 if (TypeIdx == 0)
3096 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3097 else
3098 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
3099
3100 Observer.changedInstr(MI);
3101 return Legalized;
3102 case TargetOpcode::G_UITOFP:
3103 Observer.changingInstr(MI);
3104
3105 if (TypeIdx == 0)
3106 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3107 else
3108 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3109
3110 Observer.changedInstr(MI);
3111 return Legalized;
3112 case TargetOpcode::G_FPTOSI_SAT:
3113 case TargetOpcode::G_FPTOUI_SAT:
3114 Observer.changingInstr(MI);
3115
3116 if (TypeIdx == 0) {
3117 Register OldDst = MI.getOperand(0).getReg();
3118 LLT Ty = MRI.getType(OldDst);
3119 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3120 Register NewDst;
3121 MI.getOperand(0).setReg(ExtReg);
3122 uint64_t ShortBits = Ty.getScalarSizeInBits();
3123 uint64_t WideBits = WideTy.getScalarSizeInBits();
3124 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3125 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3126 // z = i16 fptosi_sat(a)
3127 // ->
3128 // x = i32 fptosi_sat(a)
3129 // y = smin(x, 32767)
3130 // z = smax(y, -32768)
3131 auto MaxVal = MIRBuilder.buildConstant(
3132 WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
3133 auto MinVal = MIRBuilder.buildConstant(
3134 WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
3135 Register MidReg =
3136 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3137 NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3138 } else {
3139 // z = i16 fptoui_sat(a)
3140 // ->
3141 // x = i32 fptoui_sat(a)
3142 // y = smin(x, 65535)
3143 auto MaxVal = MIRBuilder.buildConstant(
3144 WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
3145 NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3146 }
3147 MIRBuilder.buildTrunc(OldDst, NewDst);
3148 } else
3149 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3150
3151 Observer.changedInstr(MI);
3152 return Legalized;
3153 case TargetOpcode::G_LOAD:
3154 case TargetOpcode::G_SEXTLOAD:
3155 case TargetOpcode::G_ZEXTLOAD:
3156 Observer.changingInstr(MI);
3157 widenScalarDst(MI, WideTy);
3158 Observer.changedInstr(MI);
3159 return Legalized;
3160
3161 case TargetOpcode::G_STORE: {
3162 if (TypeIdx != 0)
3163 return UnableToLegalize;
3164
3165 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3166 assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
3167 if (!Ty.isScalar()) {
3168 // We need to widen the vector element type.
3169 Observer.changingInstr(MI);
3170 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
3171 // We also need to adjust the MMO to turn this into a truncating store.
3172 MachineMemOperand &MMO = **MI.memoperands_begin();
3173 MachineFunction &MF = MIRBuilder.getMF();
3174 auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
3175 MI.setMemRefs(MF, {NewMMO});
3176 Observer.changedInstr(MI);
3177 return Legalized;
3178 }
3179
3180 Observer.changingInstr(MI);
3181
3182 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3183 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3184 widenScalarSrc(MI, WideTy, 0, ExtType);
3185
3186 Observer.changedInstr(MI);
3187 return Legalized;
3188 }
3189 case TargetOpcode::G_CONSTANT: {
3190 MachineOperand &SrcMO = MI.getOperand(1);
3191 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3192 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3193 MRI.getType(MI.getOperand(0).getReg()));
3194 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3195 ExtOpc == TargetOpcode::G_ANYEXT) &&
3196 "Illegal Extend");
3197 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3198 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3199 ? SrcVal.sext(WideTy.getSizeInBits())
3200 : SrcVal.zext(WideTy.getSizeInBits());
3201 Observer.changingInstr(MI);
3202 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3203
3204 widenScalarDst(MI, WideTy);
3205 Observer.changedInstr(MI);
3206 return Legalized;
3207 }
3208 case TargetOpcode::G_FCONSTANT: {
3209 // To avoid changing the bits of the constant due to extension to a larger
3210 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
3211 MachineOperand &SrcMO = MI.getOperand(1);
3212 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3213 MIRBuilder.setInstrAndDebugLoc(MI);
3214 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
3215 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
3216 MI.eraseFromParent();
3217 return Legalized;
3218 }
3219 case TargetOpcode::G_IMPLICIT_DEF: {
3220 Observer.changingInstr(MI);
3221 widenScalarDst(MI, WideTy);
3222 Observer.changedInstr(MI);
3223 return Legalized;
3224 }
3225 case TargetOpcode::G_BRCOND:
3226 Observer.changingInstr(MI);
3227 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
3228 Observer.changedInstr(MI);
3229 return Legalized;
3230
3231 case TargetOpcode::G_FCMP:
3232 Observer.changingInstr(MI);
3233 if (TypeIdx == 0)
3234 widenScalarDst(MI, WideTy);
3235 else {
3236 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3237 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
3238 }
3239 Observer.changedInstr(MI);
3240 return Legalized;
3241
3242 case TargetOpcode::G_ICMP:
3243 Observer.changingInstr(MI);
3244 if (TypeIdx == 0)
3245 widenScalarDst(MI, WideTy);
3246 else {
3247 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
3248 CmpInst::Predicate Pred =
3249 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3250
3251 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3252 unsigned ExtOpcode =
3253 (CmpInst::isSigned(Pred) ||
3254 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx),
3255 getApproximateEVTForLLT(WideTy, Ctx)))
3256 ? TargetOpcode::G_SEXT
3257 : TargetOpcode::G_ZEXT;
3258 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
3259 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
3260 }
3261 Observer.changedInstr(MI);
3262 return Legalized;
3263
3264 case TargetOpcode::G_PTR_ADD:
3265 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
3266 Observer.changingInstr(MI);
3267 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3268 Observer.changedInstr(MI);
3269 return Legalized;
3270
3271 case TargetOpcode::G_PHI: {
3272 assert(TypeIdx == 0 && "Expecting only Idx 0");
3273
3274 Observer.changingInstr(MI);
3275 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3276 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3277 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
3278 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
3279 }
3280
3281 MachineBasicBlock &MBB = *MI.getParent();
3282 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3283 widenScalarDst(MI, WideTy);
3284 Observer.changedInstr(MI);
3285 return Legalized;
3286 }
3287 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3288 if (TypeIdx == 0) {
3289 Register VecReg = MI.getOperand(1).getReg();
3290 LLT VecTy = MRI.getType(VecReg);
3291 Observer.changingInstr(MI);
3292
3293 widenScalarSrc(
3294 MI,
3295 LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
3296 TargetOpcode::G_ANYEXT);
3297
3298 widenScalarDst(MI, WideTy, 0);
3299 Observer.changedInstr(MI);
3300 return Legalized;
3301 }
3302
3303 if (TypeIdx != 2)
3304 return UnableToLegalize;
3305 Observer.changingInstr(MI);
3306 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3307 Observer.changedInstr(MI);
3308 return Legalized;
3309 }
3310 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3311 if (TypeIdx == 0) {
3312 Observer.changingInstr(MI);
3313 const LLT WideEltTy = WideTy.getElementType();
3314
3315 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3316 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3317 widenScalarDst(MI, WideTy, 0);
3318 Observer.changedInstr(MI);
3319 return Legalized;
3320 }
3321
3322 if (TypeIdx == 1) {
3323 Observer.changingInstr(MI);
3324
3325 Register VecReg = MI.getOperand(1).getReg();
3326 LLT VecTy = MRI.getType(VecReg);
3327 LLT WideVecTy = VecTy.changeVectorElementType(WideTy);
3328
3329 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3330 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3331 widenScalarDst(MI, WideVecTy, 0);
3332 Observer.changedInstr(MI);
3333 return Legalized;
3334 }
3335
3336 if (TypeIdx == 2) {
3337 Observer.changingInstr(MI);
3338 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3339 Observer.changedInstr(MI);
3340 return Legalized;
3341 }
3342
3343 return UnableToLegalize;
3344 }
3345 case TargetOpcode::G_FADD:
3346 case TargetOpcode::G_FMUL:
3347 case TargetOpcode::G_FSUB:
3348 case TargetOpcode::G_FMA:
3349 case TargetOpcode::G_FMAD:
3350 case TargetOpcode::G_FNEG:
3351 case TargetOpcode::G_FABS:
3352 case TargetOpcode::G_FCANONICALIZE:
3353 case TargetOpcode::G_FMINNUM:
3354 case TargetOpcode::G_FMAXNUM:
3355 case TargetOpcode::G_FMINNUM_IEEE:
3356 case TargetOpcode::G_FMAXNUM_IEEE:
3357 case TargetOpcode::G_FMINIMUM:
3358 case TargetOpcode::G_FMAXIMUM:
3359 case TargetOpcode::G_FMINIMUMNUM:
3360 case TargetOpcode::G_FMAXIMUMNUM:
3361 case TargetOpcode::G_FDIV:
3362 case TargetOpcode::G_FREM:
3363 case TargetOpcode::G_FCEIL:
3364 case TargetOpcode::G_FFLOOR:
3365 case TargetOpcode::G_FCOS:
3366 case TargetOpcode::G_FSIN:
3367 case TargetOpcode::G_FTAN:
3368 case TargetOpcode::G_FACOS:
3369 case TargetOpcode::G_FASIN:
3370 case TargetOpcode::G_FATAN:
3371 case TargetOpcode::G_FATAN2:
3372 case TargetOpcode::G_FCOSH:
3373 case TargetOpcode::G_FSINH:
3374 case TargetOpcode::G_FTANH:
3375 case TargetOpcode::G_FLOG10:
3376 case TargetOpcode::G_FLOG:
3377 case TargetOpcode::G_FLOG2:
3378 case TargetOpcode::G_FRINT:
3379 case TargetOpcode::G_FNEARBYINT:
3380 case TargetOpcode::G_FSQRT:
3381 case TargetOpcode::G_FEXP:
3382 case TargetOpcode::G_FEXP2:
3383 case TargetOpcode::G_FEXP10:
3384 case TargetOpcode::G_FPOW:
3385 case TargetOpcode::G_INTRINSIC_TRUNC:
3386 case TargetOpcode::G_INTRINSIC_ROUND:
3387 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3388 assert(TypeIdx == 0);
3389 Observer.changingInstr(MI);
3390
3391 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3392 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3393
3394 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3395 Observer.changedInstr(MI);
3396 return Legalized;
3397 case TargetOpcode::G_FMODF: {
3398 Observer.changingInstr(MI);
3399 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3400
3401 widenScalarDst(MI, WideTy, 1, TargetOpcode::G_FPTRUNC);
3402 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3403 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3404 Observer.changedInstr(MI);
3405 return Legalized;
3406 }
3407 case TargetOpcode::G_FPOWI:
3408 case TargetOpcode::G_FLDEXP:
3409 case TargetOpcode::G_STRICT_FLDEXP: {
3410 if (TypeIdx == 0) {
3411 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3412 return UnableToLegalize;
3413
3414 Observer.changingInstr(MI);
3415 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3416 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3417 Observer.changedInstr(MI);
3418 return Legalized;
3419 }
3420
3421 if (TypeIdx == 1) {
3422 // For some reason SelectionDAG tries to promote to a libcall without
3423 // actually changing the integer type for promotion.
3424 Observer.changingInstr(MI);
3425 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3426 Observer.changedInstr(MI);
3427 return Legalized;
3428 }
3429
3430 return UnableToLegalize;
3431 }
3432 case TargetOpcode::G_FFREXP: {
3433 Observer.changingInstr(MI);
3434
3435 if (TypeIdx == 0) {
3436 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3437 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3438 } else {
3439 widenScalarDst(MI, WideTy, 1);
3440 }
3441
3442 Observer.changedInstr(MI);
3443 return Legalized;
3444 }
3445 case TargetOpcode::G_LROUND:
3446 case TargetOpcode::G_LLROUND:
3447 Observer.changingInstr(MI);
3448
3449 if (TypeIdx == 0)
3450 widenScalarDst(MI, WideTy);
3451 else
3452 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3453
3454 Observer.changedInstr(MI);
3455 return Legalized;
3456
3457 case TargetOpcode::G_INTTOPTR:
3458 if (TypeIdx != 1)
3459 return UnableToLegalize;
3460
3461 Observer.changingInstr(MI);
3462 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3463 Observer.changedInstr(MI);
3464 return Legalized;
3465 case TargetOpcode::G_PTRTOINT:
3466 if (TypeIdx != 0)
3467 return UnableToLegalize;
3468
3469 Observer.changingInstr(MI);
3470 widenScalarDst(MI, WideTy, 0);
3471 Observer.changedInstr(MI);
3472 return Legalized;
3473 case TargetOpcode::G_BUILD_VECTOR: {
3474 Observer.changingInstr(MI);
3475
3476 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3477 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3478 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3479
3480 // Avoid changing the result vector type if the source element type was
3481 // requested.
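// The sources were just ANYEXTed to the wide element type, so switch to
// G_BUILD_VECTOR_TRUNC, which truncates each source back to the unchanged
// result element type.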
3482 if (TypeIdx == 1) {
3483 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3484 } else {
3485 widenScalarDst(MI, WideTy, 0);
3486 }
3487
3488 Observer.changedInstr(MI);
3489 return Legalized;
3490 }
3491 case TargetOpcode::G_SEXT_INREG:
3492 if (TypeIdx != 0)
3493 return UnableToLegalize;
3494
3495 Observer.changingInstr(MI);
3496 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3497 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3498 Observer.changedInstr(MI);
3499 return Legalized;
3500 case TargetOpcode::G_PTRMASK: {
3501 if (TypeIdx != 1)
3502 return UnableToLegalize;
3503 Observer.changingInstr(MI);
3504 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3505 Observer.changedInstr(MI);
3506 return Legalized;
3507 }
3508 case TargetOpcode::G_VECREDUCE_ADD: {
3509 if (TypeIdx != 1)
3510 return UnableToLegalize;
3511 Observer.changingInstr(MI);
3512 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3513 widenScalarDst(MI, WideTy.getScalarType(), 0, TargetOpcode::G_TRUNC);
3514 Observer.changedInstr(MI);
3515 return Legalized;
3516 }
3517 case TargetOpcode::G_VECREDUCE_FADD:
3518 case TargetOpcode::G_VECREDUCE_FMUL:
3519 case TargetOpcode::G_VECREDUCE_FMIN:
3520 case TargetOpcode::G_VECREDUCE_FMAX:
3521 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3522 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3523 if (TypeIdx != 0)
3524 return UnableToLegalize;
3525 Observer.changingInstr(MI);
3526 Register VecReg = MI.getOperand(1).getReg();
3527 LLT VecTy = MRI.getType(VecReg);
3528 LLT WideVecTy = VecTy.changeElementType(WideTy);
3529 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3530 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3531 Observer.changedInstr(MI);
3532 return Legalized;
3533 }
3534 case TargetOpcode::G_VSCALE: {
3535 MachineOperand &SrcMO = MI.getOperand(1);
3536 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3537 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3538 // The CImm is always a signed value
3539 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3540 Observer.changingInstr(MI);
3541 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3542 widenScalarDst(MI, WideTy);
3543 Observer.changedInstr(MI);
3544 return Legalized;
3545 }
3546 case TargetOpcode::G_SPLAT_VECTOR: {
3547 if (TypeIdx != 1)
3548 return UnableToLegalize;
3549
3550 Observer.changingInstr(MI);
3551 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3552 Observer.changedInstr(MI);
3553 return Legalized;
3554 }
3555 case TargetOpcode::G_INSERT_SUBVECTOR: {
3556 if (TypeIdx != 0)
3557 return UnableToLegalize;
3558
3559 GInsertSubvector &IS = cast<GInsertSubvector>(MI);
3560 Register BigVec = IS.getBigVec();
3561 Register SubVec = IS.getSubVec();
3562
3563 LLT SubVecTy = MRI.getType(SubVec);
3564 LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
3565
3566 // Widen the G_INSERT_SUBVECTOR
3567 auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
3568 auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3569 auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3570 IS.getIndexImm());
3571
3572 // Truncate back down
3573 auto SplatZero = MIRBuilder.buildSplatVector(
3574 WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
3575 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, IS.getReg(0), WideInsert,
3576 SplatZero);
3577
3578 MI.eraseFromParent();
3579
3580 return Legalized;
3581 }
3582 }
3583}
3584
3585static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3586 MachineIRBuilder &B, Register Src, LLT Ty) {
3587 auto Unmerge = B.buildUnmerge(Ty, Src);
3588 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3589 Pieces.push_back(Unmerge.getReg(I));
3590}
3591
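/// Emit a load of \p ConstVal into \p DstReg by creating a constant-pool
/// entry for the value and loading from its address.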
3592static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3593 MachineIRBuilder &MIRBuilder) {
3594 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3595 MachineFunction &MF = MIRBuilder.getMF();
3596 const DataLayout &DL = MIRBuilder.getDataLayout();
3597 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3598 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3599 LLT DstLLT = MRI.getType(DstReg);
3600
3601 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3602
3603 auto Addr = MIRBuilder.buildConstantPool(
3604 AddrPtrTy,
3605 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3606
3607 MachineMemOperand *MMO =
3608 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3609 MachineMemOperand::MOLoad, DstLLT, Alignment);
3610
3611 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3612}
3613
3614LegalizerHelper::LegalizeResult
3615LegalizerHelper::lowerConstant(MachineInstr &MI) {
3616 const MachineOperand &ConstOperand = MI.getOperand(1);
3617 const Constant *ConstantVal = ConstOperand.getCImm();
3618
3619 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3620 MI.eraseFromParent();
3621
3622 return Legalized;
3623}
3624
3625LegalizerHelper::LegalizeResult
3626LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3627 const MachineOperand &ConstOperand = MI.getOperand(1);
3628 const Constant *ConstantVal = ConstOperand.getFPImm();
3629
3630 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3631 MI.eraseFromParent();
3632
3633 return Legalized;
3634}
3635
3636LegalizerHelper::LegalizeResult
3637LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3638 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3639 if (SrcTy.isVector()) {
3640 LLT SrcEltTy = SrcTy.getElementType();
3641 SmallVector<Register, 8> SrcRegs;
3642
3643 if (DstTy.isVector()) {
3644 int NumDstElt = DstTy.getNumElements();
3645 int NumSrcElt = SrcTy.getNumElements();
3646
3647 LLT DstEltTy = DstTy.getElementType();
3648 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3649 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3650
3651 // If there's an element size mismatch, insert intermediate casts to match
3652 // the result element type.
3653 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3654 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3655 //
3656 // =>
3657 //
3658 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3659 // %4:_(<2 x s8>) = G_BITCAST %2
3660 // %5:_(<2 x s8>) = G_BITCAST %3
3661 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3662 DstCastTy = DstTy.changeVectorElementCount(
3663 ElementCount::getFixed(NumDstElt / NumSrcElt));
3664 SrcPartTy = SrcEltTy;
3665 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3666 //
3667 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3668 //
3669 // =>
3670 //
3671 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3672 // %4:_(s16) = G_BITCAST %2
3673 // %5:_(s16) = G_BITCAST %3
3674 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3675 SrcPartTy = SrcTy.changeVectorElementCount(
3676 ElementCount::getFixed(NumSrcElt / NumDstElt));
3677 DstCastTy = DstEltTy;
3678 }
3679
3680 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3681 for (Register &SrcReg : SrcRegs)
3682 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3683 } else
3684 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3685
3686 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3687 MI.eraseFromParent();
3688 return Legalized;
3689 }
3690
3691 if (DstTy.isVector()) {
3692 SmallVector<Register, 8> SrcRegs;
3693 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3694 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3695 MI.eraseFromParent();
3696 return Legalized;
3697 }
3698
3699 return UnableToLegalize;
3700}
3701
3702/// Figure out the bit offset into a register when coercing a vector index for
3703/// the wide element type. This is only for the case when promoting a vector
3704/// to one with larger elements.
3705///
3706///
3707/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3708/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
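///
/// For example, when s8 elements are accessed through a vector of s32 elements
/// (NewEltSize = 32, OldEltSize = 8), Log2EltRatio is 2, so %offset_idx keeps
/// the low two bits of %idx and %offset_bits is that value shifted left by 3
/// (the log2 of the old element width in bits).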
3709static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3710 Register Idx,
3711 unsigned NewEltSize,
3712 unsigned OldEltSize) {
3713 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3714 LLT IdxTy = B.getMRI()->getType(Idx);
3715
3716 // Now figure out the amount we need to shift to get the target bits.
3717 auto OffsetMask = B.buildConstant(
3718 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3719 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3720 return B.buildShl(IdxTy, OffsetIdx,
3721 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3722}
3723
3724/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3725/// is casting to a vector with a smaller element size, perform multiple element
3726/// extracts and merge the results. If this is coercing to a vector with larger
3727/// elements, index the bitcasted vector and extract the target element with bit
3728/// operations. This is intended to force the indexing in the native register
3729/// size for architectures that can dynamically index the register file.
3730LegalizerHelper::LegalizeResult
3731LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3732 LLT CastTy) {
3733 if (TypeIdx != 1)
3734 return UnableToLegalize;
3735
3736 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3737
3738 LLT SrcEltTy = SrcVecTy.getElementType();
3739 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3740 unsigned OldNumElts = SrcVecTy.getNumElements();
3741
3742 LLT NewEltTy = CastTy.getScalarType();
3743 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3744
3745 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3746 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3747 if (NewNumElts > OldNumElts) {
3748 // Decreasing the vector element size
3749 //
3750 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3751 // =>
3752 // v4i32:castx = bitcast x:v2i64
3753 //
3754 // i64 = bitcast
3755 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3756 // (i32 (extract_vector_elt castx, (2 * y + 1))))
3757 //
3758 if (NewNumElts % OldNumElts != 0)
3759 return UnableToLegalize;
3760
3761 // Type of the intermediate result vector.
3762 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3763 LLT MidTy =
3764 CastTy.changeElementCount(ElementCount::getFixed(NewEltsPerOldElt));
3765
3766 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3767
3768 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3769 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3770
3771 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3772 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3773 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3774 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3775 NewOps[I] = Elt.getReg(0);
3776 }
3777
3778 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3779 MIRBuilder.buildBitcast(Dst, NewVec);
3780 MI.eraseFromParent();
3781 return Legalized;
3782 }
3783
3784 if (NewNumElts < OldNumElts) {
3785 if (NewEltSize % OldEltSize != 0)
3786 return UnableToLegalize;
3787
3788 // This only depends on powers of 2 because we use bit tricks to figure out
3789 // the bit offset we need to shift to get the target element. A general
3790 // expansion could emit division/multiply.
3791 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3792 return UnableToLegalize;
3793
3794 // Increasing the vector element size.
3795 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3796 //
3797 // =>
3798 //
3799 // %cast = G_BITCAST %vec
3800 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3801 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3802 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3803 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3804 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3805 // %elt = G_TRUNC %elt_bits
3806
3807 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3808 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3809
3810 // Divide to get the index in the wider element type.
3811 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3812
3813 Register WideElt = CastVec;
3814 if (CastTy.isVector()) {
3815 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3816 ScaledIdx).getReg(0);
3817 }
3818
3819 // Compute the bit offset into the register of the target element.
3820 Register OffsetBits = getBitcastWiderVectorElementOffset(
3821 MIRBuilder, Idx, NewEltSize, OldEltSize);
3822
3823 // Shift the wide element to get the target element.
3824 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3825 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3826 MI.eraseFromParent();
3827 return Legalized;
3828 }
3829
3830 return UnableToLegalize;
3831}
3832
3833/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits, while
3834/// preserving the other bits in \p TargetReg.
3835///
3836/// (ZExt(InsertReg) << OffsetBits) | (TargetReg & ~(LowBitsMask(InsertReg.size()) << OffsetBits))
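///
/// For example, inserting an s8 value into an s32 target at OffsetBits = 16
/// clears bits [23:16] of TargetReg and ORs in the zero-extended InsertReg
/// shifted left by 16.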
3837static Register buildBitFieldInsert(MachineIRBuilder &B,
3838 Register TargetReg, Register InsertReg,
3839 Register OffsetBits) {
3840 LLT TargetTy = B.getMRI()->getType(TargetReg);
3841 LLT InsertTy = B.getMRI()->getType(InsertReg);
3842 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3843 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3844
3845 // Produce a bitmask of the value to insert
3846 auto EltMask = B.buildConstant(
3847 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3848 InsertTy.getSizeInBits()));
3849 // Shift it into position
3850 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3851 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3852
3853 // Clear out the bits in the wide element
3854 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3855
3856 // The value to insert has all zeros already, so stick it into the masked
3857 // wide element.
3858 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3859}
3860
3861/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3862/// is increasing the element size, perform the indexing in the target element
3863/// type, and use bit operations to insert at the element position. This is
3864/// intended for architectures that can dynamically index the register file and
3865/// want to force indexing in the native register size.
3866LegalizerHelper::LegalizeResult
3867LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3868 LLT CastTy) {
3869 if (TypeIdx != 0)
3870 return UnableToLegalize;
3871
3872 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3873 MI.getFirst4RegLLTs();
3874 LLT VecTy = DstTy;
3875
3876 LLT VecEltTy = VecTy.getElementType();
3877 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3878 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3879 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3880
3881 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3882 unsigned OldNumElts = VecTy.getNumElements();
3883
3884 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3885 if (NewNumElts < OldNumElts) {
3886 if (NewEltSize % OldEltSize != 0)
3887 return UnableToLegalize;
3888
3889 // This only depends on powers of 2 because we use bit tricks to figure out
3890 // the bit offset we need to shift to get the target element. A general
3891 // expansion could emit division/multiply.
3892 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3893 return UnableToLegalize;
3894
3895 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3896 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3897
3898 // Divide to get the index in the wider element type.
3899 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3900
3901 Register ExtractedElt = CastVec;
3902 if (CastTy.isVector()) {
3903 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3904 ScaledIdx).getReg(0);
3905 }
3906
3907 // Compute the bit offset into the register of the target element.
3908 Register OffsetBits = getBitcastWiderVectorElementOffset(
3909 MIRBuilder, Idx, NewEltSize, OldEltSize);
3910
3911 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3912 Val, OffsetBits);
3913 if (CastTy.isVector()) {
3914 InsertedElt = MIRBuilder.buildInsertVectorElement(
3915 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3916 }
3917
3918 MIRBuilder.buildBitcast(Dst, InsertedElt);
3919 MI.eraseFromParent();
3920 return Legalized;
3921 }
3922
3923 return UnableToLegalize;
3924}
3925
3926// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3927// those whose operands are smaller than the legal type.
3928//
3929// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3930//
3931// ===>
3932//
3933// s32 = G_BITCAST <4 x s8>
3934// s32 = G_BITCAST <4 x s8>
3935// s32 = G_BITCAST <4 x s8>
3936// s32 = G_BITCAST <4 x s8>
3937// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3938// <16 x s8> = G_BITCAST <4 x s32>
3939LegalizerHelper::LegalizeResult
3940LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
3941 LLT CastTy) {
3942 // Convert it to CONCAT instruction
3943 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3944 if (!ConcatMI) {
3945 return UnableToLegalize;
3946 }
3947
3948 // Check if bitcast is Legal
3949 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3950 LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
3951
3952 // Check if the build vector is Legal
3953 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3954 return UnableToLegalize;
3955 }
3956
3957 // Bitcast the sources
3958 SmallVector<Register> BitcastRegs;
3959 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3960 BitcastRegs.push_back(
3961 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3962 .getReg(0));
3963 }
3964
3965 // Build the scalar values into a vector
3966 Register BuildReg =
3967 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
3968 MIRBuilder.buildBitcast(DstReg, BuildReg);
3969
3970 MI.eraseFromParent();
3971 return Legalized;
3972}
3973
3974// This bitcasts a shuffle vector to a different type currently of the same
3975// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
3976// will be used instead.
3977//
3978// <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
3979// ===>
3980// <4 x s64> = G_PTRTOINT <4 x p0>
3981// <4 x s64> = G_PTRTOINT <4 x p0>
3982// <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
3983// <16 x p0> = G_INTTOPTR <16 x s64>
3984LegalizerHelper::LegalizeResult
3985LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
3986 LLT CastTy) {
3987 auto ShuffleMI = cast<GShuffleVector>(&MI);
3988 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
3989 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
3990
3991 // We currently only handle vectors of the same size.
3992 if (TypeIdx != 0 ||
3993 CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
3994 CastTy.getElementCount() != DstTy.getElementCount())
3995 return UnableToLegalize;
3996
3997 LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
3998
3999 auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
4000 auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
4001 auto Shuf =
4002 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
4003 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
4004
4005 MI.eraseFromParent();
4006 return Legalized;
4007}
4008
4009/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
4010///
4011/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
4012///
4013/// ===>
4014///
4015/// <vscale x 2 x i1> = G_BITCAST <vscale x 16 x i1>
4016/// <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i1>, N / 8
4017/// <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
4018LegalizerHelper::LegalizeResult
4019LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
4020 LLT CastTy) {
4021 auto ES = cast<GExtractSubvector>(&MI);
4022
4023 if (!CastTy.isVector())
4024 return UnableToLegalize;
4025
4026 if (TypeIdx != 0)
4027 return UnableToLegalize;
4028
4029 Register Dst = ES->getReg(0);
4030 Register Src = ES->getSrcVec();
4031 uint64_t Idx = ES->getIndexImm();
4032
4033 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4034
4035 LLT DstTy = MRI.getType(Dst);
4036 LLT SrcTy = MRI.getType(Src);
4037 ElementCount DstTyEC = DstTy.getElementCount();
4038 ElementCount SrcTyEC = SrcTy.getElementCount();
4039 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4040 auto SrcTyMinElts = SrcTyEC.getKnownMinValue();
4041
4042 if (DstTy == CastTy)
4043 return Legalized;
4044
4045 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4046 return UnableToLegalize;
4047
4048 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4049 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4050 if (CastEltSize < DstEltSize)
4051 return UnableToLegalize;
4052
4053 auto AdjustAmt = CastEltSize / DstEltSize;
4054 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4055 SrcTyMinElts % AdjustAmt != 0)
4056 return UnableToLegalize;
4057
4058 Idx /= AdjustAmt;
4059 SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4060 auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
4061 auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
4062 MIRBuilder.buildBitcast(Dst, PromotedES);
4063
4064 ES->eraseFromParent();
4065 return Legalized;
4066}
4067
4068/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
4069///
4070/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
4071/// <vscale x 8 x i1>,
4072/// N
4073///
4074/// ===>
4075///
4076/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
4077/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
4078/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
4079/// <vscale x 1 x i8>, N / 8
4080/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
4081LegalizerHelper::LegalizeResult
4082LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
4083 LLT CastTy) {
4084 auto ES = cast<GInsertSubvector>(&MI);
4085
4086 if (!CastTy.isVector())
4087 return UnableToLegalize;
4088
4089 if (TypeIdx != 0)
4090 return UnableToLegalize;
4091
4092 Register Dst = ES->getReg(0);
4093 Register BigVec = ES->getBigVec();
4094 Register SubVec = ES->getSubVec();
4095 uint64_t Idx = ES->getIndexImm();
4096
4097 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4098
4099 LLT DstTy = MRI.getType(Dst);
4100 LLT BigVecTy = MRI.getType(BigVec);
4101 LLT SubVecTy = MRI.getType(SubVec);
4102
4103 if (DstTy == CastTy)
4104 return Legalized;
4105
4106 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4107 return UnableToLegalize;
4108
4109 ElementCount DstTyEC = DstTy.getElementCount();
4110 ElementCount BigVecTyEC = BigVecTy.getElementCount();
4111 ElementCount SubVecTyEC = SubVecTy.getElementCount();
4112 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4113 auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
4114 auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();
4115
4116 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4117 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4118 if (CastEltSize < DstEltSize)
4119 return UnableToLegalize;
4120
4121 auto AdjustAmt = CastEltSize / DstEltSize;
4122 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4123 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4124 return UnableToLegalize;
4125
4126 Idx /= AdjustAmt;
4127 BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4128 SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4129 auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
4130 auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
4131 auto PromotedIS =
4132 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4133 MIRBuilder.buildBitcast(Dst, PromotedIS);
4134
4135 ES->eraseFromParent();
4136 return Legalized;
4137}
4138
4139LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
4140 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
4141 Register DstReg = LoadMI.getDstReg();
4142 Register PtrReg = LoadMI.getPointerReg();
4143 LLT DstTy = MRI.getType(DstReg);
4144 MachineMemOperand &MMO = LoadMI.getMMO();
4145 LLT MemTy = MMO.getMemoryType();
4146 MachineFunction &MF = MIRBuilder.getMF();
4147
4148 unsigned MemSizeInBits = MemTy.getSizeInBits();
4149 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
4150
4151 if (MemSizeInBits != MemStoreSizeInBits) {
4152 if (MemTy.isVector())
4153 return UnableToLegalize;
4154
4155 // Promote to a byte-sized load if not loading an integral number of
4156 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
4157 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
4158 MachineMemOperand *NewMMO =
4159 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
4160
4161 Register LoadReg = DstReg;
4162 LLT LoadTy = DstTy;
4163
4164 // If this wasn't already an extending load, we need to widen the result
4165 // register to avoid creating a load with a narrower result than the source.
4166 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
4167 LoadTy = WideMemTy;
4168 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4169 }
4170
4171 if (isa<GSExtLoad>(LoadMI)) {
4172 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4173 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4174 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
4175 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4176 // The extra bits are guaranteed to be zero, since we stored them that
4177 // way. A zext load from Wide thus automatically gives zext from MemVT.
4178 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4179 } else {
4180 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4181 }
4182
4183 if (DstTy != LoadTy)
4184 MIRBuilder.buildTrunc(DstReg, LoadReg);
4185
4186 LoadMI.eraseFromParent();
4187 return Legalized;
4188 }
4189
4190 // Big endian lowering not implemented.
4191 if (MIRBuilder.getDataLayout().isBigEndian())
4192 return UnableToLegalize;
4193
4194 // This load needs splitting into power of 2 sized loads.
4195 //
4196 // Our strategy here is to generate anyextending loads for the smaller
4197 // types up to the next power-of-2 result type, and then combine the two
4198 // results together, before truncating back down to the non-pow-2
4199 // type.
4200 // E.g. v1 = i24 load =>
4201 // v2 = i32 zextload (2 byte)
4202 // v3 = i32 load (1 byte)
4203 // v4 = i32 shl v3, 16
4204 // v5 = i32 or v4, v2
4205 // v1 = i24 trunc v5
4206 // By doing this we generate the correct truncate which should get
4207 // combined away as an artifact with a matching extend.
4208
4209 uint64_t LargeSplitSize, SmallSplitSize;
4210
4211 if (!isPowerOf2_32(MemSizeInBits)) {
4212 // This load needs splitting into power of 2 sized loads.
4213 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
4214 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4215 } else {
4216 // This is already a power of 2, but we still need to split this in half.
4217 //
4218 // Assume we're being asked to decompose an unaligned load.
4219 // TODO: If this requires multiple splits, handle them all at once.
4220 auto &Ctx = MF.getFunction().getContext();
4221 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4222 return UnableToLegalize;
4223
4224 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4225 }
4226
4227 if (MemTy.isVector()) {
4228 // TODO: Handle vector extloads
4229 if (MemTy != DstTy)
4230 return UnableToLegalize;
4231
4232 Align Alignment = LoadMI.getAlign();
4233 // Given an alignment larger than the size of the memory, we can increase
4234 // the size of the load without needing to scalarize it.
4235 if (Alignment.value() * 8 > MemSizeInBits &&
4237 LLT MoreTy = DstTy.changeVectorElementCount(
4239 MachineMemOperand *NewMMO = MF.getMachineMemOperand(&MMO, 0, MoreTy);
4240 auto NewLoad = MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
4241 MIRBuilder.buildDeleteTrailingVectorElements(LoadMI.getReg(0),
4242 NewLoad.getReg(0));
4243 LoadMI.eraseFromParent();
4244 return Legalized;
4245 }
4246
4247 // TODO: We can do better than scalarizing the vector and at least split it
4248 // in half.
4249 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
4250 }
4251
4252 MachineMemOperand *LargeMMO =
4253 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4254 MachineMemOperand *SmallMMO =
4255 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4256
4257 LLT PtrTy = MRI.getType(PtrReg);
4258 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
4259 LLT AnyExtTy = LLT::scalar(AnyExtSize);
4260 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4261 PtrReg, *LargeMMO);
4262
4263 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
4264 LargeSplitSize / 8);
4265 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4266 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
4267 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
4268 SmallPtr, *SmallMMO);
4269
4270 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4271 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4272
4273 if (AnyExtTy == DstTy)
4274 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4275 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
4276 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4277 MIRBuilder.buildTrunc(DstReg, {Or});
4278 } else {
4279 assert(DstTy.isPointer() && "expected pointer");
4280 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4281
4282 // FIXME: We currently consider this to be illegal for non-integral address
4283 // spaces, but we still need a way to reinterpret the bits.
4284 MIRBuilder.buildIntToPtr(DstReg, Or);
4285 }
4286
4287 LoadMI.eraseFromParent();
4288 return Legalized;
4289}
4290
4291LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
4292 // Lower a non-power of 2 store into multiple pow-2 stores.
4293 // E.g. split an i24 store into an i16 store + i8 store.
4294 // We do this by first extending the stored value to the next largest power
4295 // of 2 type, and then using truncating stores to store the components.
4296 // By doing this, likewise with G_LOAD, we generate an extend that can be
4297 // artifact-combined away instead of leaving behind extracts.
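// E.g. an s24 store of %v =>
// %ext:_(s32) = G_ANYEXT %v
// G_STORE %ext, %ptr (2-byte truncating store)
// %hi:_(s32) = G_LSHR %ext, 16
// G_STORE %hi, %ptr + 2 (1-byte truncating store)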
4298 Register SrcReg = StoreMI.getValueReg();
4299 Register PtrReg = StoreMI.getPointerReg();
4300 LLT SrcTy = MRI.getType(SrcReg);
4301 MachineFunction &MF = MIRBuilder.getMF();
4302 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4303 LLT MemTy = MMO.getMemoryType();
4304
4305 unsigned StoreWidth = MemTy.getSizeInBits();
4306 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
4307
4308 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4309 // Promote to a byte-sized store with upper bits zero if not
4310 // storing an integral number of bytes. For example, promote
4311 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
4312 LLT WideTy = LLT::scalar(StoreSizeInBits);
4313
4314 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4315 // Avoid creating a store with a narrower source than result.
4316 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4317 SrcTy = WideTy;
4318 }
4319
4320 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4321
4322 MachineMemOperand *NewMMO =
4323 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
4324 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4325 StoreMI.eraseFromParent();
4326 return Legalized;
4327 }
4328
4329 if (MemTy.isVector()) {
4330 if (MemTy != SrcTy)
4331 return scalarizeVectorBooleanStore(StoreMI);
4332
4333 // TODO: We can do better than scalarizing the vector and at least split it
4334 // in half.
4335 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
4336 }
4337
4338 unsigned MemSizeInBits = MemTy.getSizeInBits();
4339 uint64_t LargeSplitSize, SmallSplitSize;
4340
4341 if (!isPowerOf2_32(MemSizeInBits)) {
4342 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
4343 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
4344 } else {
4345 auto &Ctx = MF.getFunction().getContext();
4346 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4347 return UnableToLegalize; // Don't know what we're being asked to do.
4348
4349 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4350 }
4351
4352 // Extend to the next pow-2. If this store was itself the result of lowering,
4353 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
4354 // that's wider than the stored size.
4355 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
4356 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
4357
4358 if (SrcTy.isPointer()) {
4359 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
4360 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4361 }
4362
4363 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4364
4365 // Obtain the smaller value by shifting away the larger value.
4366 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4367 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4368
4369 // Generate the PtrAdd and truncating stores.
4370 LLT PtrTy = MRI.getType(PtrReg);
4371 auto OffsetCst = MIRBuilder.buildConstant(
4372 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
4373 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4374
4375 MachineMemOperand *LargeMMO =
4376 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4377 MachineMemOperand *SmallMMO =
4378 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4379 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4380 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4381 StoreMI.eraseFromParent();
4382 return Legalized;
4383}
4384
4385LegalizerHelper::LegalizeResult
4386LegalizerHelper::scalarizeVectorBooleanStore(GStore &StoreMI) {
4387 Register SrcReg = StoreMI.getValueReg();
4388 Register PtrReg = StoreMI.getPointerReg();
4389 LLT SrcTy = MRI.getType(SrcReg);
4390 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4391 LLT MemTy = MMO.getMemoryType();
4392 LLT MemScalarTy = MemTy.getElementType();
4393 MachineFunction &MF = MIRBuilder.getMF();
4394
4395 assert(SrcTy.isVector() && "Expect a vector store type");
4396
4397 if (!MemScalarTy.isByteSized()) {
4398 // We need to build an integer scalar of the vector bit pattern.
4399 // It's not legal for us to add padding when storing a vector.
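// E.g. a <4 x s1> store is emitted as a single s4 value: each element is
// truncated to the memory element type, zero-extended, shifted into its bit
// position (reversed for big-endian layouts), and ORed into the accumulator
// before a single scalar store.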
4400 unsigned NumBits = MemTy.getSizeInBits();
4401 LLT IntTy = LLT::scalar(NumBits);
4402 auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
4403 LLT IdxTy = TLI.getVectorIdxLLT(MF.getDataLayout());
4404
4405 for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
4406 auto Elt = MIRBuilder.buildExtractVectorElement(
4407 SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
4408 auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
4409 auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
4410 unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
4411 ? (MemTy.getNumElements() - 1) - I
4412 : I;
4413 auto ShiftAmt = MIRBuilder.buildConstant(
4414 IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
4415 auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4416 CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4417 }
4418 auto PtrInfo = MMO.getPointerInfo();
4419 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
4420 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4421 StoreMI.eraseFromParent();
4422 return Legalized;
4423 }
4424
4425 // TODO: implement simple scalarization.
4426 return UnableToLegalize;
4427}
4428
4429LegalizerHelper::LegalizeResult
4430LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
4431 switch (MI.getOpcode()) {
4432 case TargetOpcode::G_LOAD: {
4433 if (TypeIdx != 0)
4434 return UnableToLegalize;
4435 MachineMemOperand &MMO = **MI.memoperands_begin();
4436
4437 // Not sure how to interpret a bitcast of an extending load.
4438 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4439 return UnableToLegalize;
4440
4441 Observer.changingInstr(MI);
4442 bitcastDst(MI, CastTy, 0);
4443 MMO.setType(CastTy);
4444 // The range metadata is no longer valid when reinterpreted as a different
4445 // type.
4446 MMO.clearRanges();
4447 Observer.changedInstr(MI);
4448 return Legalized;
4449 }
4450 case TargetOpcode::G_STORE: {
4451 if (TypeIdx != 0)
4452 return UnableToLegalize;
4453
4454 MachineMemOperand &MMO = **MI.memoperands_begin();
4455
4456 // Not sure how to interpret a bitcast of a truncating store.
4457 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4458 return UnableToLegalize;
4459
4460 Observer.changingInstr(MI);
4461 bitcastSrc(MI, CastTy, 0);
4462 MMO.setType(CastTy);
4463 Observer.changedInstr(MI);
4464 return Legalized;
4465 }
4466 case TargetOpcode::G_SELECT: {
4467 if (TypeIdx != 0)
4468 return UnableToLegalize;
4469
4470 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
4471 LLVM_DEBUG(
4472 dbgs() << "bitcast action not implemented for vector select\n");
4473 return UnableToLegalize;
4474 }
4475
4476 Observer.changingInstr(MI);
4477 bitcastSrc(MI, CastTy, 2);
4478 bitcastSrc(MI, CastTy, 3);
4479 bitcastDst(MI, CastTy, 0);
4480 Observer.changedInstr(MI);
4481 return Legalized;
4482 }
4483 case TargetOpcode::G_AND:
4484 case TargetOpcode::G_OR:
4485 case TargetOpcode::G_XOR: {
4486 Observer.changingInstr(MI);
4487 bitcastSrc(MI, CastTy, 1);
4488 bitcastSrc(MI, CastTy, 2);
4489 bitcastDst(MI, CastTy, 0);
4490 Observer.changedInstr(MI);
4491 return Legalized;
4492 }
4493 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4494 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
4495 case TargetOpcode::G_INSERT_VECTOR_ELT:
4496 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
4497 case TargetOpcode::G_CONCAT_VECTORS:
4498 return bitcastConcatVector(MI, TypeIdx, CastTy);
4499 case TargetOpcode::G_SHUFFLE_VECTOR:
4500 return bitcastShuffleVector(MI, TypeIdx, CastTy);
4501 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4502 return bitcastExtractSubvector(MI, TypeIdx, CastTy);
4503 case TargetOpcode::G_INSERT_SUBVECTOR:
4504 return bitcastInsertSubvector(MI, TypeIdx, CastTy);
4505 default:
4506 return UnableToLegalize;
4507 }
4508}
4509
4510// Legalize an instruction by changing the opcode in place.
4511void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
4512 Observer.changingInstr(MI);
4513 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
4514 Observer.changedInstr(MI);
4515}
4516
4517LegalizerHelper::LegalizeResult
4518LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4519 using namespace TargetOpcode;
4520
4521 switch(MI.getOpcode()) {
4522 default:
4523 return UnableToLegalize;
4524 case TargetOpcode::G_FCONSTANT:
4525 return lowerFConstant(MI);
4526 case TargetOpcode::G_BITCAST:
4527 return lowerBitcast(MI);
4528 case TargetOpcode::G_SREM:
4529 case TargetOpcode::G_UREM: {
4530 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4531 auto Quot =
4532 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4533 {MI.getOperand(1), MI.getOperand(2)});
4534
4535 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
4536 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
4537 MI.eraseFromParent();
4538 return Legalized;
4539 }
4540 case TargetOpcode::G_SADDO:
4541 case TargetOpcode::G_SSUBO:
4542 return lowerSADDO_SSUBO(MI);
4543 case TargetOpcode::G_SADDE:
4544 return lowerSADDE(MI);
4545 case TargetOpcode::G_SSUBE:
4546 return lowerSSUBE(MI);
4547 case TargetOpcode::G_UMULH:
4548 case TargetOpcode::G_SMULH:
4549 return lowerSMULH_UMULH(MI);
4550 case TargetOpcode::G_SMULO:
4551 case TargetOpcode::G_UMULO: {
4552 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4553 // result.
4554 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
4555 LLT Ty = MRI.getType(Res);
4556
4557 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4558 ? TargetOpcode::G_SMULH
4559 : TargetOpcode::G_UMULH;
4560
4561 Observer.changingInstr(MI);
4562 const auto &TII = MIRBuilder.getTII();
4563 MI.setDesc(TII.get(TargetOpcode::G_MUL));
4564 MI.removeOperand(1);
4565 Observer.changedInstr(MI);
4566
4567 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4568 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4569
4570 // Move insert point forward so we can use the Res register if needed.
4571 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
4572
4573 // For *signed* multiply, overflow is detected by checking:
4574 // (hi != (lo >> bitwidth-1))
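// E.g. for s8, 64 * 2 = 128 gives lo = 0x80 and hi = 0x00, while the
// arithmetic shift of lo by 7 gives 0xFF, so hi != (lo >> 7) reports the
// overflow.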
4575 if (Opcode == TargetOpcode::G_SMULH) {
4576 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4577 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4578 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
4579 } else {
4580 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
4581 }
4582 return Legalized;
4583 }
4584 case TargetOpcode::G_FNEG: {
4585 auto [Res, SubByReg] = MI.getFirst2Regs();
4586 LLT Ty = MRI.getType(Res);
4587
4588 auto SignMask = MIRBuilder.buildConstant(
4589 Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
4590 MIRBuilder.buildXor(Res, SubByReg, SignMask);
4591 MI.eraseFromParent();
4592 return Legalized;
4593 }
4594 case TargetOpcode::G_FSUB:
4595 case TargetOpcode::G_STRICT_FSUB: {
4596 auto [Res, LHS, RHS] = MI.getFirst3Regs();
4597 LLT Ty = MRI.getType(Res);
4598
4599 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4600 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4601
4602 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4603 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4604 else
4605 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4606
4607 MI.eraseFromParent();
4608 return Legalized;
4609 }
4610 case TargetOpcode::G_FMAD:
4611 return lowerFMad(MI);
4612 case TargetOpcode::G_FFLOOR:
4613 return lowerFFloor(MI);
4614 case TargetOpcode::G_LROUND:
4615 case TargetOpcode::G_LLROUND: {
4616 Register DstReg = MI.getOperand(0).getReg();
4617 Register SrcReg = MI.getOperand(1).getReg();
4618 LLT SrcTy = MRI.getType(SrcReg);
4619 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4620 {SrcReg});
4621 MIRBuilder.buildFPTOSI(DstReg, Round);
4622 MI.eraseFromParent();
4623 return Legalized;
4624 }
4625 case TargetOpcode::G_INTRINSIC_ROUND:
4626 return lowerIntrinsicRound(MI);
4627 case TargetOpcode::G_FRINT: {
4628 // Since round even is the assumed rounding mode for unconstrained FP
4629 // operations, rint and roundeven are the same operation.
4630 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4631 return Legalized;
4632 }
4633 case TargetOpcode::G_INTRINSIC_LRINT:
4634 case TargetOpcode::G_INTRINSIC_LLRINT: {
4635 Register DstReg = MI.getOperand(0).getReg();
4636 Register SrcReg = MI.getOperand(1).getReg();
4637 LLT SrcTy = MRI.getType(SrcReg);
4638 auto Round =
4639 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4640 MIRBuilder.buildFPTOSI(DstReg, Round);
4641 MI.eraseFromParent();
4642 return Legalized;
4643 }
4644 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4645 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
4646 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4647 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4648 **MI.memoperands_begin());
4649 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4650 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4651 MI.eraseFromParent();
4652 return Legalized;
4653 }
4654 case TargetOpcode::G_LOAD:
4655 case TargetOpcode::G_SEXTLOAD:
4656 case TargetOpcode::G_ZEXTLOAD:
4657 return lowerLoad(cast<GAnyLoad>(MI));
4658 case TargetOpcode::G_STORE:
4659 return lowerStore(cast<GStore>(MI));
4660 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4661 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4662 case TargetOpcode::G_CTLZ:
4663 case TargetOpcode::G_CTTZ:
4664 case TargetOpcode::G_CTPOP:
4665 case TargetOpcode::G_CTLS:
4666 return lowerBitCount(MI);
4667 case G_UADDO: {
4668 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
4669
4670 Register NewRes = MRI.cloneVirtualRegister(Res);
4671
4672 MIRBuilder.buildAdd(NewRes, LHS, RHS);
4673 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4674
4675 MIRBuilder.buildCopy(Res, NewRes);
4676
4677 MI.eraseFromParent();
4678 return Legalized;
4679 }
4680 case G_UADDE: {
4681 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
4682 const LLT CondTy = MRI.getType(CarryOut);
4683 const LLT Ty = MRI.getType(Res);
4684
4685 Register NewRes = MRI.cloneVirtualRegister(Res);
4686
4687 // Initial add of the two operands.
4688 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
4689
4690 // Initial check for carry.
4691 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4692
4693 // Add the sum and the carry.
4694 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
4695 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4696
4697 // Second check for carry. We can only carry if the initial sum is all 1s
4698 // and the carry is set, resulting in a new sum of 0.
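// E.g. for s8 with LHS = 0xFF, RHS = 0x00, CarryIn = 1: the first add gives
// 0xFF with no carry, but adding CarryIn wraps the sum to 0, so the second
// check is what sets CarryOut.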
4699 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4700 auto ResEqZero =
4701 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
4702 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4703 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
4704
4705 MIRBuilder.buildCopy(Res, NewRes);
4706
4707 MI.eraseFromParent();
4708 return Legalized;
4709 }
4710 case G_USUBO: {
4711 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
4712
4713 MIRBuilder.buildSub(Res, LHS, RHS);
4714 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
4715
4716 MI.eraseFromParent();
4717 return Legalized;
4718 }
4719 case G_USUBE: {
4720 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
4721 const LLT CondTy = MRI.getType(BorrowOut);
4722 const LLT Ty = MRI.getType(Res);
4723
4724 // Initial subtract of the two operands.
4725 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
4726
4727 // Initial check for borrow.
4728 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4729
4730 // Subtract the borrow from the first subtract.
4731 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
4732 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4733
4734 // Second check for borrow. We can only borrow if the initial difference is
4735 // 0 and the borrow is set, resulting in a new difference of all 1s.
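// E.g. for s8 with LHS = 0, RHS = 0, BorrowIn = 1: the first subtract gives
// 0 with no borrow, but subtracting BorrowIn wraps the result to 0xFF, so
// the second check is what sets BorrowOut.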
4736 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4737 auto TmpResEqZero =
4738 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4739 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4740 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4741
4742 MI.eraseFromParent();
4743 return Legalized;
4744 }
4745 case G_UITOFP:
4746 return lowerUITOFP(MI);
4747 case G_SITOFP:
4748 return lowerSITOFP(MI);
4749 case G_FPTOUI:
4750 return lowerFPTOUI(MI);
4751 case G_FPTOSI:
4752 return lowerFPTOSI(MI);
4753 case G_FPTOUI_SAT:
4754 case G_FPTOSI_SAT:
4755 return lowerFPTOINT_SAT(MI);
4756 case G_FPTRUNC:
4757 return lowerFPTRUNC(MI);
4758 case G_FPOWI:
4759 return lowerFPOWI(MI);
4760 case G_SMIN:
4761 case G_SMAX:
4762 case G_UMIN:
4763 case G_UMAX:
4764 return lowerMinMax(MI);
4765 case G_SCMP:
4766 case G_UCMP:
4767 return lowerThreewayCompare(MI);
4768 case G_FCOPYSIGN:
4769 return lowerFCopySign(MI);
4770 case G_FMINNUM:
4771 case G_FMAXNUM:
4772 case G_FMINIMUMNUM:
4773 case G_FMAXIMUMNUM:
4774 return lowerFMinNumMaxNum(MI);
4775 case G_FMINIMUM:
4776 case G_FMAXIMUM:
4777 return lowerFMinimumMaximum(MI);
4778 case G_MERGE_VALUES:
4779 return lowerMergeValues(MI);
4780 case G_UNMERGE_VALUES:
4781 return lowerUnmergeValues(MI);
4782 case TargetOpcode::G_SEXT_INREG: {
4783 assert(MI.getOperand(2).isImm() && "Expected immediate");
4784 int64_t SizeInBits = MI.getOperand(2).getImm();
4785
4786 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4787 LLT DstTy = MRI.getType(DstReg);
4788 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4789
4790 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4791 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4792 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4793 MI.eraseFromParent();
4794 return Legalized;
4795 }
4796 case G_EXTRACT_VECTOR_ELT:
4797 case G_INSERT_VECTOR_ELT:
4798 return lowerExtractInsertVectorElt(MI);
4799 case G_SHUFFLE_VECTOR:
4800 return lowerShuffleVector(MI);
4801 case G_VECTOR_COMPRESS:
4802 return lowerVECTOR_COMPRESS(MI);
4803 case G_DYN_STACKALLOC:
4804 return lowerDynStackAlloc(MI);
4805 case G_STACKSAVE:
4806 return lowerStackSave(MI);
4807 case G_STACKRESTORE:
4808 return lowerStackRestore(MI);
4809 case G_EXTRACT:
4810 return lowerExtract(MI);
4811 case G_INSERT:
4812 return lowerInsert(MI);
4813 case G_BSWAP:
4814 return lowerBswap(MI);
4815 case G_BITREVERSE:
4816 return lowerBitreverse(MI);
4817 case G_READ_REGISTER:
4818 case G_WRITE_REGISTER:
4819 return lowerReadWriteRegister(MI);
4820 case G_UADDSAT:
4821 case G_USUBSAT: {
4822 // Try to make a reasonable guess about which lowering strategy to use. The
4823 // target can override this by requesting custom lowering and calling the
4824 // implementation functions directly.
4825 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4826 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4827 return lowerAddSubSatToMinMax(MI);
4828 return lowerAddSubSatToAddoSubo(MI);
4829 }
4830 case G_SADDSAT:
4831 case G_SSUBSAT: {
4832 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4833
4834 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4835 // since it's a shorter expansion. However, we would need to figure out the
4836 // preferred boolean type for the carry out for the query.
4837 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4838 return lowerAddSubSatToMinMax(MI);
4839 return lowerAddSubSatToAddoSubo(MI);
4840 }
4841 case G_SSHLSAT:
4842 case G_USHLSAT:
4843 return lowerShlSat(MI);
4844 case G_ABS:
4845 return lowerAbsToAddXor(MI);
4846 case G_ABDS:
4847 case G_ABDU: {
4848 bool IsSigned = MI.getOpcode() == G_ABDS;
4849 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4850 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4851 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4852 return lowerAbsDiffToMinMax(MI);
4853 }
4854 return lowerAbsDiffToSelect(MI);
4855 }
4856 case G_FABS:
4857 return lowerFAbs(MI);
4858 case G_SELECT:
4859 return lowerSelect(MI);
4860 case G_IS_FPCLASS:
4861 return lowerISFPCLASS(MI);
4862 case G_SDIVREM:
4863 case G_UDIVREM:
4864 return lowerDIVREM(MI);
4865 case G_FSHL:
4866 case G_FSHR:
4867 return lowerFunnelShift(MI);
4868 case G_ROTL:
4869 case G_ROTR:
4870 return lowerRotate(MI);
4871 case G_MEMSET:
4872 case G_MEMCPY:
4873 case G_MEMMOVE:
4874 return lowerMemCpyFamily(MI);
4875 case G_MEMCPY_INLINE:
4876 return lowerMemcpyInline(MI);
4877 case G_ZEXT:
4878 case G_SEXT:
4879 case G_ANYEXT:
4880 return lowerEXT(MI);
4881 case G_TRUNC:
4882 return lowerTRUNC(MI);
4883 GISEL_VECREDUCE_CASES_NONSEQ
4884 return lowerVectorReduction(MI);
4885 case G_VAARG:
4886 return lowerVAArg(MI);
4887 case G_ATOMICRMW_SUB: {
4888 auto [Ret, Mem, Val] = MI.getFirst3Regs();
4889 const LLT ValTy = MRI.getType(Val);
4890 MachineMemOperand *MMO = *MI.memoperands_begin();
4891
4892 auto VNeg = MIRBuilder.buildNeg(ValTy, Val);
4893 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4894 MI.eraseFromParent();
4895 return Legalized;
4896 }
4897 }
4898}
4899
4900Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4901 Align MinAlign) const {
4902 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4903 // datalayout for the preferred alignment. Also there should be a target hook
4904 // for this to allow targets to reduce the alignment and ignore the
4905 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4906 // the type.
4907 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4908}
4909
4910MachineInstrBuilder
4911LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4912 MachinePointerInfo &PtrInfo) {
4913 MachineFunction &MF = MIRBuilder.getMF();
4914 const DataLayout &DL = MIRBuilder.getDataLayout();
4915 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4916
4917 unsigned AddrSpace = DL.getAllocaAddrSpace();
4918 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4919
4920 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4921 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4922}
4923
4924MachineInstrBuilder LegalizerHelper::createStackStoreLoad(const DstOp &Res,
4925 const SrcOp &Val) {
4926 LLT SrcTy = Val.getLLTTy(MRI);
4927 Align StackTypeAlign =
4928 std::max(getStackTemporaryAlignment(SrcTy),
4929 getStackTemporaryAlignment(Res.getLLTTy(MRI)));
4930 MachinePointerInfo PtrInfo;
4931 auto StackTemp =
4932 createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo);
4933
4934 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4935 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4936}
4937
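/// Clamp \p IdxReg to a valid index for a vector with \p VecTy's element
/// count: constant in-bounds indices are returned unchanged, a power-of-2
/// element count is handled with a mask, and anything else falls back to a
/// umin against the last valid index.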
4938static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
4939 LLT VecTy) {
4940 LLT IdxTy = B.getMRI()->getType(IdxReg);
4941 unsigned NElts = VecTy.getNumElements();
4942
4943 int64_t IdxVal;
4944 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4945 if (IdxVal < VecTy.getNumElements())
4946 return IdxReg;
4947 // If a constant index would be out of bounds, clamp it as well.
4948 }
4949
4950 if (isPowerOf2_32(NElts)) {
4951 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4952 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4953 }
4954
4955 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4956 .getReg(0);
4957}
4958
4959Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4960 Register Index) {
4961 LLT EltTy = VecTy.getElementType();
4962
4963 // Calculate the element offset and add it to the pointer.
4964 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4965 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4966 "Converting bits to bytes lost precision");
4967
4968 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4969
4970 // Convert index to the correct size for the address space.
4971 const DataLayout &DL = MIRBuilder.getDataLayout();
4972 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4973 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4974 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4975 if (IdxTy != MRI.getType(Index))
4976 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4977
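 // ByteOffset = ClampedIndex * EltSize; e.g. element 5 of <8 x s32> yields a
 // 20-byte offset from the vector's base pointer.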
4978 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4979 MIRBuilder.buildConstant(IdxTy, EltSize));
4980
4981 LLT PtrTy = MRI.getType(VecPtr);
4982 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4983}
4984
4985#ifndef NDEBUG
4986/// Check that all vector operands have the same number of elements. Operands
4987/// that are not vectors should be listed in \p NonVecOpIndices.
4988static bool hasSameNumEltsOnAllVectorOperands(
4989 GenericMachineInstr &MI, MachineRegisterInfo &MRI,
4990 std::initializer_list<unsigned> NonVecOpIndices) {
4991 if (MI.getNumMemOperands() != 0)
4992 return false;
4993
4994 LLT VecTy = MRI.getType(MI.getReg(0));
4995 if (!VecTy.isVector())
4996 return false;
4997 unsigned NumElts = VecTy.getNumElements();
4998
4999 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
5000 MachineOperand &Op = MI.getOperand(OpIdx);
5001 if (!Op.isReg()) {
5002 if (!is_contained(NonVecOpIndices, OpIdx))
5003 return false;
5004 continue;
5005 }
5006
5007 LLT Ty = MRI.getType(Op.getReg());
5008 if (!Ty.isVector()) {
5009 if (!is_contained(NonVecOpIndices, OpIdx))
5010 return false;
5011 continue;
5012 }
5013
5014 if (Ty.getNumElements() != NumElts)
5015 return false;
5016 }
5017
5018 return true;
5019}
5020#endif
5021
5022/// Fill \p DstOps with DstOps whose combined number of elements matches that
5023/// of \p Ty. Each DstOp is either a scalar (when \p NumElts = 1) or a vector
5024/// with \p NumElts elements. When Ty.getNumElements() is not a multiple of
5025/// \p NumElts, the last DstOp (the leftover) has fewer than \p NumElts elements.
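/// For example, Ty = <7 x s32> with NumElts = 4 produces one <4 x s32> DstOp
/// followed by a <3 x s32> leftover DstOp.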
5026static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
5027 unsigned NumElts) {
5028 LLT LeftoverTy;
5029 assert(Ty.isVector() && "Expected vector type");
5030 LLT NarrowTy = Ty.changeElementCount(ElementCount::getFixed(NumElts));
5031 int NumParts, NumLeftover;
5032 std::tie(NumParts, NumLeftover) =
5033 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
5034
5035 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
5036 for (int i = 0; i < NumParts; ++i) {
5037 DstOps.push_back(NarrowTy);
5038 }
5039
5040 if (LeftoverTy.isValid()) {
5041 assert(NumLeftover == 1 && "expected exactly one leftover");
5042 DstOps.push_back(LeftoverTy);
5043 }
5044}
5045
5046/// Operand \p Op is used by \p N sub-instructions. Fill \p Ops with \p N SrcOps
5047/// made from \p Op, depending on the operand type.
5048static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
5049 MachineOperand &Op) {
5050 for (unsigned i = 0; i < N; ++i) {
5051 if (Op.isReg())
5052 Ops.push_back(Op.getReg());
5053 else if (Op.isImm())
5054 Ops.push_back(Op.getImm());
5055 else if (Op.isPredicate())
5056 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
5057 else
5058 llvm_unreachable("Unsupported type");
5059 }
5060}
5061
5062// Handle splitting vector operations which need to have the same number of
5063// elements in each type index, but each type index may have a different element
5064// type.
5065//
5066// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
5067// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5068// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5069//
5070// Also handles some irregular breakdown cases, e.g.
5071// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
5072// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5073// s64 = G_SHL s64, s32
5074LegalizerHelper::LegalizeResult
5075LegalizerHelper::fewerElementsVectorMultiEltType(
5076 GenericMachineInstr &MI, unsigned NumElts,
5077 std::initializer_list<unsigned> NonVecOpIndices) {
5078 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
5079 "Non-compatible opcode or not specified non-vector operands");
5080 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5081
5082 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5083 unsigned NumDefs = MI.getNumDefs();
5084
5085 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
5086 // Building with DstOps lets the instruction found by CSE be used directly;
5087 // with a vreg destination, CSE copies the found instruction into that vreg.
5088 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
5089 // Output registers will be taken from created instructions.
5090 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
5091 for (unsigned i = 0; i < NumDefs; ++i) {
5092 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
5093 }
5094
5095 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
5096 // Operands listed in NonVecOpIndices will be used as is without splitting;
5097 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
5098 // scalar condition (op 1), immediate in sext_inreg (op 2).
5099 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
5100 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5101 ++UseIdx, ++UseNo) {
5102 if (is_contained(NonVecOpIndices, UseIdx)) {
5103 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
5104 MI.getOperand(UseIdx));
5105 } else {
5106 SmallVector<Register, 8> SplitPieces;
5107 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
5108 MRI);
5109 llvm::append_range(InputOpsPieces[UseNo], SplitPieces);
5110 }
5111 }
5112
5113 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
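 // e.g. OrigNumElts = 7 with NumElts = 4 runs two iterations below: one full
 // 4-element piece and one 3-element leftover piece.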
5114
5115 // Take i-th piece of each input operand split and build sub-vector/scalar
5116 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
5117 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5118 SmallVector<DstOp, 2> Defs;
5119 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5120 Defs.push_back(OutputOpsPieces[DstNo][i]);
5121
5122 SmallVector<SrcOp, 3> Uses;
5123 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5124 Uses.push_back(InputOpsPieces[InputNo][i]);
5125
5126 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
5127 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5128 OutputRegs[DstNo].push_back(I.getReg(DstNo));
5129 }
5130
5131 // Merge small outputs into MI's output for each def operand.
5132 if (NumLeftovers) {
5133 for (unsigned i = 0; i < NumDefs; ++i)
5134 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
5135 } else {
5136 for (unsigned i = 0; i < NumDefs; ++i)
5137 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
5138 }
5139
5140 MI.eraseFromParent();
5141 return Legalized;
5142}
5143
5144LegalizerHelper::LegalizeResult
5145LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
5146 unsigned NumElts) {
5147 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5148
5149 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5150 unsigned NumDefs = MI.getNumDefs();
5151
5152 SmallVector<DstOp, 8> OutputOpsPieces;
5153 SmallVector<Register, 8> OutputRegs;
5154 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
5155
5156 // Instructions that perform the register split will be inserted in the basic
5157 // block where the register is defined (that block is in the next operand).
5158 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
5159 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5160 UseIdx += 2, ++UseNo) {
5161 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
5162 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5163 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
5164 MIRBuilder, MRI);
5165 }
5166
5167 // Build PHIs with fewer elements.
5168 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5169 MIRBuilder.setInsertPt(*MI.getParent(), MI);
5170 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5171 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5172 Phi.addDef(
5173 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5174 OutputRegs.push_back(Phi.getReg(0));
5175
5176 for (unsigned j = 0; j < NumInputs / 2; ++j) {
5177 Phi.addUse(InputOpsPieces[j][i]);
5178 Phi.add(MI.getOperand(1 + j * 2 + 1));
5179 }
5180 }
5181
5182 // Set the insert point after the existing PHIs
5183 MachineBasicBlock &MBB = *MI.getParent();
5184 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
5185
5186 // Merge small outputs into MI's def.
5187 if (NumLeftovers) {
5188 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
5189 } else {
5190 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
5191 }
5192
5193 MI.eraseFromParent();
5194 return Legalized;
5195}
5196
5197LegalizerHelper::LegalizeResult
5198LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
5199 unsigned TypeIdx,
5200 LLT NarrowTy) {
5201 const int NumDst = MI.getNumOperands() - 1;
5202 const Register SrcReg = MI.getOperand(NumDst).getReg();
5203 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5204 LLT SrcTy = MRI.getType(SrcReg);
5205
5206 if (TypeIdx != 1 || NarrowTy == DstTy)
5207 return UnableToLegalize;
5208
5209 // Requires compatible types. Otherwise SrcReg should have been defined by a
5210 // merge-like instruction that would get artifact-combined. Most likely the
5211 // instruction that defines SrcReg has to perform more/fewer-elements
5212 // legalization compatible with NarrowTy.
5213 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5214 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5215
5216 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5217 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
5218 return UnableToLegalize;
5219
5220 // This is most likely DstTy (smaller than register size) packed in SrcTy
5221 // (larger than register size), and since the unmerge was not combined it will
5222 // be lowered to bit-sequence extracts from a register. Unpack SrcTy to NarrowTy
5223 // (register size) pieces first. Then unpack each NarrowTy piece to DstTy.
5224
5225 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
5226 //
5227 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5228 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5229 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5230 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5231 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5232 const int PartsPerUnmerge = NumDst / NumUnmerge;
5233
5234 for (int I = 0; I != NumUnmerge; ++I) {
5235 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5236
5237 for (int J = 0; J != PartsPerUnmerge; ++J)
5238 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
5239 MIB.addUse(Unmerge.getReg(I));
5240 }
5241
5242 MI.eraseFromParent();
5243 return Legalized;
5244}
5245
5246LegalizerHelper::LegalizeResult
5247LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
5248 LLT NarrowTy) {
5249 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5250 // Requires compatible types. Otherwise the user of DstReg did not perform an
5251 // unmerge that should have been artifact-combined. Most likely the instruction
5252 // using DstReg has to do more/fewer-elements legalization compatible with NarrowTy.
5253 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5254 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5255 if (NarrowTy == SrcTy)
5256 return UnableToLegalize;
5257
5258 // This attempts to lower part of an LCMTy merge/unmerge sequence. It is
5259 // intended for old MIR tests. Since the changes to more/fewer-elements
5260 // legalization, it should no longer be possible to generate MIR like this from
5261 // LLVM IR, because the LCMTy approach was replaced with merge/unmerge to vector elements.
5262 if (TypeIdx == 1) {
5263 assert(SrcTy.isVector() && "Expected vector types");
5264 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5265 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5266 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
5267 return UnableToLegalize;
5268 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
5269 //
5270 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
5271 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
5272 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
5273 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
5274 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
5275 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
5276
5277 SmallVector<Register, 8> Elts;
5278 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
5279 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
5280 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
5281 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5282 Elts.push_back(Unmerge.getReg(j));
5283 }
5284
5285 SmallVector<Register, 8> NarrowTyElts;
5286 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5287 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5288 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
5289 ++i, Offset += NumNarrowTyElts) {
5290 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
5291 NarrowTyElts.push_back(
5292 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5293 }
5294
5295 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5296 MI.eraseFromParent();
5297 return Legalized;
5298 }
5299
5300 assert(TypeIdx == 0 && "Bad type index");
5301 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5302 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5303 return UnableToLegalize;
5304
5305 // This is most likely SrcTy (smaller than register size) packed in DstTy
5306 // (larger than register size), and since the merge was not combined it will be
5307 // lowered to bit-sequence packing into a register. Merge SrcTy to NarrowTy
5308 // (register size) pieces first. Then merge each NarrowTy piece into DstTy.
5309
5310 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
5311 //
5312 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5313 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5314 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
5315 SmallVector<Register, 8> NarrowTyElts;
5316 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5317 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5318 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
5319 for (unsigned i = 0; i < NumParts; ++i) {
5320 SmallVector<Register, 8> Sources;
5321 for (unsigned j = 0; j < NumElts; ++j)
5322 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
5323 NarrowTyElts.push_back(
5324 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5325 }
5326
5327 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5328 MI.eraseFromParent();
5329 return Legalized;
5330}
5331
5332LegalizerHelper::LegalizeResult
5333LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
5334 unsigned TypeIdx,
5335 LLT NarrowVecTy) {
5336 auto [DstReg, SrcVec] = MI.getFirst2Regs();
5337 Register InsertVal;
5338 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5339
5340 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
5341 if (IsInsert)
5342 InsertVal = MI.getOperand(2).getReg();
5343
5344 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
5345 LLT VecTy = MRI.getType(SrcVec);
5346
5347 // If the index is a constant, we can really break this down as you would
5348 // expect, and index into the target size pieces.
5349 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
5350 if (MaybeCst) {
5351 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5352 // Avoid out of bounds indexing the pieces.
5353 if (IdxVal >= VecTy.getNumElements()) {
5354 MIRBuilder.buildUndef(DstReg);
5355 MI.eraseFromParent();
5356 return Legalized;
5357 }
5358
5359 if (!NarrowVecTy.isVector()) {
5360 SmallVector<Register, 8> SplitPieces;
5361 extractParts(MI.getOperand(1).getReg(), NarrowVecTy,
5362 VecTy.getNumElements(), SplitPieces, MIRBuilder, MRI);
5363 if (IsInsert) {
5364 SplitPieces[IdxVal] = InsertVal;
5365 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), SplitPieces);
5366 } else {
5367 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5368 }
5369 } else {
5370 SmallVector<Register, 8> VecParts;
5371 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5372
5373 // Build a sequence of NarrowTy pieces in VecParts for this operand.
5374 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5375 TargetOpcode::G_ANYEXT);
5376
5377 unsigned NewNumElts = NarrowVecTy.getNumElements();
5378
5379 LLT IdxTy = MRI.getType(Idx);
5380 int64_t PartIdx = IdxVal / NewNumElts;
5381 auto NewIdx =
5382 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5383
5384 if (IsInsert) {
5385 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5386
5387 // Use the adjusted index to insert into one of the subvectors.
5388 auto InsertPart = MIRBuilder.buildInsertVectorElement(
5389 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5390 VecParts[PartIdx] = InsertPart.getReg(0);
5391
5392 // Recombine the inserted subvector with the others to reform the result
5393 // vector.
5394 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5395 } else {
5396 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5397 }
5398 }
5399
5400 MI.eraseFromParent();
5401 return Legalized;
5402 }
5403
5404 // With a variable index, we can't perform the operation in a smaller type, so
5405 // we're forced to expand this.
5406 //
5407 // TODO: We could emit a chain of compare/select to figure out which piece to
5408 // index.
5409 return lowerExtractInsertVectorElt(MI);
5410}
5411
5412LegalizerHelper::LegalizeResult
5413LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
5414 LLT NarrowTy) {
5415 // FIXME: Don't know how to handle secondary types yet.
5416 if (TypeIdx != 0)
5417 return UnableToLegalize;
5418
5419 if (!NarrowTy.isByteSized()) {
5420 LLVM_DEBUG(dbgs() << "Can't narrow load/store to non-byte-sized type\n");
5421 return UnableToLegalize;
5422 }
5423
5424 // This implementation doesn't work for atomics. Give up instead of doing
5425 // something invalid.
5426 if (LdStMI.isAtomic())
5427 return UnableToLegalize;
5428
5429 bool IsLoad = isa<GLoad>(LdStMI);
5430 Register ValReg = LdStMI.getReg(0);
5431 Register AddrReg = LdStMI.getPointerReg();
5432 LLT ValTy = MRI.getType(ValReg);
5433
5434 // FIXME: Do we need a distinct NarrowMemory legalize action?
5435 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
5436 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
5437 return UnableToLegalize;
5438 }
5439
5440 int NumParts = -1;
5441 int NumLeftover = -1;
5442 LLT LeftoverTy;
5443 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
5444 if (IsLoad) {
5445 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
5446 } else {
5447 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5448 NarrowLeftoverRegs, MIRBuilder, MRI)) {
5449 NumParts = NarrowRegs.size();
5450 NumLeftover = NarrowLeftoverRegs.size();
5451 }
5452 }
5453
5454 if (NumParts == -1)
5455 return UnableToLegalize;
5456
5457 LLT PtrTy = MRI.getType(AddrReg);
5458 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
5459
5460 unsigned TotalSize = ValTy.getSizeInBits();
5461
5462 // Split the load/store into PartTy-sized pieces starting at Offset. If this
5463 // is a load, return the new registers in ValRegs. For a store, each element
5464 // of ValRegs should be PartTy. Returns the next offset that needs to be
5465 // handled.
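 // e.g. narrowing an s96 load with NarrowTy = s64 (little-endian) produces an
 // s64 load at byte offset 0 and a leftover s32 load at byte offset 8.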
5466 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
5467 auto MMO = LdStMI.getMMO();
5468 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
5469 unsigned NumParts, unsigned Offset) -> unsigned {
5470 MachineFunction &MF = MIRBuilder.getMF();
5471 unsigned PartSize = PartTy.getSizeInBits();
5472 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
5473 ++Idx) {
5474 unsigned ByteOffset = Offset / 8;
5475 Register NewAddrReg;
5476
5477 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5478 ByteOffset);
5479
5480 MachineMemOperand *NewMMO =
5481 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
5482
5483 if (IsLoad) {
5484 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5485 ValRegs.push_back(Dst);
5486 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5487 } else {
5488 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5489 }
5490 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
5491 }
5492
5493 return Offset;
5494 };
5495
5496 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
5497 unsigned HandledOffset =
5498 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
5499
5500 // Handle the rest of the register if this isn't an even type breakdown.
5501 if (LeftoverTy.isValid())
5502 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5503
5504 if (IsLoad) {
5505 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5506 LeftoverTy, NarrowLeftoverRegs);
5507 }
5508
5509 LdStMI.eraseFromParent();
5510 return Legalized;
5511}
5512
5513LegalizerHelper::LegalizeResult
5514LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
5515 LLT NarrowTy) {
5516 using namespace TargetOpcode;
5517 GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
5518 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5519
5520 switch (MI.getOpcode()) {
5521 case G_IMPLICIT_DEF:
5522 case G_TRUNC:
5523 case G_AND:
5524 case G_OR:
5525 case G_XOR:
5526 case G_ADD:
5527 case G_SUB:
5528 case G_MUL:
5529 case G_PTR_ADD:
5530 case G_SMULH:
5531 case G_UMULH:
5532 case G_FADD:
5533 case G_FMUL:
5534 case G_FSUB:
5535 case G_FNEG:
5536 case G_FABS:
5537 case G_FCANONICALIZE:
5538 case G_FDIV:
5539 case G_FREM:
5540 case G_FMA:
5541 case G_FMAD:
5542 case G_FPOW:
5543 case G_FEXP:
5544 case G_FEXP2:
5545 case G_FEXP10:
5546 case G_FLOG:
5547 case G_FLOG2:
5548 case G_FLOG10:
5549 case G_FLDEXP:
5550 case G_FNEARBYINT:
5551 case G_FCEIL:
5552 case G_FFLOOR:
5553 case G_FRINT:
5554 case G_INTRINSIC_LRINT:
5555 case G_INTRINSIC_LLRINT:
5556 case G_INTRINSIC_ROUND:
5557 case G_INTRINSIC_ROUNDEVEN:
5558 case G_LROUND:
5559 case G_LLROUND:
5560 case G_INTRINSIC_TRUNC:
5561 case G_FMODF:
5562 case G_FCOS:
5563 case G_FSIN:
5564 case G_FTAN:
5565 case G_FACOS:
5566 case G_FASIN:
5567 case G_FATAN:
5568 case G_FATAN2:
5569 case G_FCOSH:
5570 case G_FSINH:
5571 case G_FTANH:
5572 case G_FSQRT:
5573 case G_BSWAP:
5574 case G_BITREVERSE:
5575 case G_SDIV:
5576 case G_UDIV:
5577 case G_SREM:
5578 case G_UREM:
5579 case G_SDIVREM:
5580 case G_UDIVREM:
5581 case G_SMIN:
5582 case G_SMAX:
5583 case G_UMIN:
5584 case G_UMAX:
5585 case G_ABS:
5586 case G_FMINNUM:
5587 case G_FMAXNUM:
5588 case G_FMINNUM_IEEE:
5589 case G_FMAXNUM_IEEE:
5590 case G_FMINIMUM:
5591 case G_FMAXIMUM:
5592 case G_FMINIMUMNUM:
5593 case G_FMAXIMUMNUM:
5594 case G_FSHL:
5595 case G_FSHR:
5596 case G_ROTL:
5597 case G_ROTR:
5598 case G_FREEZE:
5599 case G_SADDSAT:
5600 case G_SSUBSAT:
5601 case G_UADDSAT:
5602 case G_USUBSAT:
5603 case G_UMULO:
5604 case G_SMULO:
5605 case G_SHL:
5606 case G_LSHR:
5607 case G_ASHR:
5608 case G_SSHLSAT:
5609 case G_USHLSAT:
5610 case G_CTLZ:
5611 case G_CTLZ_ZERO_UNDEF:
5612 case G_CTTZ:
5613 case G_CTTZ_ZERO_UNDEF:
5614 case G_CTPOP:
5615 case G_FCOPYSIGN:
5616 case G_ZEXT:
5617 case G_SEXT:
5618 case G_ANYEXT:
5619 case G_FPEXT:
5620 case G_FPTRUNC:
5621 case G_SITOFP:
5622 case G_UITOFP:
5623 case G_FPTOSI:
5624 case G_FPTOUI:
5625 case G_FPTOSI_SAT:
5626 case G_FPTOUI_SAT:
5627 case G_INTTOPTR:
5628 case G_PTRTOINT:
5629 case G_ADDRSPACE_CAST:
5630 case G_UADDO:
5631 case G_USUBO:
5632 case G_UADDE:
5633 case G_USUBE:
5634 case G_SADDO:
5635 case G_SSUBO:
5636 case G_SADDE:
5637 case G_SSUBE:
5638 case G_STRICT_FADD:
5639 case G_STRICT_FSUB:
5640 case G_STRICT_FMUL:
5641 case G_STRICT_FMA:
5642 case G_STRICT_FLDEXP:
5643 case G_FFREXP:
5644 return fewerElementsVectorMultiEltType(GMI, NumElts);
5645 case G_ICMP:
5646 case G_FCMP:
5647 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
5648 case G_IS_FPCLASS:
5649 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
5650 case G_SELECT:
5651 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
5652 return fewerElementsVectorMultiEltType(GMI, NumElts);
5653 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
5654 case G_PHI:
5655 return fewerElementsVectorPhi(GMI, NumElts);
5656 case G_UNMERGE_VALUES:
5657 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
5658 case G_BUILD_VECTOR:
5659 assert(TypeIdx == 0 && "not a vector type index");
5660 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5661 case G_CONCAT_VECTORS:
5662 if (TypeIdx != 1) // TODO: This probably does work as expected already.
5663 return UnableToLegalize;
5664 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5665 case G_EXTRACT_VECTOR_ELT:
5666 case G_INSERT_VECTOR_ELT:
5667 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
5668 case G_LOAD:
5669 case G_STORE:
5670 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
5671 case G_SEXT_INREG:
5672 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
5673 GISEL_VECREDUCE_CASES_NONSEQ
5674 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
5675 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5676 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5677 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
5678 case G_SHUFFLE_VECTOR:
5679 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
5680 case G_FPOWI:
5681 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
5682 case G_BITCAST:
5683 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
5684 case G_INTRINSIC_FPTRUNC_ROUND:
5685 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
5686 default:
5687 return UnableToLegalize;
5688 }
5689}
5690
5691LegalizerHelper::LegalizeResult
5692LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx,
5693 LLT NarrowTy) {
5694 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5695 "Not a bitcast operation");
5696
5697 if (TypeIdx != 0)
5698 return UnableToLegalize;
5699
5700 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5701
5702 unsigned NewElemCount =
5703 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5704 SmallVector<Register> SrcVRegs, BitcastVRegs;
5705 if (NewElemCount == 1) {
5706 LLT SrcNarrowTy = SrcTy.getElementType();
5707
5708 auto Unmerge = MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5709 getUnmergeResults(SrcVRegs, *Unmerge);
5710 } else {
5711 LLT SrcNarrowTy =
5712 LLT::fixed_vector(NewElemCount, SrcTy.getElementType());
5713
5714 // Split the Src and Dst Reg into smaller registers
5715 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5716 return UnableToLegalize;
5717 }
5718
5719 // Build new, smaller bitcast instructions.
5720 // Leftover types are not supported for now, but will have to be eventually.
5721 for (Register Reg : SrcVRegs)
5722 BitcastVRegs.push_back(MIRBuilder.buildBitcast(NarrowTy, Reg).getReg(0));
5723
5724 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5725 MI.eraseFromParent();
5726 return Legalized;
5727}
5728
5729LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
5730 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5731 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5732 if (TypeIdx != 0)
5733 return UnableToLegalize;
5734
5735 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5736 MI.getFirst3RegLLTs();
5737 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5738 // The shuffle should be canonicalized by now.
5739 if (DstTy != Src1Ty)
5740 return UnableToLegalize;
5741 if (DstTy != Src2Ty)
5742 return UnableToLegalize;
5743
5744 if (!isPowerOf2_32(DstTy.getNumElements()))
5745 return UnableToLegalize;
5746
5747 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5748 // Further legalization attempts will be needed to split further.
5749 NarrowTy =
5750 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
5751 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5752
5753 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
5754 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
5755 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
5756 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5757 SplitSrc2Regs[1]};
5758
5759 Register Hi, Lo;
5760
5761 // If Lo or Hi uses elements from at most two of the four input vectors, then
5762 // express it as a vector shuffle of those two inputs. Otherwise extract the
5763 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
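 // e.g. splitting a <4 x s32> shuffle with mask <0, 4, 1, 5>: Lo becomes a
 // shuffle of Src1Lo and Src2Lo with mask <0, 2>, and Hi a shuffle of the same
 // two inputs with mask <1, 3>.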
5764 SmallVector<int, 16> Ops;
5765 for (unsigned High = 0; High < 2; ++High) {
5766 Register &Output = High ? Hi : Lo;
5767
5768 // Build a shuffle mask for the output, discovering on the fly which
5769 // input vectors to use as shuffle operands (recorded in InputUsed).
5770 // If building a suitable shuffle vector proves too hard, then bail
5771 // out with useBuildVector set.
5772 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
5773 unsigned FirstMaskIdx = High * NewElts;
5774 bool UseBuildVector = false;
5775 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5776 // The mask element. This indexes into the input.
5777 int Idx = Mask[FirstMaskIdx + MaskOffset];
5778
5779 // The input vector this mask element indexes into.
5780 unsigned Input = (unsigned)Idx / NewElts;
5781
5782 if (Input >= std::size(Inputs)) {
5783 // The mask element does not index into any input vector.
5784 Ops.push_back(-1);
5785 continue;
5786 }
5787
5788 // Turn the index into an offset from the start of the input vector.
5789 Idx -= Input * NewElts;
5790
5791 // Find or create a shuffle vector operand to hold this input.
5792 unsigned OpNo;
5793 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5794 if (InputUsed[OpNo] == Input) {
5795 // This input vector is already an operand.
5796 break;
5797 } else if (InputUsed[OpNo] == -1U) {
5798 // Create a new operand for this input vector.
5799 InputUsed[OpNo] = Input;
5800 break;
5801 }
5802 }
5803
5804 if (OpNo >= std::size(InputUsed)) {
5805 // More than two input vectors used! Give up on trying to create a
5806 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5807 UseBuildVector = true;
5808 break;
5809 }
5810
5811 // Add the mask index for the new shuffle vector.
5812 Ops.push_back(Idx + OpNo * NewElts);
5813 }
5814
5815 if (UseBuildVector) {
5816 LLT EltTy = NarrowTy.getElementType();
5817 SmallVector<Register, 16> SVOps;
5818
5819 // Extract the input elements by hand.
5820 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5821 // The mask element. This indexes into the input.
5822 int Idx = Mask[FirstMaskIdx + MaskOffset];
5823
5824 // The input vector this mask element indexes into.
5825 unsigned Input = (unsigned)Idx / NewElts;
5826
5827 if (Input >= std::size(Inputs)) {
5828 // The mask element is "undef" or indexes off the end of the input.
5829 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5830 continue;
5831 }
5832
5833 // Turn the index into an offset from the start of the input vector.
5834 Idx -= Input * NewElts;
5835
5836 // Extract the vector element by hand.
5837 SVOps.push_back(MIRBuilder
5838 .buildExtractVectorElement(
5839 EltTy, Inputs[Input],
5840 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5841 .getReg(0));
5842 }
5843
5844 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5845 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5846 } else if (InputUsed[0] == -1U) {
5847 // No input vectors were used! The result is undefined.
5848 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5849 } else if (NewElts == 1) {
5850 Output = MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);
5851 } else {
5852 Register Op0 = Inputs[InputUsed[0]];
5853 // If only one input was used, use an undefined vector for the other.
5854 Register Op1 = InputUsed[1] == -1U
5855 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5856 : Inputs[InputUsed[1]];
5857 // At least one input vector was used. Create a new shuffle vector.
5858 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5859 }
5860
5861 Ops.clear();
5862 }
5863
5864 MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
5865 MI.eraseFromParent();
5866 return Legalized;
5867}
5868
5869LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
5870 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5871 auto &RdxMI = cast<GVecReduce>(MI);
5872
5873 if (TypeIdx != 1)
5874 return UnableToLegalize;
5875
5876 // The semantics of the normal non-sequential reductions allow us to freely
5877 // re-associate the operation.
5878 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5879
5880 if (NarrowTy.isVector() &&
5881 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5882 return UnableToLegalize;
5883
5884 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5885 SmallVector<Register> SplitSrcs;
5886 // If NarrowTy is a scalar then we're being asked to scalarize.
5887 const unsigned NumParts =
5888 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5889 : SrcTy.getNumElements();
5890
5891 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5892 if (NarrowTy.isScalar()) {
5893 if (DstTy != NarrowTy)
5894 return UnableToLegalize; // FIXME: handle implicit extensions.
5895
5896 if (isPowerOf2_32(NumParts)) {
5897 // Generate a tree of scalar operations to reduce the critical path.
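 // e.g. 8 scalar pieces are combined pairwise in three rounds: 8 -> 4 -> 2 -> 1.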
5898 SmallVector<Register> PartialResults;
5899 unsigned NumPartsLeft = NumParts;
5900 while (NumPartsLeft > 1) {
5901 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5902 PartialResults.emplace_back(
5903 MIRBuilder
5904 .buildInstr(ScalarOpc, {NarrowTy},
5905 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5906 .getReg(0));
5907 }
5908 SplitSrcs = PartialResults;
5909 PartialResults.clear();
5910 NumPartsLeft = SplitSrcs.size();
5911 }
5912 assert(SplitSrcs.size() == 1);
5913 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5914 MI.eraseFromParent();
5915 return Legalized;
5916 }
5917 // If we can't generate a tree, then just do sequential operations.
5918 Register Acc = SplitSrcs[0];
5919 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5920 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5921 .getReg(0);
5922 MIRBuilder.buildCopy(DstReg, Acc);
5923 MI.eraseFromParent();
5924 return Legalized;
5925 }
5926 SmallVector<Register> PartialReductions;
5927 for (unsigned Part = 0; Part < NumParts; ++Part) {
5928 PartialReductions.push_back(
5929 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5930 .getReg(0));
5931 }
5932
5933 // If the types involved are powers of 2, we can generate intermediate vector
5934 // ops, before generating a final reduction operation.
5935 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5936 isPowerOf2_32(NarrowTy.getNumElements())) {
5937 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5938 }
5939
5940 Register Acc = PartialReductions[0];
5941 for (unsigned Part = 1; Part < NumParts; ++Part) {
5942 if (Part == NumParts - 1) {
5943 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5944 {Acc, PartialReductions[Part]});
5945 } else {
5946 Acc = MIRBuilder
5947 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5948 .getReg(0);
5949 }
5950 }
5951 MI.eraseFromParent();
5952 return Legalized;
5953}
5954
5955LegalizerHelper::LegalizeResult
5956LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
5957 unsigned int TypeIdx,
5958 LLT NarrowTy) {
5959 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5960 MI.getFirst3RegLLTs();
5961 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5962 DstTy != NarrowTy)
5963 return UnableToLegalize;
5964
5965 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5966 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5967 "Unexpected vecreduce opcode");
5968 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5969 ? TargetOpcode::G_FADD
5970 : TargetOpcode::G_FMUL;
5971
5972 SmallVector<Register> SplitSrcs;
5973 unsigned NumParts = SrcTy.getNumElements();
5974 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5975 Register Acc = ScalarReg;
5976 for (unsigned i = 0; i < NumParts; i++)
5977 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5978 .getReg(0);
5979
5980 MIRBuilder.buildCopy(DstReg, Acc);
5981 MI.eraseFromParent();
5982 return Legalized;
5983}
5984
5985LegalizerHelper::LegalizeResult
5986LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5987 LLT SrcTy, LLT NarrowTy,
5988 unsigned ScalarOpc) {
5989 SmallVector<Register> SplitSrcs;
5990 // Split the sources into NarrowTy size pieces.
5991 extractParts(SrcReg, NarrowTy,
5992 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5993 MIRBuilder, MRI);
5994 // We're going to do a tree reduction using vector operations until we have
5995 // one NarrowTy size value left.
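 // e.g. a <16 x s32> source with NarrowTy = <4 x s32> yields 4 pieces combined
 // 4 -> 2 -> 1; the final G_VECREDUCE_* then reads the remaining <4 x s32>.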
5996 while (SplitSrcs.size() > 1) {
5997 SmallVector<Register> PartialRdxs;
5998 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5999 Register LHS = SplitSrcs[Idx];
6000 Register RHS = SplitSrcs[Idx + 1];
6001 // Create the intermediate vector op.
6002 Register Res =
6003 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
6004 PartialRdxs.push_back(Res);
6005 }
6006 SplitSrcs = std::move(PartialRdxs);
6007 }
6008 // Finally generate the requested NarrowTy based reduction.
6009 Observer.changingInstr(MI);
6010 MI.getOperand(1).setReg(SplitSrcs[0]);
6011 Observer.changedInstr(MI);
6012 return Legalized;
6013}
6014
6015LegalizerHelper::LegalizeResult
6016LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
6017 const LLT HalfTy, const LLT AmtTy) {
6018
6019 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6020 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6021 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6022
6023 if (Amt.isZero()) {
6024 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
6025 MI.eraseFromParent();
6026 return Legalized;
6027 }
6028
6029 LLT NVT = HalfTy;
6030 unsigned NVTBits = HalfTy.getSizeInBits();
6031 unsigned VTBits = 2 * NVTBits;
6032
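 // e.g. an s64 G_SHL by 40 with HalfTy = s32: Amt > NVTBits, so Lo = 0 and
 // Hi = InL << (40 - 32).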
6033 SrcOp Lo(Register(0)), Hi(Register(0));
6034 if (MI.getOpcode() == TargetOpcode::G_SHL) {
6035 if (Amt.ugt(VTBits)) {
6036 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6037 } else if (Amt.ugt(NVTBits)) {
6038 Lo = MIRBuilder.buildConstant(NVT, 0);
6039 Hi = MIRBuilder.buildShl(NVT, InL,
6040 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6041 } else if (Amt == NVTBits) {
6042 Lo = MIRBuilder.buildConstant(NVT, 0);
6043 Hi = InL;
6044 } else {
6045 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
6046 auto OrLHS =
6047 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
6048 auto OrRHS = MIRBuilder.buildLShr(
6049 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6050 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6051 }
6052 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6053 if (Amt.ugt(VTBits)) {
6054 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6055 } else if (Amt.ugt(NVTBits)) {
6056 Lo = MIRBuilder.buildLShr(NVT, InH,
6057 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6058 Hi = MIRBuilder.buildConstant(NVT, 0);
6059 } else if (Amt == NVTBits) {
6060 Lo = InH;
6061 Hi = MIRBuilder.buildConstant(NVT, 0);
6062 } else {
6063 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6064
6065 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6066 auto OrRHS = MIRBuilder.buildShl(
6067 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6068
6069 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6070 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
6071 }
6072 } else {
6073 if (Amt.ugt(VTBits)) {
6074 Hi = Lo = MIRBuilder.buildAShr(
6075 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6076 } else if (Amt.ugt(NVTBits)) {
6077 Lo = MIRBuilder.buildAShr(NVT, InH,
6078 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6079 Hi = MIRBuilder.buildAShr(NVT, InH,
6080 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6081 } else if (Amt == NVTBits) {
6082 Lo = InH;
6083 Hi = MIRBuilder.buildAShr(NVT, InH,
6084 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6085 } else {
6086 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6087
6088 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6089 auto OrRHS = MIRBuilder.buildShl(
6090 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6091
6092 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6093 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
6094 }
6095 }
6096
6097 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
6098 MI.eraseFromParent();
6099
6100 return Legalized;
6101}
6102
6103LegalizerHelper::LegalizeResult
6104LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
6105 LLT RequestedTy) {
6106 if (TypeIdx == 1) {
6107 Observer.changingInstr(MI);
6108 narrowScalarSrc(MI, RequestedTy, 2);
6109 Observer.changedInstr(MI);
6110 return Legalized;
6111 }
6112
6113 Register DstReg = MI.getOperand(0).getReg();
6114 LLT DstTy = MRI.getType(DstReg);
6115 if (DstTy.isVector())
6116 return UnableToLegalize;
6117
6118 Register Amt = MI.getOperand(2).getReg();
6119 LLT ShiftAmtTy = MRI.getType(Amt);
6120 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
6121 if (DstEltSize % 2 != 0)
6122 return UnableToLegalize;
6123
6124 // Check if we should use multi-way splitting instead of recursive binary
6125 // splitting.
6126 //
6127 // Multi-way splitting directly decomposes wide shifts (e.g., 128-bit ->
6128 // 4×32-bit) in a single legalization step, avoiding the recursive overhead
6129 // and dependency chains created by the usual binary splitting approach
6130 // (128->64->32).
6131 //
6132 // The >= 8 parts threshold ensures we only use this optimization when binary
6133 // splitting would require multiple recursive passes, avoiding overhead for
6134 // simple 2-way splits where the binary approach is sufficient.
6135 if (RequestedTy.isValid() && RequestedTy.isScalar() &&
6136 DstEltSize % RequestedTy.getSizeInBits() == 0) {
6137 const unsigned NumParts = DstEltSize / RequestedTy.getSizeInBits();
6138 // Use multiway if we have 8 or more parts (i.e., would need 3+ recursive
6139 // steps).
6140 if (NumParts >= 8)
6141 return narrowScalarShiftMultiway(MI, RequestedTy);
6142 }
6143
6144 // Fall back to binary splitting:
6145 // Ignore the input type. We can only go to exactly half the size of the
6146 // input. If that isn't small enough, the resulting pieces will be further
6147 // legalized.
6148 const unsigned NewBitSize = DstEltSize / 2;
6149 const LLT HalfTy = LLT::scalar(NewBitSize);
6150 const LLT CondTy = LLT::scalar(1);
6151
6152 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
6153 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
6154 ShiftAmtTy);
6155 }
6156
6157 // TODO: Expand with known bits.
6158
6159 // Handle the fully general expansion by an unknown amount.
6160 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6161
6162 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6163 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6164 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6165
6166 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6167 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
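 // AmtLack = NewBitSize - Amt is the complementary shift used to grab the bits
 // that cross into the other half; AmtExcess = Amt - NewBitSize is the
 // effective shift when Amt >= NewBitSize.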
6168
6169 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6170 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
6171 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
6172
6173 Register ResultRegs[2];
6174 switch (MI.getOpcode()) {
6175 case TargetOpcode::G_SHL: {
6176 // Short: ShAmt < NewBitSize
6177 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
6178
6179 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6180 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
6181 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6182
6183 // Long: ShAmt >= NewBitSize
6184 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
6185 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
6186
6187 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6188 auto Hi = MIRBuilder.buildSelect(
6189 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6190
6191 ResultRegs[0] = Lo.getReg(0);
6192 ResultRegs[1] = Hi.getReg(0);
6193 break;
6194 }
6195 case TargetOpcode::G_LSHR:
6196 case TargetOpcode::G_ASHR: {
6197 // Short: ShAmt < NewBitSize
6198 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
6199
6200 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
6201 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6202 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6203
6204 // Long: ShAmt >= NewBitSize
6206 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6207 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
6208 } else {
6209 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6210 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
6211 }
6212 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
6213 {InH, AmtExcess}); // Lo from Hi part.
6214
6215 auto Lo = MIRBuilder.buildSelect(
6216 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6217
6218 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6219
6220 ResultRegs[0] = Lo.getReg(0);
6221 ResultRegs[1] = Hi.getReg(0);
6222 break;
6223 }
6224 default:
6225 llvm_unreachable("not a shift");
6226 }
6227
6228 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6229 MI.eraseFromParent();
6230 return Legalized;
6231}
6232
6233Register LegalizerHelper::buildConstantShiftPart(unsigned Opcode,
6234 unsigned PartIdx,
6235 unsigned NumParts,
6236 ArrayRef<Register> SrcParts,
6237 const ShiftParams &Params,
6238 LLT TargetTy, LLT ShiftAmtTy) {
6239 auto WordShiftConst = getIConstantVRegVal(Params.WordShift, MRI);
6240 auto BitShiftConst = getIConstantVRegVal(Params.BitShift, MRI);
6241 assert(WordShiftConst && BitShiftConst && "Expected constants");
6242
6243 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6244 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6245 const bool NeedsInterWordShift = ShiftBits != 0;
6246
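 // e.g. a 128-bit G_SHL by 40 with 32-bit parts: ShiftWords = 1, ShiftBits = 8,
 // so part 0 is zero, part 1 = SrcParts[0] << 8, and part I (I >= 2) is
 // (SrcParts[I-1] << 8) | (SrcParts[I-2] >> 24).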
6247 switch (Opcode) {
6248 case TargetOpcode::G_SHL: {
6249 // Data moves from lower indices to higher indices
6250 // If this part would come from a source beyond our range, it's zero
6251 if (PartIdx < ShiftWords)
6252 return Params.Zero;
6253
6254 unsigned SrcIdx = PartIdx - ShiftWords;
6255 if (!NeedsInterWordShift)
6256 return SrcParts[SrcIdx];
6257
6258 // Combine shifted main part with carry from previous part
6259 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6260 if (SrcIdx > 0) {
6261 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6262 Params.InvBitShift);
6263 return MIRBuilder.buildOr(TargetTy, Hi, Lo).getReg(0);
6264 }
6265 return Hi.getReg(0);
6266 }
6267
6268 case TargetOpcode::G_LSHR: {
6269 unsigned SrcIdx = PartIdx + ShiftWords;
6270 if (SrcIdx >= NumParts)
6271 return Params.Zero;
6272 if (!NeedsInterWordShift)
6273 return SrcParts[SrcIdx];
6274
6275 // Combine shifted main part with carry from next part
6276 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6277 if (SrcIdx + 1 < NumParts) {
6278 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6279 Params.InvBitShift);
6280 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6281 }
6282 return Lo.getReg(0);
6283 }
6284
6285 case TargetOpcode::G_ASHR: {
6286 // Like LSHR but preserves sign bit
6287 unsigned SrcIdx = PartIdx + ShiftWords;
6288 if (SrcIdx >= NumParts)
6289 return Params.SignBit;
6290 if (!NeedsInterWordShift)
6291 return SrcParts[SrcIdx];
6292
6293 // Only the original MSB part uses arithmetic shift to preserve sign. All
6294 // other parts use logical shift since they're just moving data bits.
6295 auto Lo =
6296 (SrcIdx == NumParts - 1)
6297 ? MIRBuilder.buildAShr(TargetTy, SrcParts[SrcIdx], Params.BitShift)
6298 : MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6299 Register HiSrc =
6300 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.SignBit;
6301 auto Hi = MIRBuilder.buildShl(TargetTy, HiSrc, Params.InvBitShift);
6302 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6303 }
6304
6305 default:
6306 llvm_unreachable("not a shift");
6307 }
6308}
6309
6310Register LegalizerHelper::buildVariableShiftPart(unsigned Opcode,
6311 Register MainOperand,
6312 Register ShiftAmt,
6313 LLT TargetTy,
6314 Register CarryOperand) {
6315 // This helper generates a single output part for variable shifts by combining
6316 // the main operand (shifted by BitShift) with carry bits from an adjacent
6317 // part.
6318
6319 // For G_ASHR, individual parts don't have their own sign bit, only the
6320 // complete value does. So we use LSHR for the main operand shift in ASHR
6321 // context.
6322 unsigned MainOpcode =
6323 (Opcode == TargetOpcode::G_ASHR) ? TargetOpcode::G_LSHR : Opcode;
6324
6325 // Perform the primary shift on the main operand
6326 Register MainShifted =
6327 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6328 .getReg(0);
6329
6330 // No carry operand available
6331 if (!CarryOperand.isValid())
6332 return MainShifted;
6333
6334 // If BitShift is 0 (word-aligned shift), no inter-word bit movement occurs,
6335 // so carry bits aren't needed.
6336 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6337 auto ZeroConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6338 LLT BoolTy = LLT::scalar(1);
6339 auto IsZeroBitShift =
6340 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, ShiftAmt, ZeroConst);
6341
6342 // Extract bits from the adjacent part that will "carry over" into this part.
6343 // The carry direction is opposite to the main shift direction, so we can
6344 // align the two shifted values before combining them with OR.
6345
6346 // Determine the carry shift opcode (opposite direction)
6347 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6348 : TargetOpcode::G_SHL;
6349
6350 // Calculate inverse shift amount: BitWidth - ShiftAmt
6351 auto TargetBitsConst =
6352 MIRBuilder.buildConstant(ShiftAmtTy, TargetTy.getScalarSizeInBits());
6353 auto InvShiftAmt = MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6354
6355 // Shift the carry operand
6356 Register CarryBits =
6357 MIRBuilder
6358 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6359 .getReg(0);
6360
6361 // If BitShift is 0, don't include carry bits (InvShiftAmt would equal
6362 // TargetBits which would be poison for the individual carry shift operation).
6363 auto ZeroReg = MIRBuilder.buildConstant(TargetTy, 0);
6364 Register SafeCarryBits =
6365 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6366 .getReg(0);
6367
6368 // Combine the main shifted part with the carry bits
6369 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6370}
6371
6372LegalizerHelper::LegalizeResult
6373LegalizerHelper::narrowScalarShiftByConstantMultiway(MachineInstr &MI,
6374 const APInt &Amt,
6375 LLT TargetTy,
6376 LLT ShiftAmtTy) {
6377 // Any wide shift can be decomposed into WordShift + BitShift components.
6378 // When shift amount is known constant, directly compute the decomposition
6379 // values and generate constant registers.
6380 Register DstReg = MI.getOperand(0).getReg();
6381 Register SrcReg = MI.getOperand(1).getReg();
6382 LLT DstTy = MRI.getType(DstReg);
6383
6384 const unsigned DstBits = DstTy.getScalarSizeInBits();
6385 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6386 const unsigned NumParts = DstBits / TargetBits;
6387
6388 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6389
6390 // When the shift amount is known at compile time, we just calculate which
6391 // source parts contribute to each output part.
6392
6393 SmallVector<Register, 8> SrcParts;
6394 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6395
6396 if (Amt.isZero()) {
6397 // No shift needed, just copy
6398 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6399 MI.eraseFromParent();
6400 return Legalized;
6401 }
6402
6403 ShiftParams Params;
6404 const unsigned ShiftWords = Amt.getZExtValue() / TargetBits;
6405 const unsigned ShiftBits = Amt.getZExtValue() % TargetBits;
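 // e.g. Amt = 50 with 32-bit parts gives ShiftWords = 1 and ShiftBits = 18.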
6406
6407 // Generate constants and values needed by all shift types
6408 Params.WordShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftWords).getReg(0);
6409 Params.BitShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftBits).getReg(0);
6410 Params.InvBitShift =
6411 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6412 Params.Zero = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6413
6414 // For ASHR, we need the sign-extended value to fill shifted-out positions
6415 if (MI.getOpcode() == TargetOpcode::G_ASHR)
6416 Params.SignBit =
6417 MIRBuilder
6418 .buildAShr(TargetTy, SrcParts[SrcParts.size() - 1],
6419 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6420 .getReg(0);
6421
6422 SmallVector<Register, 8> DstParts(NumParts);
6423 for (unsigned I = 0; I < NumParts; ++I)
6424 DstParts[I] = buildConstantShiftPart(MI.getOpcode(), I, NumParts, SrcParts,
6425 Params, TargetTy, ShiftAmtTy);
6426
6427 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6428 MI.eraseFromParent();
6429 return Legalized;
6430}
6431
6432LegalizerHelper::LegalizeResult
6433LegalizerHelper::narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy) {
6434 Register DstReg = MI.getOperand(0).getReg();
6435 Register SrcReg = MI.getOperand(1).getReg();
6436 Register AmtReg = MI.getOperand(2).getReg();
6437 LLT DstTy = MRI.getType(DstReg);
6438 LLT ShiftAmtTy = MRI.getType(AmtReg);
6439
6440 const unsigned DstBits = DstTy.getScalarSizeInBits();
6441 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6442 const unsigned NumParts = DstBits / TargetBits;
6443
6444 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6445 assert(isPowerOf2_32(TargetBits) && "Target bit width must be power of 2");
6446
6447 // If the shift amount is known at compile time, we can use direct indexing
6448 // instead of generating select chains in the general case.
6449 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI))
6450 return narrowScalarShiftByConstantMultiway(MI, VRegAndVal->Value, TargetTy,
6451 ShiftAmtTy);
6452
6453 // For runtime-variable shift amounts, we must generate a more complex
6454 // sequence that handles all possible shift values using select chains.
6455
6456 // Split the input into target-sized pieces
6457 SmallVector<Register, 8> SrcParts;
6458 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6459
6460 // Shifting by zero should be a no-op.
6461 auto ZeroAmtConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6462 LLT BoolTy = LLT::scalar(1);
6463 auto IsZeroShift =
6464 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, AmtReg, ZeroAmtConst);
6465
6466 // Any wide shift can be decomposed into two components:
6467 // 1. WordShift: number of complete target-sized words to shift
6468 // 2. BitShift: number of bits to shift within each word
6469 //
6470 // Example: 128-bit >> 50 with 32-bit target:
6471 // WordShift = 50 / 32 = 1 (shift right by 1 complete word)
6472 // BitShift = 50 % 32 = 18 (shift each word right by 18 bits)
6473 unsigned TargetBitsLog2 = Log2_32(TargetBits);
6474 auto TargetBitsLog2Const =
6475 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6476 auto TargetBitsMask = MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6477
6478 Register WordShift =
6479 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6480 Register BitShift =
6481 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6482
6483 // Fill values:
6484 // - SHL/LSHR: fill with zeros
6485 // - ASHR: fill with sign-extended MSB
6486 Register ZeroReg = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6487
6488 Register FillValue;
6489 if (MI.getOpcode() == TargetOpcode::G_ASHR) {
6490 auto TargetBitsMinusOneConst =
6491 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6492 FillValue = MIRBuilder
6493 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6494 TargetBitsMinusOneConst)
6495 .getReg(0);
6496 } else {
6497 FillValue = ZeroReg;
6498 }
6499
6500 SmallVector<Register, 8> DstParts(NumParts);
6501
6502 // For each output part, generate a select chain that chooses the correct
6503 // result based on the runtime WordShift value. This handles all possible
6504 // word shift amounts by pre-calculating what each would produce.
6505 for (unsigned I = 0; I < NumParts; ++I) {
6506 // Initialize with appropriate default value for this shift type
6507 Register InBoundsResult = FillValue;
6508
6509 // clang-format off
6510 // Build a branchless select chain by pre-computing results for all possible
6511 // WordShift values (0 to NumParts-1). Each iteration nests a new select:
6512 //
6513 // K=0: select(WordShift==0, result0, FillValue)
6514 // K=1: select(WordShift==1, result1, select(WordShift==0, result0, FillValue))
6515 // K=2: select(WordShift==2, result2, select(WordShift==1, result1, select(...)))
6516 // clang-format on
6517 for (unsigned K = 0; K < NumParts; ++K) {
6518 auto WordShiftKConst = MIRBuilder.buildConstant(ShiftAmtTy, K);
6519 auto IsWordShiftK = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy,
6520 WordShift, WordShiftKConst);
6521
6522 // Calculate source indices for this word shift
6523 //
6524 // For 4-part 128-bit value with K=1 word shift:
6525 // SHL: [3][2][1][0] << K => [2][1][0][Z]
6526 // -> (MainIdx = I-K, CarryIdx = I-K-1)
6527 // LSHR: [3][2][1][0] >> K => [Z][3][2][1]
6528 // -> (MainIdx = I+K, CarryIdx = I+K+1)
6529 int MainSrcIdx;
6530 int CarrySrcIdx; // Index for the word that provides the carried-in bits.
6531
6532 switch (MI.getOpcode()) {
6533 case TargetOpcode::G_SHL:
6534 MainSrcIdx = (int)I - (int)K;
6535 CarrySrcIdx = MainSrcIdx - 1;
6536 break;
6537 case TargetOpcode::G_LSHR:
6538 case TargetOpcode::G_ASHR:
6539 MainSrcIdx = (int)I + (int)K;
6540 CarrySrcIdx = MainSrcIdx + 1;
6541 break;
6542 default:
6543 llvm_unreachable("Not a shift");
6544 }
6545
6546 // Check bounds and build the result for this word shift
6547 Register ResultForK;
6548 if (MainSrcIdx >= 0 && MainSrcIdx < (int)NumParts) {
6549 Register MainOp = SrcParts[MainSrcIdx];
6550 Register CarryOp;
6551
6552 // Determine carry operand with bounds checking
6553 if (CarrySrcIdx >= 0 && CarrySrcIdx < (int)NumParts)
6554 CarryOp = SrcParts[CarrySrcIdx];
6555 else if (MI.getOpcode() == TargetOpcode::G_ASHR &&
6556 CarrySrcIdx >= (int)NumParts)
6557 CarryOp = FillValue; // Use sign extension
6558
6559 ResultForK = buildVariableShiftPart(MI.getOpcode(), MainOp, BitShift,
6560 TargetTy, CarryOp);
6561 } else {
6562 // Out of bounds - use fill value for this k
6563 ResultForK = FillValue;
6564 }
6565
6566 // Select this result if WordShift equals k
6567 InBoundsResult =
6568 MIRBuilder
6569 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6570 .getReg(0);
6571 }
6572
6573 // Handle zero-shift special case: if shift is 0, use original input
6574 DstParts[I] =
6575 MIRBuilder
6576 .buildSelect(TargetTy, IsZeroShift, SrcParts[I], InBoundsResult)
6577 .getReg(0);
6578 }
6579
6580 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6581 MI.eraseFromParent();
6582 return Legalized;
6583}
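// [Editorial sketch, not part of LegalizerHelper.cpp] A plain-C++ model of the
// word/bit decomposition used by the variable-amount path above, assuming a
// logical right shift over little-endian 32-bit parts; lshrParts is an
// illustrative name, not an LLVM API. Each destination part reads its main
// word at I + WordShift and pulls carried-in bits from the next-higher word,
// which is what the generated select chain computes for every possible
// WordShift value.
#include <cstdint>
#include <vector>

std::vector<uint32_t> lshrParts(const std::vector<uint32_t> &Src, unsigned Amt) {
  const unsigned Bits = 32;
  const unsigned WordShift = Amt / Bits; // whole 32-bit words to shift by
  const unsigned BitShift = Amt % Bits;  // remaining bits within each word
  std::vector<uint32_t> Dst(Src.size(), 0); // LSHR fills with zeros
  for (size_t I = 0; I < Src.size(); ++I) {
    size_t MainIdx = I + WordShift;
    if (MainIdx >= Src.size())
      continue; // out of range: keep the zero fill value
    uint32_t Val = Src[MainIdx] >> BitShift;
    if (BitShift != 0 && MainIdx + 1 < Src.size())
      Val |= Src[MainIdx + 1] << (32 - BitShift); // carry-in from the word above
    Dst[I] = Val;
  }
  return Dst;
}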
6584
6585LegalizerHelper::LegalizeResult
6586LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
6587 LLT MoreTy) {
6588 assert(TypeIdx == 0 && "Expecting only Idx 0");
6589
6590 Observer.changingInstr(MI);
6591 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
6592 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
6593 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
6594 moreElementsVectorSrc(MI, MoreTy, I);
6595 }
6596
6597 MachineBasicBlock &MBB = *MI.getParent();
6598 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
6599 moreElementsVectorDst(MI, MoreTy, 0);
6600 Observer.changedInstr(MI);
6601 return Legalized;
6602}
6603
6604MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
6605 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
6606 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
6607
6608 switch (Opcode) {
6609 default:
6610 llvm_unreachable(
6611 "getNeutralElementForVecReduce called with invalid opcode!");
6612 case TargetOpcode::G_VECREDUCE_ADD:
6613 case TargetOpcode::G_VECREDUCE_OR:
6614 case TargetOpcode::G_VECREDUCE_XOR:
6615 case TargetOpcode::G_VECREDUCE_UMAX:
6616 return MIRBuilder.buildConstant(Ty, 0);
6617 case TargetOpcode::G_VECREDUCE_MUL:
6618 return MIRBuilder.buildConstant(Ty, 1);
6619 case TargetOpcode::G_VECREDUCE_AND:
6620 case TargetOpcode::G_VECREDUCE_UMIN:
6621 return MIRBuilder.buildConstant(
6622 Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
6623 case TargetOpcode::G_VECREDUCE_SMAX:
6624 return MIRBuilder.buildConstant(
6625 Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
6626 case TargetOpcode::G_VECREDUCE_SMIN:
6627 return MIRBuilder.buildConstant(
6628 Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
6629 case TargetOpcode::G_VECREDUCE_FADD:
6630 return MIRBuilder.buildFConstant(Ty, -0.0);
6631 case TargetOpcode::G_VECREDUCE_FMUL:
6632 return MIRBuilder.buildFConstant(Ty, 1.0);
6633 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6634 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6635 assert(false && "getNeutralElementForVecReduce unimplemented for "
6636 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6637 }
6638 llvm_unreachable("switch expected to return!");
6639}
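// [Editorial sketch, not part of LegalizerHelper.cpp] Why padding with the
// neutral element is safe, shown for an unsigned-min reduction on plain
// integers; reduceUMinPadded is an illustrative name. Folding the identity
// (all ones for umin) into the accumulator never changes the result, so the
// widened vector reduces to the same value as the original one.
#include <algorithm>
#include <cstdint>
#include <vector>

uint32_t reduceUMinPadded(std::vector<uint32_t> V, size_t PaddedSize) {
  const uint32_t Neutral = UINT32_MAX; // identity element for umin
  V.resize(PaddedSize, Neutral);       // mirrors the insert-element loop above
  uint32_t Acc = Neutral;
  for (uint32_t X : V)
    Acc = std::min(Acc, X);
  return Acc;
}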
6640
6641LegalizerHelper::LegalizeResult
6642LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
6643 LLT MoreTy) {
6644 unsigned Opc = MI.getOpcode();
6645 switch (Opc) {
6646 case TargetOpcode::G_IMPLICIT_DEF:
6647 case TargetOpcode::G_LOAD: {
6648 if (TypeIdx != 0)
6649 return UnableToLegalize;
6650 Observer.changingInstr(MI);
6651 moreElementsVectorDst(MI, MoreTy, 0);
6652 Observer.changedInstr(MI);
6653 return Legalized;
6654 }
6655 case TargetOpcode::G_STORE:
6656 if (TypeIdx != 0)
6657 return UnableToLegalize;
6658 Observer.changingInstr(MI);
6659 moreElementsVectorSrc(MI, MoreTy, 0);
6660 Observer.changedInstr(MI);
6661 return Legalized;
6662 case TargetOpcode::G_AND:
6663 case TargetOpcode::G_OR:
6664 case TargetOpcode::G_XOR:
6665 case TargetOpcode::G_ADD:
6666 case TargetOpcode::G_SUB:
6667 case TargetOpcode::G_MUL:
6668 case TargetOpcode::G_FADD:
6669 case TargetOpcode::G_FSUB:
6670 case TargetOpcode::G_FMUL:
6671 case TargetOpcode::G_FDIV:
6672 case TargetOpcode::G_FCOPYSIGN:
6673 case TargetOpcode::G_UADDSAT:
6674 case TargetOpcode::G_USUBSAT:
6675 case TargetOpcode::G_SADDSAT:
6676 case TargetOpcode::G_SSUBSAT:
6677 case TargetOpcode::G_SMIN:
6678 case TargetOpcode::G_SMAX:
6679 case TargetOpcode::G_UMIN:
6680 case TargetOpcode::G_UMAX:
6681 case TargetOpcode::G_FMINNUM:
6682 case TargetOpcode::G_FMAXNUM:
6683 case TargetOpcode::G_FMINNUM_IEEE:
6684 case TargetOpcode::G_FMAXNUM_IEEE:
6685 case TargetOpcode::G_FMINIMUM:
6686 case TargetOpcode::G_FMAXIMUM:
6687 case TargetOpcode::G_FMINIMUMNUM:
6688 case TargetOpcode::G_FMAXIMUMNUM:
6689 case TargetOpcode::G_STRICT_FADD:
6690 case TargetOpcode::G_STRICT_FSUB:
6691 case TargetOpcode::G_STRICT_FMUL: {
6692 Observer.changingInstr(MI);
6693 moreElementsVectorSrc(MI, MoreTy, 1);
6694 moreElementsVectorSrc(MI, MoreTy, 2);
6695 moreElementsVectorDst(MI, MoreTy, 0);
6696 Observer.changedInstr(MI);
6697 return Legalized;
6698 }
6699 case TargetOpcode::G_SHL:
6700 case TargetOpcode::G_ASHR:
6701 case TargetOpcode::G_LSHR: {
6702 Observer.changingInstr(MI);
6703 moreElementsVectorSrc(MI, MoreTy, 1);
6704 // The shift operand may have a different scalar type from the source and
6705 // destination operands.
6706 LLT ShiftMoreTy = MoreTy.changeElementType(
6707 MRI.getType(MI.getOperand(2).getReg()).getElementType());
6708 moreElementsVectorSrc(MI, ShiftMoreTy, 2);
6709 moreElementsVectorDst(MI, MoreTy, 0);
6710 Observer.changedInstr(MI);
6711 return Legalized;
6712 }
6713 case TargetOpcode::G_FMA:
6714 case TargetOpcode::G_STRICT_FMA:
6715 case TargetOpcode::G_FSHR:
6716 case TargetOpcode::G_FSHL: {
6717 Observer.changingInstr(MI);
6718 moreElementsVectorSrc(MI, MoreTy, 1);
6719 moreElementsVectorSrc(MI, MoreTy, 2);
6720 moreElementsVectorSrc(MI, MoreTy, 3);
6721 moreElementsVectorDst(MI, MoreTy, 0);
6722 Observer.changedInstr(MI);
6723 return Legalized;
6724 }
6725 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6726 case TargetOpcode::G_EXTRACT:
6727 if (TypeIdx != 1)
6728 return UnableToLegalize;
6729 Observer.changingInstr(MI);
6730 moreElementsVectorSrc(MI, MoreTy, 1);
6731 Observer.changedInstr(MI);
6732 return Legalized;
6733 case TargetOpcode::G_INSERT:
6734 case TargetOpcode::G_INSERT_VECTOR_ELT:
6735 case TargetOpcode::G_FREEZE:
6736 case TargetOpcode::G_FNEG:
6737 case TargetOpcode::G_FABS:
6738 case TargetOpcode::G_FSQRT:
6739 case TargetOpcode::G_FCEIL:
6740 case TargetOpcode::G_FFLOOR:
6741 case TargetOpcode::G_FNEARBYINT:
6742 case TargetOpcode::G_FRINT:
6743 case TargetOpcode::G_INTRINSIC_ROUND:
6744 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6745 case TargetOpcode::G_INTRINSIC_TRUNC:
6746 case TargetOpcode::G_BITREVERSE:
6747 case TargetOpcode::G_BSWAP:
6748 case TargetOpcode::G_FCANONICALIZE:
6749 case TargetOpcode::G_SEXT_INREG:
6750 case TargetOpcode::G_ABS:
6751 case TargetOpcode::G_CTLZ:
6752 case TargetOpcode::G_CTPOP:
6753 if (TypeIdx != 0)
6754 return UnableToLegalize;
6755 Observer.changingInstr(MI);
6756 moreElementsVectorSrc(MI, MoreTy, 1);
6757 moreElementsVectorDst(MI, MoreTy, 0);
6758 Observer.changedInstr(MI);
6759 return Legalized;
6760 case TargetOpcode::G_SELECT: {
6761 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
6762 if (TypeIdx == 1) {
6763 if (!CondTy.isScalar() ||
6764 DstTy.getElementCount() != MoreTy.getElementCount())
6765 return UnableToLegalize;
6766
6767 // This is turning a scalar select of vectors into a vector
6768 // select. Broadcast the select condition.
6769 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6770 Observer.changingInstr(MI);
6771 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6772 Observer.changedInstr(MI);
6773 return Legalized;
6774 }
6775
6776 if (CondTy.isVector())
6777 return UnableToLegalize;
6778
6779 Observer.changingInstr(MI);
6780 moreElementsVectorSrc(MI, MoreTy, 2);
6781 moreElementsVectorSrc(MI, MoreTy, 3);
6782 moreElementsVectorDst(MI, MoreTy, 0);
6783 Observer.changedInstr(MI);
6784 return Legalized;
6785 }
6786 case TargetOpcode::G_UNMERGE_VALUES:
6787 return UnableToLegalize;
6788 case TargetOpcode::G_PHI:
6789 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
6790 case TargetOpcode::G_SHUFFLE_VECTOR:
6791 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
6792 case TargetOpcode::G_BUILD_VECTOR: {
6793 SmallVector<SrcOp, 8> Elts;
6794 for (auto Op : MI.uses()) {
6795 Elts.push_back(Op.getReg());
6796 }
6797
6798 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
6799 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
6800 }
6801
6802 MIRBuilder.buildDeleteTrailingVectorElements(
6803 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
6804 MI.eraseFromParent();
6805 return Legalized;
6806 }
6807 case TargetOpcode::G_SEXT:
6808 case TargetOpcode::G_ZEXT:
6809 case TargetOpcode::G_ANYEXT:
6810 case TargetOpcode::G_TRUNC:
6811 case TargetOpcode::G_FPTRUNC:
6812 case TargetOpcode::G_FPEXT:
6813 case TargetOpcode::G_FPTOSI:
6814 case TargetOpcode::G_FPTOUI:
6815 case TargetOpcode::G_FPTOSI_SAT:
6816 case TargetOpcode::G_FPTOUI_SAT:
6817 case TargetOpcode::G_SITOFP:
6818 case TargetOpcode::G_UITOFP: {
6819 Observer.changingInstr(MI);
6820 LLT SrcExtTy;
6821 LLT DstExtTy;
6822 if (TypeIdx == 0) {
6823 DstExtTy = MoreTy;
6824 SrcExtTy = MoreTy.changeElementType(
6825 MRI.getType(MI.getOperand(1).getReg()).getElementType());
6826 } else {
6827 DstExtTy = MoreTy.changeElementType(
6828 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6829 SrcExtTy = MoreTy;
6830 }
6831 moreElementsVectorSrc(MI, SrcExtTy, 1);
6832 moreElementsVectorDst(MI, DstExtTy, 0);
6833 Observer.changedInstr(MI);
6834 return Legalized;
6835 }
6836 case TargetOpcode::G_ICMP:
6837 case TargetOpcode::G_FCMP: {
6838 if (TypeIdx != 1)
6839 return UnableToLegalize;
6840
6841 Observer.changingInstr(MI);
6842 moreElementsVectorSrc(MI, MoreTy, 2);
6843 moreElementsVectorSrc(MI, MoreTy, 3);
6844 LLT CondTy = MoreTy.changeVectorElementType(
6845 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6846 moreElementsVectorDst(MI, CondTy, 0);
6847 Observer.changedInstr(MI);
6848 return Legalized;
6849 }
6850 case TargetOpcode::G_BITCAST: {
6851 if (TypeIdx != 0)
6852 return UnableToLegalize;
6853
6854 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
6855 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6856
6857 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
6858 if (coefficient % DstTy.getNumElements() != 0)
6859 return UnableToLegalize;
6860
6861 coefficient = coefficient / DstTy.getNumElements();
6862
6863 LLT NewTy = SrcTy.changeElementCount(
6864 ElementCount::get(coefficient, MoreTy.isScalable()));
6865 Observer.changingInstr(MI);
6866 moreElementsVectorSrc(MI, NewTy, 1);
6867 moreElementsVectorDst(MI, MoreTy, 0);
6868 Observer.changedInstr(MI);
6869 return Legalized;
6870 }
6871 case TargetOpcode::G_VECREDUCE_FADD:
6872 case TargetOpcode::G_VECREDUCE_FMUL:
6873 case TargetOpcode::G_VECREDUCE_ADD:
6874 case TargetOpcode::G_VECREDUCE_MUL:
6875 case TargetOpcode::G_VECREDUCE_AND:
6876 case TargetOpcode::G_VECREDUCE_OR:
6877 case TargetOpcode::G_VECREDUCE_XOR:
6878 case TargetOpcode::G_VECREDUCE_SMAX:
6879 case TargetOpcode::G_VECREDUCE_SMIN:
6880 case TargetOpcode::G_VECREDUCE_UMAX:
6881 case TargetOpcode::G_VECREDUCE_UMIN: {
6882 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
6883 MachineOperand &MO = MI.getOperand(1);
6884 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6885 auto NeutralElement = getNeutralElementForVecReduce(
6886 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
6887
6888 LLT IdxTy(TLI.getVectorIdxLLT(MIRBuilder.getDataLayout()));
6889 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
6890 i != e; i++) {
6891 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
6892 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6893 NeutralElement, Idx);
6894 }
6895
6896 Observer.changingInstr(MI);
6897 MO.setReg(NewVec.getReg(0));
6898 Observer.changedInstr(MI);
6899 return Legalized;
6900 }
6901
6902 default:
6903 return UnableToLegalize;
6904 }
6905}
6906
6907LegalizerHelper::LegalizeResult
6908LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
6909 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6910 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6911 unsigned MaskNumElts = Mask.size();
6912 unsigned SrcNumElts = SrcTy.getNumElements();
6913 LLT DestEltTy = DstTy.getElementType();
6914
6915 if (MaskNumElts == SrcNumElts)
6916 return Legalized;
6917
6918 if (MaskNumElts < SrcNumElts) {
6919 // Extend mask to match new destination vector size with
6920 // undef values.
6921 SmallVector<int, 16> NewMask(SrcNumElts, -1);
6922 llvm::copy(Mask, NewMask.begin());
6923
6924 moreElementsVectorDst(MI, SrcTy, 0);
6925 MIRBuilder.setInstrAndDebugLoc(MI);
6926 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6927 MI.getOperand(1).getReg(),
6928 MI.getOperand(2).getReg(), NewMask);
6929 MI.eraseFromParent();
6930
6931 return Legalized;
6932 }
6933
6934 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
6935 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6936 LLT PaddedTy =
6937 DstTy.changeVectorElementCount(ElementCount::getFixed(PaddedMaskNumElts));
6938
6939 // Create new source vectors by concatenating the initial
6940 // source vectors with undefined vectors of the same size.
6941 auto Undef = MIRBuilder.buildUndef(SrcTy);
6942 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
6943 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
6944 MOps1[0] = MI.getOperand(1).getReg();
6945 MOps2[0] = MI.getOperand(2).getReg();
6946
6947 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
6948 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
6949
6950 // Readjust mask for new input vector length.
6951 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
6952 for (unsigned I = 0; I != MaskNumElts; ++I) {
6953 int Idx = Mask[I];
6954 if (Idx >= static_cast<int>(SrcNumElts))
6955 Idx += PaddedMaskNumElts - SrcNumElts;
6956 MappedOps[I] = Idx;
6957 }
6958
6959 // If we got more elements than required, extract subvector.
6960 if (MaskNumElts != PaddedMaskNumElts) {
6961 auto Shuffle =
6962 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
6963
6964 SmallVector<Register, 16> Elts(MaskNumElts);
6965 for (unsigned I = 0; I < MaskNumElts; ++I) {
6966 Elts[I] =
6967 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
6968 .getReg(0);
6969 }
6970 MIRBuilder.buildBuildVector(DstReg, Elts);
6971 } else {
6972 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
6973 }
6974
6975 MI.eraseFromParent();
6976 return Legalized;
6977}
6978
6979LegalizerHelper::LegalizeResult
6980LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
6981 unsigned int TypeIdx, LLT MoreTy) {
6982 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
6983 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6984 unsigned NumElts = DstTy.getNumElements();
6985 unsigned WidenNumElts = MoreTy.getNumElements();
6986
6987 if (DstTy.isVector() && Src1Ty.isVector() &&
6988 DstTy.getNumElements() != Src1Ty.getNumElements()) {
6989 return equalizeVectorShuffleLengths(MI);
6990 }
6991
6992 if (TypeIdx != 0)
6993 return UnableToLegalize;
6994
6995 // Expect a canonicalized shuffle.
6996 if (DstTy != Src1Ty || DstTy != Src2Ty)
6997 return UnableToLegalize;
6998
6999 moreElementsVectorSrc(MI, MoreTy, 1);
7000 moreElementsVectorSrc(MI, MoreTy, 2);
7001
7002 // Adjust mask based on new input vector length.
7003 SmallVector<int, 16> NewMask(WidenNumElts, -1);
7004 for (unsigned I = 0; I != NumElts; ++I) {
7005 int Idx = Mask[I];
7006 if (Idx < static_cast<int>(NumElts))
7007 NewMask[I] = Idx;
7008 else
7009 NewMask[I] = Idx - NumElts + WidenNumElts;
7010 }
7011 moreElementsVectorDst(MI, MoreTy, 0);
7012 MIRBuilder.setInstrAndDebugLoc(MI);
7013 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
7014 MI.getOperand(1).getReg(),
7015 MI.getOperand(2).getReg(), NewMask);
7016 MI.eraseFromParent();
7017 return Legalized;
7018}
7019
7020void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
7021 ArrayRef<Register> Src1Regs,
7022 ArrayRef<Register> Src2Regs,
7023 LLT NarrowTy) {
7024 MachineIRBuilder &B = MIRBuilder;
7025 unsigned SrcParts = Src1Regs.size();
7026 unsigned DstParts = DstRegs.size();
7027
7028 unsigned DstIdx = 0; // Low bits of the result.
7029 Register FactorSum =
7030 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
7031 DstRegs[DstIdx] = FactorSum;
7032
7033 Register CarrySumPrevDstIdx;
7034 SmallVector<Register, 4> Factors;
7035
7036 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
7037 // Collect low parts of muls for DstIdx.
7038 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
7039 i <= std::min(DstIdx, SrcParts - 1); ++i) {
7040 MachineInstrBuilder Mul =
7041 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
7042 Factors.push_back(Mul.getReg(0));
7043 }
7044 // Collect high parts of muls from previous DstIdx.
7045 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
7046 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
7047 MachineInstrBuilder Umulh =
7048 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
7049 Factors.push_back(Umulh.getReg(0));
7050 }
7051 // Add CarrySum from additions calculated for previous DstIdx.
7052 if (DstIdx != 1) {
7053 Factors.push_back(CarrySumPrevDstIdx);
7054 }
7055
7056 Register CarrySum;
7057 // Add all factors and accumulate all carries into CarrySum.
7058 if (DstIdx != DstParts - 1) {
7059 MachineInstrBuilder Uaddo =
7060 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
7061 FactorSum = Uaddo.getReg(0);
7062 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
7063 for (unsigned i = 2; i < Factors.size(); ++i) {
7064 MachineInstrBuilder Uaddo =
7065 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
7066 FactorSum = Uaddo.getReg(0);
7067 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
7068 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
7069 }
7070 } else {
7071 // Since value for the next index is not calculated, neither is CarrySum.
7072 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
7073 for (unsigned i = 2; i < Factors.size(); ++i)
7074 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
7075 }
7076
7077 CarrySumPrevDstIdx = CarrySum;
7078 DstRegs[DstIdx] = FactorSum;
7079 Factors.clear();
7080 }
7081}
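// [Editorial sketch, not part of LegalizerHelper.cpp] The same schoolbook
// scheme on two 32-bit parts, with mulVia32BitParts as an illustrative name:
// each destination part sums the low halves of the aligned partial products,
// the high halves belonging to the previous column, and any carries collected
// there, which is what the G_MUL / G_UMULH / G_UADDO chain above materializes.
#include <cstdint>

uint64_t mulVia32BitParts(uint64_t A, uint64_t B) {
  uint32_t A0 = static_cast<uint32_t>(A), A1 = static_cast<uint32_t>(A >> 32);
  uint32_t B0 = static_cast<uint32_t>(B), B1 = static_cast<uint32_t>(B >> 32);
  // Part 0: low half of A0*B0 (G_MUL).
  uint64_t P00 = static_cast<uint64_t>(A0) * B0;
  uint32_t D0 = static_cast<uint32_t>(P00);
  // Part 1: low halves of A1*B0 and A0*B1 plus the high half of A0*B0
  // (G_UMULH). Carries out of the top part have nowhere to go, so they are
  // dropped, just as the DstIdx == DstParts - 1 case above skips CarrySum.
  uint32_t D1 = A1 * B0 + A0 * B1 + static_cast<uint32_t>(P00 >> 32);
  return (static_cast<uint64_t>(D1) << 32) | D0;
}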
7082
7083LegalizerHelper::LegalizeResult
7084LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
7085 LLT NarrowTy) {
7086 if (TypeIdx != 0)
7087 return UnableToLegalize;
7088
7089 Register DstReg = MI.getOperand(0).getReg();
7090 LLT DstType = MRI.getType(DstReg);
7091 // FIXME: add support for vector types
7092 if (DstType.isVector())
7093 return UnableToLegalize;
7094
7095 unsigned Opcode = MI.getOpcode();
7096 unsigned OpO, OpE, OpF;
7097 switch (Opcode) {
7098 case TargetOpcode::G_SADDO:
7099 case TargetOpcode::G_SADDE:
7100 case TargetOpcode::G_UADDO:
7101 case TargetOpcode::G_UADDE:
7102 case TargetOpcode::G_ADD:
7103 OpO = TargetOpcode::G_UADDO;
7104 OpE = TargetOpcode::G_UADDE;
7105 OpF = TargetOpcode::G_UADDE;
7106 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7107 OpF = TargetOpcode::G_SADDE;
7108 break;
7109 case TargetOpcode::G_SSUBO:
7110 case TargetOpcode::G_SSUBE:
7111 case TargetOpcode::G_USUBO:
7112 case TargetOpcode::G_USUBE:
7113 case TargetOpcode::G_SUB:
7114 OpO = TargetOpcode::G_USUBO;
7115 OpE = TargetOpcode::G_USUBE;
7116 OpF = TargetOpcode::G_USUBE;
7117 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7118 OpF = TargetOpcode::G_SSUBE;
7119 break;
7120 default:
7121 llvm_unreachable("Unexpected add/sub opcode!");
7122 }
7123
7124 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
7125 unsigned NumDefs = MI.getNumExplicitDefs();
7126 Register Src1 = MI.getOperand(NumDefs).getReg();
7127 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
7128 Register CarryDst, CarryIn;
7129 if (NumDefs == 2)
7130 CarryDst = MI.getOperand(1).getReg();
7131 if (MI.getNumOperands() == NumDefs + 3)
7132 CarryIn = MI.getOperand(NumDefs + 2).getReg();
7133
7134 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7135 LLT LeftoverTy, DummyTy;
7136 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
7137 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7138 MIRBuilder, MRI);
7139 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
7140 MRI);
7141
7142 int NarrowParts = Src1Regs.size();
7143 Src1Regs.append(Src1Left);
7144 Src2Regs.append(Src2Left);
7145 DstRegs.reserve(Src1Regs.size());
7146
7147 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
7148 Register DstReg =
7149 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7150 Register CarryOut;
7151 // Forward the final carry-out to the destination register
7152 if (i == e - 1 && CarryDst)
7153 CarryOut = CarryDst;
7154 else
7155 CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
7156
7157 if (!CarryIn) {
7158 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7159 {Src1Regs[i], Src2Regs[i]});
7160 } else if (i == e - 1) {
7161 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7162 {Src1Regs[i], Src2Regs[i], CarryIn});
7163 } else {
7164 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7165 {Src1Regs[i], Src2Regs[i], CarryIn});
7166 }
7167
7168 DstRegs.push_back(DstReg);
7169 CarryIn = CarryOut;
7170 }
7171 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
7172 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7173 ArrayRef(DstRegs).drop_front(NarrowParts));
7174
7175 MI.eraseFromParent();
7176 return Legalized;
7177}
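// [Editorial sketch, not part of LegalizerHelper.cpp] The carry chain built
// above, modelled on plain 32-bit parts; addVia32BitParts is an illustrative
// name. The lowest parts use an overflowing add (G_UADDO) and every later
// part also consumes the incoming carry (G_UADDE), with the final carry-out
// available for the opcode flavours that define one.
#include <cstdint>

uint64_t addVia32BitParts(uint64_t A, uint64_t B) {
  uint32_t A0 = static_cast<uint32_t>(A), A1 = static_cast<uint32_t>(A >> 32);
  uint32_t B0 = static_cast<uint32_t>(B), B1 = static_cast<uint32_t>(B >> 32);
  uint32_t D0 = A0 + B0;              // G_UADDO: add the low parts
  uint32_t Carry = D0 < A0 ? 1u : 0u; // carry-out of the low addition
  uint32_t D1 = A1 + B1 + Carry;      // G_UADDE: high parts plus carry-in
  return (static_cast<uint64_t>(D1) << 32) | D0;
}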
7178
7179LegalizerHelper::LegalizeResult
7180LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
7181 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
7182
7183 LLT Ty = MRI.getType(DstReg);
7184 if (Ty.isVector())
7185 return UnableToLegalize;
7186
7187 unsigned Size = Ty.getSizeInBits();
7188 unsigned NarrowSize = NarrowTy.getSizeInBits();
7189 if (Size % NarrowSize != 0)
7190 return UnableToLegalize;
7191
7192 unsigned NumParts = Size / NarrowSize;
7193 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
7194 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7195
7196 SmallVector<Register, 2> Src1Parts, Src2Parts;
7197 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
7198 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
7199 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
7200 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7201
7202 // Take only high half of registers if this is high mul.
7203 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
7204 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7205 MI.eraseFromParent();
7206 return Legalized;
7207}
7208
7209LegalizerHelper::LegalizeResult
7210LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
7211 LLT NarrowTy) {
7212 if (TypeIdx != 0)
7213 return UnableToLegalize;
7214
7215 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
7216
7217 Register Src = MI.getOperand(1).getReg();
7218 LLT SrcTy = MRI.getType(Src);
7219
7220 // If all finite floats fit into the narrowed integer type, we can just swap
7221 // out the result type. This is practically only useful for conversions from
7222 // half to at least 16-bits, so just handle the one case.
7223 if (SrcTy.getScalarType() != LLT::scalar(16) ||
7224 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
7225 return UnableToLegalize;
7226
7227 Observer.changingInstr(MI);
7228 narrowScalarDst(MI, NarrowTy, 0,
7229 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7230 Observer.changedInstr(MI);
7231 return Legalized;
7232}
7233
7234LegalizerHelper::LegalizeResult
7235LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
7236 LLT NarrowTy) {
7237 if (TypeIdx != 1)
7238 return UnableToLegalize;
7239
7240 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7241
7242 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7243 // FIXME: add support for when SizeOp1 isn't an exact multiple of
7244 // NarrowSize.
7245 if (SizeOp1 % NarrowSize != 0)
7246 return UnableToLegalize;
7247 int NumParts = SizeOp1 / NarrowSize;
7248
7249 SmallVector<Register, 2> SrcRegs, DstRegs;
7250 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7251 MIRBuilder, MRI);
7252
7253 Register OpReg = MI.getOperand(0).getReg();
7254 uint64_t OpStart = MI.getOperand(2).getImm();
7255 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7256 for (int i = 0; i < NumParts; ++i) {
7257 unsigned SrcStart = i * NarrowSize;
7258
7259 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7260 // No part of the extract uses this subregister, ignore it.
7261 continue;
7262 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7263 // The entire subregister is extracted, forward the value.
7264 DstRegs.push_back(SrcRegs[i]);
7265 continue;
7266 }
7267
7268 // OpSegStart is where this destination segment would start in OpReg if it
7269 // extended infinitely in both directions.
7270 int64_t ExtractOffset;
7271 uint64_t SegSize;
7272 if (OpStart < SrcStart) {
7273 ExtractOffset = 0;
7274 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7275 } else {
7276 ExtractOffset = OpStart - SrcStart;
7277 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7278 }
7279
7280 Register SegReg = SrcRegs[i];
7281 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7282 // A genuine extract is needed.
7283 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7284 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7285 }
7286
7287 DstRegs.push_back(SegReg);
7288 }
7289
7290 Register DstReg = MI.getOperand(0).getReg();
7291 if (MRI.getType(DstReg).isVector())
7292 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7293 else if (DstRegs.size() > 1)
7294 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7295 else
7296 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
7297 MI.eraseFromParent();
7298 return Legalized;
7299}
7300
7301LegalizerHelper::LegalizeResult
7302LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
7303 LLT NarrowTy) {
7304 // FIXME: Don't know how to handle secondary types yet.
7305 if (TypeIdx != 0)
7306 return UnableToLegalize;
7307
7308 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
7309 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7310 LLT LeftoverTy;
7311 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7312 LeftoverRegs, MIRBuilder, MRI);
7313
7314 SrcRegs.append(LeftoverRegs);
7315
7316 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7317 Register OpReg = MI.getOperand(2).getReg();
7318 uint64_t OpStart = MI.getOperand(3).getImm();
7319 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7320 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
7321 unsigned DstStart = I * NarrowSize;
7322
7323 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7324 // The entire subregister is defined by this insert, forward the new
7325 // value.
7326 DstRegs.push_back(OpReg);
7327 continue;
7328 }
7329
7330 Register SrcReg = SrcRegs[I];
7331 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
7332 // The leftover reg is smaller than NarrowTy, so we need to extend it.
7333 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7334 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
7335 }
7336
7337 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7338 // No part of the insert affects this subregister, forward the original.
7339 DstRegs.push_back(SrcReg);
7340 continue;
7341 }
7342
7343 // OpSegStart is where this destination segment would start in OpReg if it
7344 // extended infinitely in both directions.
7345 int64_t ExtractOffset, InsertOffset;
7346 uint64_t SegSize;
7347 if (OpStart < DstStart) {
7348 InsertOffset = 0;
7349 ExtractOffset = DstStart - OpStart;
7350 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7351 } else {
7352 InsertOffset = OpStart - DstStart;
7353 ExtractOffset = 0;
7354 SegSize =
7355 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7356 }
7357
7358 Register SegReg = OpReg;
7359 if (ExtractOffset != 0 || SegSize != OpSize) {
7360 // A genuine extract is needed.
7361 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7362 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7363 }
7364
7365 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7366 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7367 DstRegs.push_back(DstReg);
7368 }
7369
7370 uint64_t WideSize = DstRegs.size() * NarrowSize;
7371 Register DstReg = MI.getOperand(0).getReg();
7372 if (WideSize > RegTy.getSizeInBits()) {
7373 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
7374 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7375 MIRBuilder.buildTrunc(DstReg, MergeReg);
7376 } else
7377 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7378
7379 MI.eraseFromParent();
7380 return Legalized;
7381}
7382
7383LegalizerHelper::LegalizeResult
7384LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
7385 LLT NarrowTy) {
7386 Register DstReg = MI.getOperand(0).getReg();
7387 LLT DstTy = MRI.getType(DstReg);
7388
7389 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
7390
7391 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7392 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
7393 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7394 LLT LeftoverTy;
7395 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7396 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
7397 return UnableToLegalize;
7398
7399 LLT Unused;
7400 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7401 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7402 llvm_unreachable("inconsistent extractParts result");
7403
7404 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7405 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
7406 {Src0Regs[I], Src1Regs[I]});
7407 DstRegs.push_back(Inst.getReg(0));
7408 }
7409
7410 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7411 auto Inst = MIRBuilder.buildInstr(
7412 MI.getOpcode(),
7413 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7414 DstLeftoverRegs.push_back(Inst.getReg(0));
7415 }
7416
7417 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7418 LeftoverTy, DstLeftoverRegs);
7419
7420 MI.eraseFromParent();
7421 return Legalized;
7422}
7423
7424LegalizerHelper::LegalizeResult
7425LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
7426 LLT NarrowTy) {
7427 if (TypeIdx != 0)
7428 return UnableToLegalize;
7429
7430 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7431
7432 LLT DstTy = MRI.getType(DstReg);
7433 if (DstTy.isVector())
7434 return UnableToLegalize;
7435
7436 SmallVector<Register, 8> Parts;
7437 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7438 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
7439 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7440
7441 MI.eraseFromParent();
7442 return Legalized;
7443}
7444
7445LegalizerHelper::LegalizeResult
7446LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
7447 LLT NarrowTy) {
7448 if (TypeIdx != 0)
7449 return UnableToLegalize;
7450
7451 Register CondReg = MI.getOperand(1).getReg();
7452 LLT CondTy = MRI.getType(CondReg);
7453 if (CondTy.isVector()) // TODO: Handle vselect
7454 return UnableToLegalize;
7455
7456 Register DstReg = MI.getOperand(0).getReg();
7457 LLT DstTy = MRI.getType(DstReg);
7458
7459 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7460 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7461 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
7462 LLT LeftoverTy;
7463 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7464 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7465 return UnableToLegalize;
7466
7467 LLT Unused;
7468 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7469 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
7470 llvm_unreachable("inconsistent extractParts result");
7471
7472 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7473 auto Select = MIRBuilder.buildSelect(NarrowTy,
7474 CondReg, Src1Regs[I], Src2Regs[I]);
7475 DstRegs.push_back(Select.getReg(0));
7476 }
7477
7478 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7479 auto Select = MIRBuilder.buildSelect(
7480 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
7481 DstLeftoverRegs.push_back(Select.getReg(0));
7482 }
7483
7484 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7485 LeftoverTy, DstLeftoverRegs);
7486
7487 MI.eraseFromParent();
7488 return Legalized;
7489}
7490
7491LegalizerHelper::LegalizeResult
7492LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
7493 LLT NarrowTy) {
7494 if (TypeIdx != 1)
7495 return UnableToLegalize;
7496
7497 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7498 unsigned NarrowSize = NarrowTy.getSizeInBits();
7499
7500 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7501 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
7502
7503 MachineIRBuilder &B = MIRBuilder;
7504 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7505 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
7506 auto C_0 = B.buildConstant(NarrowTy, 0);
7507 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7508 UnmergeSrc.getReg(1), C_0);
7509 auto LoCTLZ = IsUndef ?
7510 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
7511 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7512 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7513 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7514 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
7515 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7516
7517 MI.eraseFromParent();
7518 return Legalized;
7519 }
7520
7521 return UnableToLegalize;
7522}
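// [Editorial sketch, not part of LegalizerHelper.cpp] The split formula from
// the comment above, written out for a 64-bit value and two 32-bit halves
// using C++20 <bit>; ctlz64ViaHalves is an illustrative name. If the high
// half is zero, the answer is 32 plus the leading zeros of the low half,
// otherwise it is just the leading zeros of the high half.
#include <bit>
#include <cstdint>

int ctlz64ViaHalves(uint64_t X) {
  uint32_t Lo = static_cast<uint32_t>(X);
  uint32_t Hi = static_cast<uint32_t>(X >> 32);
  if (Hi == 0)
    return 32 + std::countl_zero(Lo); // NarrowSize + ctlz(Lo)
  return std::countl_zero(Hi);        // ctlz(Hi)
}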
7523
7524LegalizerHelper::LegalizeResult
7525LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
7526 LLT NarrowTy) {
7527 if (TypeIdx != 1)
7528 return UnableToLegalize;
7529
7530 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7531 unsigned NarrowSize = NarrowTy.getSizeInBits();
7532
7533 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7534 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
7535
7536 MachineIRBuilder &B = MIRBuilder;
7537 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7538 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
7539 auto C_0 = B.buildConstant(NarrowTy, 0);
7540 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7541 UnmergeSrc.getReg(0), C_0);
7542 auto HiCTTZ = IsUndef ?
7543 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
7544 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7545 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7546 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7547 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
7548 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7549
7550 MI.eraseFromParent();
7551 return Legalized;
7552 }
7553
7554 return UnableToLegalize;
7555}
7556
7557LegalizerHelper::LegalizeResult
7558LegalizerHelper::narrowScalarCTLS(MachineInstr &MI, unsigned TypeIdx,
7559 LLT NarrowTy) {
7560 if (TypeIdx != 1)
7561 return UnableToLegalize;
7562
7563 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7564 unsigned NarrowSize = NarrowTy.getSizeInBits();
7565
7566 if (!SrcTy.isScalar() || SrcTy.getSizeInBits() != 2 * NarrowSize)
7567 return UnableToLegalize;
7568
7569 MachineIRBuilder &B = MIRBuilder;
7570
7571 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7572 Register Lo = UnmergeSrc.getReg(0);
7573 Register Hi = UnmergeSrc.getReg(1);
7574
7575 auto ShAmt = B.buildConstant(NarrowTy, NarrowSize - 1);
7576 auto Sign = B.buildAShr(NarrowTy, Hi, ShAmt);
7577
7578 auto LoSign = B.buildAShr(NarrowTy, Lo, ShAmt);
7579 auto LoSameSign = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7580 LoSign.getReg(0), Sign.getReg(0));
7581
7582 auto HiIsSign =
7583 B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), Hi, Sign.getReg(0));
7584
7585 auto LoCTLS = B.buildCTLS(DstTy, Lo);
7586 auto GNarrowSize = B.buildConstant(DstTy, NarrowSize);
7587 auto HiIsSignCTLS = B.buildAdd(DstTy, LoCTLS, GNarrowSize);
7588
7589 // If the low half flips sign, the run of redundant bits stops at the
7590 // boundary, so use (NarrowSize - 1) instead of extending into Lo.
7591 auto GNarrowSizeMinus1 = B.buildConstant(DstTy, NarrowSize - 1);
7592 auto HiSignResult =
7593 B.buildSelect(DstTy, LoSameSign, HiIsSignCTLS, GNarrowSizeMinus1);
7594
7595 auto HiCTLS = B.buildCTLS(DstTy, Hi);
7596
7597 B.buildSelect(DstReg, HiIsSign, HiSignResult, HiCTLS);
7598
7599 MI.eraseFromParent();
7600 return Legalized;
7601}
7602
7603LegalizerHelper::LegalizeResult
7604LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
7605 LLT NarrowTy) {
7606 if (TypeIdx != 1)
7607 return UnableToLegalize;
7608
7609 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7610 unsigned NarrowSize = NarrowTy.getSizeInBits();
7611
7612 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7613 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
7614
7615 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7616 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7617 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7618
7619 MI.eraseFromParent();
7620 return Legalized;
7621 }
7622
7623 return UnableToLegalize;
7624}
7625
7626LegalizerHelper::LegalizeResult
7627LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
7628 LLT NarrowTy) {
7629 if (TypeIdx != 1)
7630 return UnableToLegalize;
7631
7632 MachineIRBuilder &B = MIRBuilder;
7633 Register ExpReg = MI.getOperand(2).getReg();
7634 LLT ExpTy = MRI.getType(ExpReg);
7635
7636 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
7637
7638 // Clamp the exponent to the range of the target type.
7639 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
7640 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
7641 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
7642 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
7643
7644 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
7645 Observer.changingInstr(MI);
7646 MI.getOperand(2).setReg(Trunc.getReg(0));
7647 Observer.changedInstr(MI);
7648 return Legalized;
7649}
7650
7651LegalizerHelper::LegalizeResult
7652LegalizerHelper::lowerBitCount(MachineInstr &MI) {
7653 unsigned Opc = MI.getOpcode();
7654 const auto &TII = MIRBuilder.getTII();
7655 auto isSupported = [this](const LegalityQuery &Q) {
7656 auto QAction = LI.getAction(Q).Action;
7657 return QAction == Legal || QAction == Libcall || QAction == Custom;
7658 };
7659 switch (Opc) {
7660 default:
7661 return UnableToLegalize;
7662 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7663 // This trivially expands to CTLZ.
7664 Observer.changingInstr(MI);
7665 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
7666 Observer.changedInstr(MI);
7667 return Legalized;
7668 }
7669 case TargetOpcode::G_CTLZ: {
7670 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7671 unsigned Len = SrcTy.getScalarSizeInBits();
7672
7673 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7674 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
7675 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7676 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
7677 auto ICmp = MIRBuilder.buildICmp(
7678 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
7679 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7680 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7681 MI.eraseFromParent();
7682 return Legalized;
7683 }
7684 // for now, we do this:
7685 // NewLen = NextPowerOf2(Len);
7686 // x = x | (x >> 1);
7687 // x = x | (x >> 2);
7688 // ...
7689 // x = x | (x >>16);
7690 // x = x | (x >>32); // for 64-bit input
7691 // Up to NewLen/2
7692 // return Len - popcount(x);
7693 //
7694 // Ref: "Hacker's Delight" by Henry Warren
7695 Register Op = SrcReg;
7696 unsigned NewLen = PowerOf2Ceil(Len);
7697 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7698 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7699 auto MIBOp = MIRBuilder.buildOr(
7700 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
7701 Op = MIBOp.getReg(0);
7702 }
7703 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
7704 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
7705 MIBPop);
7706 MI.eraseFromParent();
7707 return Legalized;
7708 }
7709 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7710 // This trivially expands to CTTZ.
7711 Observer.changingInstr(MI);
7712 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
7713 Observer.changedInstr(MI);
7714 return Legalized;
7715 }
7716 case TargetOpcode::G_CTTZ: {
7717 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7718
7719 unsigned Len = SrcTy.getScalarSizeInBits();
7720 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7721 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
7722 // zero.
7723 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7724 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
7725 auto ICmp = MIRBuilder.buildICmp(
7726 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
7727 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7728 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7729 MI.eraseFromParent();
7730 return Legalized;
7731 }
7732 // for now, we use: { return popcount(~x & (x - 1)); }
7733 // unless the target has ctlz but not ctpop, in which case we use:
7734 // { return 32 - nlz(~x & (x-1)); }
7735 // Ref: "Hacker's Delight" by Henry Warren
7736 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
7737 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7738 auto MIBTmp = MIRBuilder.buildAnd(
7739 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7740 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7741 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7742 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
7743 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
7744 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
7745 MI.eraseFromParent();
7746 return Legalized;
7747 }
7748 Observer.changingInstr(MI);
7749 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
7750 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7751 Observer.changedInstr(MI);
7752 return Legalized;
7753 }
7754 case TargetOpcode::G_CTPOP: {
7755 Register SrcReg = MI.getOperand(1).getReg();
7756 LLT Ty = MRI.getType(SrcReg);
7757 unsigned Size = Ty.getScalarSizeInBits();
7758 MachineIRBuilder &B = MIRBuilder;
7759
7760 // Bail out on irregular type lengths.
7761 if (Size > 128 || Size % 8 != 0)
7762 return UnableToLegalize;
7763
7764 // Count set bits in blocks of 2 bits. Default approach would be
7765 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
7766 // We use following formula instead:
7767 // B2Count = val - { (val >> 1) & 0x55555555 }
7768 // since it gives same result in blocks of 2 with one instruction less.
7769 auto C_1 = B.buildConstant(Ty, 1);
7770 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
7771 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
7772 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
7773 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7774 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
7775
7776 // In order to get count in blocks of 4 add values from adjacent block of 2.
7777 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
7778 auto C_2 = B.buildConstant(Ty, 2);
7779 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
7780 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
7781 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
7782 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7783 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7784 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7785
7786 // For count in blocks of 8 bits we don't have to mask high 4 bits before
7787 // addition since count value sits in range {0,...,8} and 4 bits are enough
7788 // to hold such binary values. After addition high 4 bits still hold count
7789 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
7790 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
7791 auto C_4 = B.buildConstant(Ty, 4);
7792 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
7793 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
7794 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
7795 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
7796 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7797
7798 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
7799 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
7800 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
7801 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
7802
7803 // Shift count result from 8 high bits to low bits.
7804 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
7805
7806 auto IsMulSupported = [this](const LLT Ty) {
7807 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7808 return Action == Legal || Action == WidenScalar || Action == Custom;
7809 };
7810 if (IsMulSupported(Ty)) {
7811 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
7812 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7813 } else {
7814 auto ResTmp = B8Count;
7815 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7816 auto ShiftC = B.buildConstant(Ty, Shift);
7817 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
7818 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
7819 }
7820 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7821 }
7822 MI.eraseFromParent();
7823 return Legalized;
7824 }
7825 case TargetOpcode::G_CTLS: {
7826 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7827
7828 // ctls(x) -> ctlz(x ^ (x >> (N - 1))) - 1
7829 auto SignIdxC =
7830 MIRBuilder.buildConstant(SrcTy, SrcTy.getScalarSizeInBits() - 1);
7831 auto OneC = MIRBuilder.buildConstant(DstTy, 1);
7832
7833 auto Shr = MIRBuilder.buildAShr(SrcTy, SrcReg, SignIdxC);
7834
7835 auto Xor = MIRBuilder.buildXor(SrcTy, SrcReg, Shr);
7836 auto Ctlz = MIRBuilder.buildCTLZ(DstTy, Xor);
7837
7838 MIRBuilder.buildSub(DstReg, Ctlz, OneC);
7839 MI.eraseFromParent();
7840 return Legalized;
7841 }
7842 }
7843}
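// [Editorial sketch, not part of LegalizerHelper.cpp] The G_CTPOP expansion
// above, traced on a plain 32-bit integer; popcount32Blocks is an
// illustrative name. Bits are summed within 2-, then 4-, then 8-bit blocks,
// and the multiply by 0x01010101 gathers the per-byte counts into the top
// byte, matching the MulMask / C_SizeM8 step.
#include <cstdint>

unsigned popcount32Blocks(uint32_t V) {
  V = V - ((V >> 1) & 0x55555555u);                 // counts per 2-bit block
  V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u); // counts per 4-bit block
  V = (V + (V >> 4)) & 0x0F0F0F0Fu;                 // counts per byte
  return (V * 0x01010101u) >> 24;                   // total in the top byte
}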
7844
7845// Check that (every element of) Reg is undef or not an exact multiple of BW.
7846static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
7847 Register Reg, unsigned BW) {
7848 return matchUnaryPredicate(
7849 MRI, Reg,
7850 [=](const Constant *C) {
7851 // Null constant here means an undef.
7852 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
7853 return !CI || CI->getValue().urem(BW) != 0;
7854 },
7855 /*AllowUndefs*/ true);
7856}
7857
7858LegalizerHelper::LegalizeResult
7859LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
7860 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7861 LLT Ty = MRI.getType(Dst);
7862 LLT ShTy = MRI.getType(Z);
7863
7864 unsigned BW = Ty.getScalarSizeInBits();
7865
7866 if (!isPowerOf2_32(BW))
7867 return UnableToLegalize;
7868
7869 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7870 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7871
7872 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7873 // fshl X, Y, Z -> fshr X, Y, -Z
7874 // fshr X, Y, Z -> fshl X, Y, -Z
7875 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
7876 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7877 } else {
7878 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7879 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7880 auto One = MIRBuilder.buildConstant(ShTy, 1);
7881 if (IsFSHL) {
7882 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7883 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
7884 } else {
7885 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7886 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
7887 }
7888
7889 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
7890 }
7891
7892 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
7893 MI.eraseFromParent();
7894 return Legalized;
7895}
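// [Editorial sketch, not part of LegalizerHelper.cpp] The inversion used above
// checked on 32-bit values; fshl32 and fshr32 are illustrative helpers. When
// the amount is known not to be a multiple of the bit width, fshl(X, Y, Z) and
// fshr(X, Y, -Z) read the same window out of the concatenation X:Y.
#include <cassert>
#include <cstdint>

static uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
  Z &= 31;
  return Z ? (X << Z) | (Y >> (32 - Z)) : X;
}

static uint32_t fshr32(uint32_t X, uint32_t Y, uint32_t Z) {
  Z &= 31;
  return Z ? (X << (32 - Z)) | (Y >> Z) : Y;
}

int main() {
  const uint32_t X = 0x12345678u, Y = 0x9ABCDEF0u;
  for (uint32_t Z = 1; Z < 32; ++Z) // Z % BW != 0, as the guard above requires
    assert(fshl32(X, Y, Z) == fshr32(X, Y, 0u - Z));
  return 0;
}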
7896
7897LegalizerHelper::LegalizeResult
7898LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
7899 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7900 LLT Ty = MRI.getType(Dst);
7901 LLT ShTy = MRI.getType(Z);
7902
7903 const unsigned BW = Ty.getScalarSizeInBits();
7904 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7905
7906 Register ShX, ShY;
7907 Register ShAmt, InvShAmt;
7908
7909 // FIXME: Emit optimized urem by constant instead of letting it expand later.
7910 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7911 // fshl: X << C | Y >> (BW - C)
7912 // fshr: X << (BW - C) | Y >> C
7913 // where C = Z % BW is not zero
7914 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7915 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7916 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
7917 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
7918 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
7919 } else {
7920 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7921 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7922 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
7923 if (isPowerOf2_32(BW)) {
7924 // Z % BW -> Z & (BW - 1)
7925 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
7926 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7927 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
7928 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
7929 } else {
7930 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7931 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7932 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
7933 }
7934
7935 auto One = MIRBuilder.buildConstant(ShTy, 1);
7936 if (IsFSHL) {
7937 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
7938 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
7939 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
7940 } else {
7941 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
7942 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
7943 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
7944 }
7945 }
7946
7947 MIRBuilder.buildOr(Dst, ShX, ShY, MachineInstr::Disjoint);
7948 MI.eraseFromParent();
7949 return Legalized;
7950}
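// [Editorial sketch, not part of LegalizerHelper.cpp] The double-shift trick
// from the general path above (where Z may be a multiple of the bit width), on
// 32-bit values; fshl32NoUndef is an illustrative name. Splitting the second
// shift as "Y >> 1 >> (BW - 1 - C)" keeps every individual shift amount
// strictly below the bit width, so the expansion never performs an undefined
// shift-by-BW even when C is zero.
#include <cstdint>

uint32_t fshl32NoUndef(uint32_t X, uint32_t Y, uint32_t Z) {
  uint32_t C = Z & 31;    // Z % BW for the power-of-two width 32
  uint32_t Inv = ~Z & 31; // (BW - 1) - (Z % BW)
  return (X << C) | ((Y >> 1) >> Inv);
}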
7951
7952LegalizerHelper::LegalizeResult
7953LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
7954 // These operations approximately do the following (while avoiding undefined
7955 // shifts by BW):
7956 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
7957 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
7958 Register Dst = MI.getOperand(0).getReg();
7959 LLT Ty = MRI.getType(Dst);
7960 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
7961
7962 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7963 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7964
7965 // TODO: Use smarter heuristic that accounts for vector legalization.
7966 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
7967 return lowerFunnelShiftAsShifts(MI);
7968
7969 // This only works for powers of 2, fallback to shifts if it fails.
7970 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
7971 if (Result == UnableToLegalize)
7972 return lowerFunnelShiftAsShifts(MI);
7973 return Result;
7974}
7975
7976LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
7977 auto [Dst, Src] = MI.getFirst2Regs();
7978 LLT DstTy = MRI.getType(Dst);
7979 LLT SrcTy = MRI.getType(Src);
7980
7981 uint32_t DstTySize = DstTy.getSizeInBits();
7982 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
7983 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
7984
7985 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
7986 !isPowerOf2_32(SrcTyScalarSize))
7987 return UnableToLegalize;
7988
7989 // The step between extend is too large, split it by creating an intermediate
7990 // extend instruction
7991 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
7992 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
7993 // If the destination type is illegal, split it into multiple statements
7994 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
7995 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
7996 // Unmerge the vector
7997 LLT EltTy = MidTy.changeElementCount(
7998 MidTy.getElementCount().divideCoefficientBy(2));
7999 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
8000
8001 // ZExt the vectors
8002 LLT ZExtResTy = DstTy.changeElementCount(
8003 DstTy.getElementCount().divideCoefficientBy(2));
8004 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
8005 {UnmergeSrc.getReg(0)});
8006 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
8007 {UnmergeSrc.getReg(1)});
8008
8009 // Merge the ending vectors
8010 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
8011
8012 MI.eraseFromParent();
8013 return Legalized;
8014 }
8015 return UnableToLegalize;
8016}
8017
8018LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
8019 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
8020 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
8021 // Similar to how operand splitting is done in SelectionDAG, we can handle
8022 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
8023 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
8024 // %lo16(<4 x s16>) = G_TRUNC %inlo
8025 // %hi16(<4 x s16>) = G_TRUNC %inhi
8026 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
8027 // %res(<8 x s8>) = G_TRUNC %in16
8028
8029 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
8030
8031 Register DstReg = MI.getOperand(0).getReg();
8032 Register SrcReg = MI.getOperand(1).getReg();
8033 LLT DstTy = MRI.getType(DstReg);
8034 LLT SrcTy = MRI.getType(SrcReg);
8035
8036 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
8037 isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
8038 isPowerOf2_32(SrcTy.getNumElements()) &&
8039 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
8040 // Split input type.
8041 LLT SplitSrcTy = SrcTy.changeElementCount(
8042 SrcTy.getElementCount().divideCoefficientBy(2));
8043
8044 // First, split the source into two smaller vectors.
8045 SmallVector<Register, 2> SplitSrcs;
8046 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
8047
8048 // Truncate the splits into intermediate narrower elements.
8049 LLT InterTy;
8050 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
8051 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
8052 else
8053 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
8054 for (Register &Src : SplitSrcs)
8055 Src = MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
8056
8057 // Combine the new truncates into one vector
8058 auto Merge = MIRBuilder.buildMergeLikeInstr(
8059 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
8060
8061 // Truncate the new vector to the final result type
8062 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
8063 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
8064 else
8065 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
8066
8067 MI.eraseFromParent();
8068
8069 return Legalized;
8070 }
8071 return UnableToLegalize;
8072}
8073
8074LegalizerHelper::LegalizeResult
8075LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
8076 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
8077 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
8078 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
8079 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8080 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
8081 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
8082 MI.eraseFromParent();
8083 return Legalized;
8084}
8085
8086LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
8087 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
8088
8089 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
8090 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
8091
8092 MIRBuilder.setInstrAndDebugLoc(MI);
8093
8094 // If a rotate in the other direction is supported, use it.
8095 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8096 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
8097 isPowerOf2_32(EltSizeInBits))
8098 return lowerRotateWithReverseRotate(MI);
8099
8100 // If a funnel shift is supported, use it.
8101 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8102 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8103 bool IsFShLegal = false;
8104 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
8105 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
8106 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
8107 Register R3) {
8108 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
8109 MI.eraseFromParent();
8110 return Legalized;
8111 };
8112 // If a funnel shift in the other direction is supported, use it.
8113 if (IsFShLegal) {
8114 return buildFunnelShift(FShOpc, Dst, Src, Amt);
8115 } else if (isPowerOf2_32(EltSizeInBits)) {
8116 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
8117 return buildFunnelShift(RevFsh, Dst, Src, Amt);
8118 }
8119 }
8120
8121 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
8122 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
8123 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
8124 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
8125 Register ShVal;
8126 Register RevShiftVal;
8127 if (isPowerOf2_32(EltSizeInBits)) {
8128 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8129 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8130 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
8131 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
8132 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8133 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
8134 RevShiftVal =
8135 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
8136 } else {
8137 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8138 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8139 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
8140 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
8141 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8142 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
8143 auto One = MIRBuilder.buildConstant(AmtTy, 1);
8144 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
8145 RevShiftVal =
8146 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
8147 }
8148 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal, MachineInstr::Disjoint);
8149 MI.eraseFromParent();
8150 return Legalized;
8151}
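As a rough standalone illustration of the power-of-two branch above (plain C++, not from this file; a 32-bit element and the helper name are assumed), both masked shift amounts stay in [0, 31], so the expression is defined for any amount:

#include <cstdint>

// (rotl x, c) -> (x << (c & (w - 1))) | (x >> (-c & (w - 1))) for w == 32.
static uint32_t RotL32(uint32_t X, unsigned C) {
  return (X << (C & 31u)) | (X >> ((0u - C) & 31u));
}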
8152
8153// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
8154// representation.
8155LegalizerHelper::LegalizeResult
8156LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
8157 auto [Dst, Src] = MI.getFirst2Regs();
8158 const LLT S64 = LLT::scalar(64);
8159 const LLT S32 = LLT::scalar(32);
8160 const LLT S1 = LLT::scalar(1);
8161
8162 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8163
8164 // unsigned cul2f(ulong u) {
8165 // uint lz = clz(u);
8166 // uint e = (u != 0) ? 127U + 63U - lz : 0;
8167 // u = (u << lz) & 0x7fffffffffffffffUL;
8168 // ulong t = u & 0xffffffffffUL;
8169 // uint v = (e << 23) | (uint)(u >> 40);
8170 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
8171 // return as_float(v + r);
8172 // }
8173
8174 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
8175 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
8176
8177 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
8178
8179 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
8180 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
8181
8182 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
8183 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
8184
8185 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
8186 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
8187
8188 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
8189
8190 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
8191 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
8192
8193 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
8194 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
8195 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
8196
8197 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
8198 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
8199 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
8200 auto One = MIRBuilder.buildConstant(S32, 1);
8201
8202 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
8203 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
8204 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
8205 MIRBuilder.buildAdd(Dst, V, R);
8206
8207 MI.eraseFromParent();
8208 return Legalized;
8209}
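For reference, the cul2f() pseudocode in the comment above, transcribed into standalone C++ as an illustrative sketch (std::countl_zero and std::bit_cast assume C++20; the zero input is handled up front so the shift amount stays in range):

#include <bit>
#include <cstdint>

// Transcription of the cul2f() pseudocode: u64 -> f32, round to nearest even.
static float CUL2F(uint64_t U) {
  if (U == 0)
    return 0.0f;
  uint32_t LZ = static_cast<uint32_t>(std::countl_zero(U));
  uint32_t E = 127u + 63u - LZ;                  // biased exponent
  U = (U << LZ) & 0x7fffffffffffffffULL;         // normalized, implicit bit dropped
  uint64_t T = U & 0xffffffffffULL;              // 40 truncated low bits
  uint32_t V = (E << 23) | static_cast<uint32_t>(U >> 40);
  uint32_t R = T > 0x8000000000ULL ? 1u : (T == 0x8000000000ULL ? (V & 1u) : 0u);
  return std::bit_cast<float>(V + R);            // carry of V + R rounds up correctly
}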
8210
8211// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
8212// operations and G_SITOFP
8213LegalizerHelper::LegalizeResult
8214LegalizerHelper::lowerU64ToF32WithSITOFP(MachineInstr &MI) {
8215 auto [Dst, Src] = MI.getFirst2Regs();
8216 const LLT S64 = LLT::scalar(64);
8217 const LLT S32 = LLT::scalar(32);
8218 const LLT S1 = LLT::scalar(1);
8219
8220 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8221
8222 // For i64 < INT_MAX we simply reuse SITOFP.
8223 // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
8224 // saved before division, convert to float by SITOFP, multiply the result
8225 // by 2.
8226 auto One = MIRBuilder.buildConstant(S64, 1);
8227 auto Zero = MIRBuilder.buildConstant(S64, 0);
8228 // Result if Src < INT_MAX
8229 auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
8230 // Result if Src >= INT_MAX
8231 auto Halved = MIRBuilder.buildLShr(S64, Src, One);
8232 auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
8233 auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
8234 auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
8235 auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
8236 // Check if the original value is larger than INT_MAX by comparing with
8237 // zero to pick one of the two conversions.
8238 auto IsLarge =
8239 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
8240 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8241
8242 MI.eraseFromParent();
8243 return Legalized;
8244}
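A plain C++ sketch of the same SITOFP-based expansion (illustrative only, not from this file): values below 2^63 use an ordinary signed conversion, larger values are halved with the dropped bit folded back in before converting and doubling:

#include <cstdint>

static float U64ToF32ViaSigned(uint64_t U) {
  if (static_cast<int64_t>(U) >= 0)             // Src < 2^63: reuse a signed convert.
    return static_cast<float>(static_cast<int64_t>(U));
  uint64_t RoundedHalved = (U >> 1) | (U & 1);  // keep the lost bit for rounding
  float Halved = static_cast<float>(static_cast<int64_t>(RoundedHalved));
  return Halved + Halved;                       // multiply the result by 2
}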
8245
8246// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
8247// IEEE double representation.
8248LegalizerHelper::LegalizeResult
8249LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) {
8250 auto [Dst, Src] = MI.getFirst2Regs();
8251 const LLT S64 = LLT::scalar(64);
8252 const LLT S32 = LLT::scalar(32);
8253
8254 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
8255
8256 // We create double value from 32 bit parts with 32 exponent difference.
8257 // Note that + and - are float operations that adjust the implicit leading
8258 // one, the bases 2^52 and 2^84 are for illustrative purposes.
8259 //
8260 // X = 2^52 * 1.0...LowBits
8261 // Y = 2^84 * 1.0...HighBits
8262 // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
8263 // = - 2^52 * 1.0...HighBits
8264 // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
8265 auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
8266 auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
8267 auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
8268 auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
8269 auto HalfWidth = MIRBuilder.buildConstant(S64, 32);
8270
8271 auto LowBits = MIRBuilder.buildTrunc(S32, Src);
8272 LowBits = MIRBuilder.buildZExt(S64, LowBits);
8273 auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
8274 auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
8275 auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
8276 auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
8277 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
8278
8279 MI.eraseFromParent();
8280 return Legalized;
8281}
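The same construction written as a standalone C++ sketch (std::bit_cast assumes C++20; the helper name is illustrative):

#include <bit>
#include <cstdint>

// Plant the halves in the mantissas of 2^52 and 2^84; one exact FSUB and one
// rounding FADD reassemble the u64 as an f64 with a single rounding step.
static double U64ToF64BitFloat(uint64_t U) {
  double LowBitsFP = std::bit_cast<double>(0x4330000000000000ULL | (U & 0xffffffffULL));
  double HighBitsFP = std::bit_cast<double>(0x4530000000000000ULL | (U >> 32));
  double TwoP52P84 = std::bit_cast<double>(0x4530000000100000ULL);
  double Scratch = HighBitsFP - TwoP52P84;  // exact: 2^32 * HighBits - 2^52
  return Scratch + LowBitsFP;               // rounds 2^32 * HighBits + LowBits once
}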
8282
8283/// i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16. We cannot
8284/// convert fpround f64->f16 without double-rounding, so we manually perform the
8285/// lowering here where we know it is valid.
8286static LegalizerHelper::LegalizeResult
8287loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src,
8288 LLT SrcTy, MachineIRBuilder &MIRBuilder) {
8289 auto M1 = MI.getOpcode() == TargetOpcode::G_UITOFP
8290 ? MIRBuilder.buildUITOFP(SrcTy, Src)
8291 : MIRBuilder.buildSITOFP(SrcTy, Src);
8292 LLT S32Ty = SrcTy.changeElementSize(32);
8293 auto M2 = MIRBuilder.buildFPTrunc(S32Ty, M1);
8294 MIRBuilder.buildFPTrunc(Dst, M2);
8295 MI.eraseFromParent();
8296 return LegalizerHelper::Legalized;
8297}
8298
8299LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
8300 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8301
8302 if (SrcTy == LLT::scalar(1)) {
8303 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
8304 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8305 MIRBuilder.buildSelect(Dst, Src, True, False);
8306 MI.eraseFromParent();
8307 return Legalized;
8308 }
8309
8310 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8311 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8312
8313 if (SrcTy != LLT::scalar(64))
8314 return UnableToLegalize;
8315
8316 if (DstTy == LLT::scalar(32))
8317 // TODO: SelectionDAG has several alternative expansions to port which may
8318 // be more reasonable depending on the available instructions. We also need
8319 // a more advanced mechanism to choose an optimal version depending on
8320 // target features such as sitofp or CTLZ availability.
8321 return lowerU64ToF32WithSITOFP(MI);
8322
8323 if (DstTy == LLT::scalar(64))
8324 return lowerU64ToF64BitFloatOps(MI);
8325
8326 return UnableToLegalize;
8327}
8328
8329LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
8330 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8331
8332 const LLT S64 = LLT::scalar(64);
8333 const LLT S32 = LLT::scalar(32);
8334 const LLT S1 = LLT::scalar(1);
8335
8336 if (SrcTy == S1) {
8337 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
8338 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8339 MIRBuilder.buildSelect(Dst, Src, True, False);
8340 MI.eraseFromParent();
8341 return Legalized;
8342 }
8343
8344 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8345 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8346
8347 if (SrcTy != S64)
8348 return UnableToLegalize;
8349
8350 if (DstTy == S32) {
8351 // signed cl2f(long l) {
8352 // long s = l >> 63;
8353 // float r = cul2f((l + s) ^ s);
8354 // return s ? -r : r;
8355 // }
8356 Register L = Src;
8357 auto SignBit = MIRBuilder.buildConstant(S64, 63);
8358 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
8359
8360 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
8361 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
8362 auto R = MIRBuilder.buildUITOFP(S32, Xor);
8363
8364 auto RNeg = MIRBuilder.buildFNeg(S32, R);
8365 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
8366 MIRBuilder.buildConstant(S64, 0));
8367 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8368 MI.eraseFromParent();
8369 return Legalized;
8370 }
8371
8372 return UnableToLegalize;
8373}
8374
8375LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
8376 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8377 const LLT S64 = LLT::scalar(64);
8378 const LLT S32 = LLT::scalar(32);
8379
8380 if (SrcTy != S64 && SrcTy != S32)
8381 return UnableToLegalize;
8382 if (DstTy != S32 && DstTy != S64)
8383 return UnableToLegalize;
8384
8385 // FPTOSI gives same result as FPTOUI for positive signed integers.
8386 // FPTOUI needs to deal with fp values that convert to unsigned integers
8387 // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
8388
8389 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
8390 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
8391 : APFloat::IEEEdouble(),
8392 APInt::getZero(SrcTy.getSizeInBits()));
8393 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
8394
8395 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
8396
8397 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
8398 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
8399 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
8400 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
8401 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
8402 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
8403 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
8404
8405 const LLT S1 = LLT::scalar(1);
8406
8407 MachineInstrBuilder FCMP =
8408 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
8409 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8410
8411 MI.eraseFromParent();
8412 return Legalized;
8413}
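An illustrative scalar sketch of the select-based expansion above for f64 -> u64 (assumes a finite input in [0, 2^64), matching the non-trapping assumption noted in the code; names are illustrative):

#include <cstdint>

static uint64_t FPToUI64(double Src) {
  const double Threshold = 9223372036854775808.0;  // 2^63
  if (Src < Threshold)                              // a plain FPTOSI is enough here
    return static_cast<uint64_t>(static_cast<int64_t>(Src));
  uint64_t Low = static_cast<uint64_t>(static_cast<int64_t>(Src - Threshold));
  return Low ^ 0x8000000000000000ULL;               // put the high bit back
}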
8414
8415LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
8416 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8417 const LLT S64 = LLT::scalar(64);
8418 const LLT S32 = LLT::scalar(32);
8419
8420 // FIXME: Only f32 to i64 conversions are supported.
8421 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
8422 return UnableToLegalize;
8423
8424 // Expand f32 -> i64 conversion
8425 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8426 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8427
8428 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8429
8430 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
8431 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
8432
8433 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
8434 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
8435
8436 auto SignMask = MIRBuilder.buildConstant(SrcTy,
8437 APInt::getSignMask(SrcEltBits));
8438 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
8439 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
8440 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
8441 Sign = MIRBuilder.buildSExt(DstTy, Sign);
8442
8443 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
8444 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
8445 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
8446
8447 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
8448 R = MIRBuilder.buildZExt(DstTy, R);
8449
8450 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
8451 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
8452 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
8453 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
8454
8455 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
8456 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
8457
8458 const LLT S1 = LLT::scalar(1);
8459 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
8460 S1, Exponent, ExponentLoBit);
8461
8462 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
8463
8464 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
8465 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
8466
8467 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
8468
8469 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
8470 S1, Exponent, ZeroSrcTy);
8471
8472 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
8473 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
8474
8475 MI.eraseFromParent();
8476 return Legalized;
8477}
8478
8479LegalizerHelper::LegalizeResult
8480LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
8481 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8482
8483 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8484 unsigned SatWidth = DstTy.getScalarSizeInBits();
8485
8486 // Determine minimum and maximum integer values and their corresponding
8487 // floating-point values.
8488 APInt MinInt, MaxInt;
8489 if (IsSigned) {
8490 MinInt = APInt::getSignedMinValue(SatWidth);
8491 MaxInt = APInt::getSignedMaxValue(SatWidth);
8492 } else {
8493 MinInt = APInt::getMinValue(SatWidth);
8494 MaxInt = APInt::getMaxValue(SatWidth);
8495 }
8496
8497 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8498 APFloat MinFloat(Semantics);
8499 APFloat MaxFloat(Semantics);
8500
8501 APFloat::opStatus MinStatus =
8502 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
8503 APFloat::opStatus MaxStatus =
8504 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
8505 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
8506 !(MaxStatus & APFloat::opStatus::opInexact);
8507
8508 // If the integer bounds are exactly representable as floats, emit a
8509 // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
8510 // and selects.
8511 if (AreExactFloatBounds) {
8512 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
8513 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
8514 auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
8515 SrcTy.changeElementSize(1), Src, MaxC);
8516 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8517 // Clamp by MaxFloat from above. NaN cannot occur.
8518 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8519 auto MinP =
8520 MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max,
8521 MinC, MachineInstr::FmNoNans);
8522 auto Min =
8523 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
8524 // Convert clamped value to integer. In the unsigned case we're done,
8525 // because we mapped NaN to MinFloat, which will cast to zero.
8526 if (!IsSigned) {
8527 MIRBuilder.buildFPTOUI(Dst, Min);
8528 MI.eraseFromParent();
8529 return Legalized;
8530 }
8531
8532 // Otherwise, select 0 if Src is NaN.
8533 auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
8534 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8535 DstTy.changeElementSize(1), Src, Src);
8536 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
8537 FpToInt);
8538 MI.eraseFromParent();
8539 return Legalized;
8540 }
8541
8542 // Result of direct conversion. The assumption here is that the operation is
8543 // non-trapping and it's fine to apply it to an out-of-range value if we
8544 // select it away later.
8545 auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
8546 : MIRBuilder.buildFPTOUI(DstTy, Src);
8547
8548 // If Src ULT MinFloat, select MinInt. In particular, this also selects
8549 // MinInt if Src is NaN.
8550 auto ULT =
8551 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
8552 MIRBuilder.buildFConstant(SrcTy, MinFloat));
8553 auto Max = MIRBuilder.buildSelect(
8554 DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8555 // If Src OGT MaxFloat, select MaxInt.
8556 auto OGT =
8557 MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
8558 MIRBuilder.buildFConstant(SrcTy, MaxFloat));
8559
8560 // In the unsigned case we are done, because we mapped NaN to MinInt, which
8561 // is already zero.
8562 if (!IsSigned) {
8563 MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
8564 Max);
8565 MI.eraseFromParent();
8566 return Legalized;
8567 }
8568
8569 // Otherwise, select 0 if Src is NaN.
8570 auto Min = MIRBuilder.buildSelect(
8571 DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8572 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8573 DstTy.changeElementSize(1), Src, Src);
8574 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
8575 MI.eraseFromParent();
8576 return Legalized;
8577}
8578
8579// f64 -> f16 conversion using round-to-nearest-even rounding mode.
8580LegalizerHelper::LegalizeResult
8581LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
8582 const LLT S1 = LLT::scalar(1);
8583 const LLT S32 = LLT::scalar(32);
8584
8585 auto [Dst, Src] = MI.getFirst2Regs();
8586 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
8587 MRI.getType(Src).getScalarType() == LLT::scalar(64));
8588
8589 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
8590 return UnableToLegalize;
8591
8592 if (MI.getFlag(MachineInstr::FmAfn)) {
8593 unsigned Flags = MI.getFlags();
8594 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
8595 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
8596 MI.eraseFromParent();
8597 return Legalized;
8598 }
8599
8600 const unsigned ExpMask = 0x7ff;
8601 const unsigned ExpBiasf64 = 1023;
8602 const unsigned ExpBiasf16 = 15;
8603
8604 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
8605 Register U = Unmerge.getReg(0);
8606 Register UH = Unmerge.getReg(1);
8607
8608 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
8609 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
8610
8611 // Subtract the fp64 exponent bias (1023) to get the real exponent and
8612 // add the f16 bias (15) to get the biased exponent for the f16 format.
8613 E = MIRBuilder.buildAdd(
8614 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
8615
8616 auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
8617 M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
8618
8619 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
8620 MIRBuilder.buildConstant(S32, 0x1ff));
8621 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
8622
8623 auto Zero = MIRBuilder.buildConstant(S32, 0);
8624 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
8625 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
8626 M = MIRBuilder.buildOr(S32, M, Lo40Set);
8627
8628 // (M != 0 ? 0x0200 : 0) | 0x7c00;
8629 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
8630 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
8631 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
8632
8633 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
8634 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
8635
8636 // N = M | (E << 12);
8637 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
8638 auto N = MIRBuilder.buildOr(S32, M, EShl12);
8639
8640 // B = clamp(1-E, 0, 13);
8641 auto One = MIRBuilder.buildConstant(S32, 1);
8642 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
8643 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
8644 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
8645
8646 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
8647 MIRBuilder.buildConstant(S32, 0x1000));
8648
8649 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
8650 auto D0 = MIRBuilder.buildShl(S32, D, B);
8651
8652 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
8653 D0, SigSetHigh);
8654 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
8655 D = MIRBuilder.buildOr(S32, D, D1);
8656
8657 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
8658 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
8659
8660 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
8661 V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
8662
8663 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
8664 MIRBuilder.buildConstant(S32, 3));
8665 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
8666
8667 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
8668 MIRBuilder.buildConstant(S32, 5));
8669 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
8670
8671 V1 = MIRBuilder.buildOr(S32, V0, V1);
8672 V = MIRBuilder.buildAdd(S32, V, V1);
8673
8674 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
8675 E, MIRBuilder.buildConstant(S32, 30));
8676 V = MIRBuilder.buildSelect(S32, CmpEGt30,
8677 MIRBuilder.buildConstant(S32, 0x7c00), V);
8678
8679 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
8680 E, MIRBuilder.buildConstant(S32, 1039));
8681 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
8682
8683 // Extract the sign bit.
8684 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
8685 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
8686
8687 // Insert the sign bit
8688 V = MIRBuilder.buildOr(S32, Sign, V);
8689
8690 MIRBuilder.buildTrunc(Dst, V);
8691 MI.eraseFromParent();
8692 return Legalized;
8693}
8694
8695LegalizerHelper::LegalizeResult
8696LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
8697 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
8698 const LLT S64 = LLT::scalar(64);
8699 const LLT S16 = LLT::scalar(16);
8700
8701 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
8702 return lowerFPTRUNC_F64_TO_F16(MI);
8703
8704 return UnableToLegalize;
8705}
8706
8707LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
8708 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8709 LLT Ty = MRI.getType(Dst);
8710
8711 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
8712 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
8713 MI.eraseFromParent();
8714 return Legalized;
8715}
8716
8717static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
8718 switch (Opc) {
8719 case TargetOpcode::G_SMIN:
8720 return CmpInst::ICMP_SLT;
8721 case TargetOpcode::G_SMAX:
8722 return CmpInst::ICMP_SGT;
8723 case TargetOpcode::G_UMIN:
8724 return CmpInst::ICMP_ULT;
8725 case TargetOpcode::G_UMAX:
8726 return CmpInst::ICMP_UGT;
8727 default:
8728 llvm_unreachable("not in integer min/max");
8729 }
8730}
8731
8732LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
8733 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8734
8735 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
8736 LLT CmpType = MRI.getType(Dst).changeElementType(LLT::scalar(1));
8737
8738 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8739 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8740
8741 MI.eraseFromParent();
8742 return Legalized;
8743}
8744
8745LegalizerHelper::LegalizeResult
8746LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
8747 GSUCmp *Cmp = cast<GSUCmp>(&MI);
8748
8749 Register Dst = Cmp->getReg(0);
8750 LLT DstTy = MRI.getType(Dst);
8751 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8752 LLT CmpTy = DstTy.changeElementSize(1);
8753
8754 CmpInst::Predicate LTPredicate = Cmp->isSigned()
8755 ? CmpInst::Predicate::ICMP_SLT
8756 : CmpInst::Predicate::ICMP_ULT;
8757 CmpInst::Predicate GTPredicate = Cmp->isSigned()
8758 ? CmpInst::Predicate::ICMP_SGT
8759 : CmpInst::Predicate::ICMP_UGT;
8760
8761 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
8762 auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8763 Cmp->getRHSReg());
8764 auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8765 Cmp->getRHSReg());
8766
8767 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
8768 auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
8769 if (TLI.preferSelectsOverBooleanArithmetic(
8770 getApproximateEVTForLLT(SrcTy, Ctx)) ||
8771 BC == TargetLowering::UndefinedBooleanContent) {
8772 auto One = MIRBuilder.buildConstant(DstTy, 1);
8773 auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8774
8775 auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
8776 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8777 } else {
8778 if (BC == TargetLowering::ZeroOrNegativeOneBooleanContent)
8779 std::swap(IsGT, IsLT);
8780 // Extend boolean results to DstTy, which is at least i2, before subtracting
8781 // them.
8782 unsigned BoolExtOp =
8783 MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
8784 IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8785 IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8786 MIRBuilder.buildSub(Dst, IsGT, IsLT);
8787 }
8788
8789 MI.eraseFromParent();
8790 return Legalized;
8791}
8792
8793LegalizerHelper::LegalizeResult
8794LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
8795 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
8796 const int Src0Size = Src0Ty.getScalarSizeInBits();
8797 const int Src1Size = Src1Ty.getScalarSizeInBits();
8798
8799 auto SignBitMask = MIRBuilder.buildConstant(
8800 Src0Ty, APInt::getSignMask(Src0Size));
8801
8802 auto NotSignBitMask = MIRBuilder.buildConstant(
8803 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
8804
8805 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
8806 Register And1;
8807 if (Src0Ty == Src1Ty) {
8808 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
8809 } else if (Src0Size > Src1Size) {
8810 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
8811 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
8812 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
8813 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
8814 } else {
8815 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
8816 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
8817 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
8818 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
8819 }
8820
8821 // Be careful about setting nsz/nnan/ninf on every instruction, since the
8822 // constants are a nan and -0.0, but the final result should preserve
8823 // everything.
8824 unsigned Flags = MI.getFlags();
8825
8826 // We masked the sign bit and the not-sign bit, so these are disjoint.
8827 Flags |= MachineInstr::Disjoint;
8828
8829 MIRBuilder.buildOr(Dst, And0, And1, Flags);
8830
8831 MI.eraseFromParent();
8832 return Legalized;
8833}
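For the equal-width f64 case, the masking above reduces to the following standalone sketch (C++20 std::bit_cast; the helper name is illustrative):

#include <bit>
#include <cstdint>

static double CopySignF64(double Mag, double Sign) {
  uint64_t And0 = std::bit_cast<uint64_t>(Mag) & 0x7fffffffffffffffULL;  // clear sign
  uint64_t And1 = std::bit_cast<uint64_t>(Sign) & 0x8000000000000000ULL; // keep sign
  return std::bit_cast<double>(And0 | And1);                             // disjoint OR
}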
8834
8835LegalizerHelper::LegalizeResult
8836LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
8837 // FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have
8838 // identical handling. fminimumnum/fmaximumnum also need a path that do not
8839 // depend on fminnum/fmaxnum.
8840
8841 unsigned NewOp;
8842 switch (MI.getOpcode()) {
8843 case TargetOpcode::G_FMINNUM:
8844 NewOp = TargetOpcode::G_FMINNUM_IEEE;
8845 break;
8846 case TargetOpcode::G_FMINIMUMNUM:
8847 NewOp = TargetOpcode::G_FMINNUM;
8848 break;
8849 case TargetOpcode::G_FMAXNUM:
8850 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
8851 break;
8852 case TargetOpcode::G_FMAXIMUMNUM:
8853 NewOp = TargetOpcode::G_FMAXNUM;
8854 break;
8855 default:
8856 llvm_unreachable("unexpected min/max opcode");
8857 }
8858
8859 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8860 LLT Ty = MRI.getType(Dst);
8861
8862 if (!MI.getFlag(MachineInstr::FmNoNans)) {
8863 // Insert canonicalizes if it's possible we need to quiet to get correct
8864 // sNaN behavior.
8865
8866 // Note this must be done here, and not as an optimization combine in the
8867 // absence of a dedicated quiet-sNaN instruction as we're using an
8868 // omni-purpose G_FCANONICALIZE.
8869 if (!isKnownNeverSNaN(Src0, MRI))
8870 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
8871
8872 if (!isKnownNeverSNaN(Src1, MRI))
8873 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
8874 }
8875
8876 // If there are no nans, it's safe to simply replace this with the non-IEEE
8877 // version.
8878 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
8879 MI.eraseFromParent();
8880 return Legalized;
8881}
8882
8885 unsigned Opc = MI.getOpcode();
8886 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8887 LLT Ty = MRI.getType(Dst);
8888 LLT CmpTy = Ty.changeElementSize(1);
8889
8890 bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM);
8891 unsigned OpcIeee =
8892 IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
8893 unsigned OpcNonIeee =
8894 IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
8895 bool MinMaxMustRespectOrderedZero = false;
8896 Register Res;
8897
8898 // IEEE variants don't need canonicalization
8899 if (LI.isLegalOrCustom({OpcIeee, Ty})) {
8900 Res = MIRBuilder.buildInstr(OpcIeee, {Ty}, {Src0, Src1}).getReg(0);
8901 MinMaxMustRespectOrderedZero = true;
8902 } else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
8903 Res = MIRBuilder.buildInstr(OpcNonIeee, {Ty}, {Src0, Src1}).getReg(0);
8904 } else {
8905 auto Compare = MIRBuilder.buildFCmp(
8906 IsMax ? CmpInst::FCMP_OGT : CmpInst::FCMP_OLT, CmpTy, Src0, Src1);
8907 Res = MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
8908 }
8909
8910 // Propagate any NaN of both operands
8911 if (!MI.getFlag(MachineInstr::FmNoNans) &&
8912 (!isKnownNeverNaN(Src0, MRI) || !isKnownNeverNaN(Src1, MRI))) {
8913 auto IsOrdered = MIRBuilder.buildFCmp(CmpInst::FCMP_ORD, CmpTy, Src0, Src1);
8914
8915 LLT ElementTy = Ty.isScalar() ? Ty : Ty.getElementType();
8916 APFloat NaNValue = APFloat::getNaN(getFltSemanticForLLT(ElementTy));
8917 Register NaN = MIRBuilder.buildFConstant(ElementTy, NaNValue).getReg(0);
8918 if (Ty.isVector())
8919 NaN = MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);
8920
8921 Res = MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
8922 }
8923
8924 // fminimum/fmaximum requires -0.0 less than +0.0
8925 if (!MinMaxMustRespectOrderedZero && !MI.getFlag(MachineInstr::FmNsz)) {
8926 GISelValueTracking VT(MIRBuilder.getMF());
8927 KnownFPClass Src0Info = VT.computeKnownFPClass(Src0, fcZero);
8928 KnownFPClass Src1Info = VT.computeKnownFPClass(Src1, fcZero);
8929
8930 if (!Src0Info.isKnownNeverZero() && !Src1Info.isKnownNeverZero()) {
8931 const unsigned Flags = MI.getFlags();
8932 Register Zero = MIRBuilder.buildFConstant(Ty, 0.0).getReg(0);
8933 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CmpTy, Res, Zero);
8934
8935 unsigned TestClass = IsMax ? fcPosZero : fcNegZero;
8936
8937 auto LHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
8938 auto LHSSelect =
8939 MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);
8940
8941 auto RHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
8942 auto RHSSelect =
8943 MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);
8944
8945 Res = MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
8946 }
8947 }
8948
8949 MIRBuilder.buildCopy(Dst, Res);
8950 MI.eraseFromParent();
8951 return Legalized;
8952}
8953
8954LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
8955 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
8956 Register DstReg = MI.getOperand(0).getReg();
8957 LLT Ty = MRI.getType(DstReg);
8958 unsigned Flags = MI.getFlags();
8959
8960 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
8961 Flags);
8962 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
8963 MI.eraseFromParent();
8964 return Legalized;
8965}
8966
8967LegalizerHelper::LegalizeResult
8968LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
8969 auto [DstReg, X] = MI.getFirst2Regs();
8970 const unsigned Flags = MI.getFlags();
8971 const LLT Ty = MRI.getType(DstReg);
8972 const LLT CondTy = Ty.changeElementSize(1);
8973
8974 // round(x) =>
8975 // t = trunc(x);
8976 // d = fabs(x - t);
8977 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
8978 // return t + o;
8979
8980 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
8981
8982 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
8983 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
8984
8985 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
8986 auto Cmp =
8987 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
8988
8989 // Could emit G_UITOFP instead
8990 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
8991 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8992 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
8993 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
8994
8995 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
8996
8997 MI.eraseFromParent();
8998 return Legalized;
8999}
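The recipe in the comment above as a standalone sketch (plain C++, FP flags ignored; helper name illustrative):

#include <cmath>

// round(x) = trunc(x) + copysign(|x - trunc(x)| >= 0.5 ? 1.0 : 0.0, x).
static double RoundSketch(double X) {
  double T = std::trunc(X);
  double AbsDiff = std::fabs(X - T);
  double Offset = std::copysign(AbsDiff >= 0.5 ? 1.0 : 0.0, X);
  return T + Offset;
}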
9000
9001LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
9002 auto [DstReg, SrcReg] = MI.getFirst2Regs();
9003 unsigned Flags = MI.getFlags();
9004 LLT Ty = MRI.getType(DstReg);
9005 const LLT CondTy = Ty.changeElementSize(1);
9006
9007 // result = trunc(src);
9008 // if (src < 0.0 && src != result)
9009 // result += -1.0.
9010
9011 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
9012 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
9013
9014 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
9015 SrcReg, Zero, Flags);
9016 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
9017 SrcReg, Trunc, Flags);
9018 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
9019 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
9020
9021 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
9022 MI.eraseFromParent();
9023 return Legalized;
9024}
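The floor expansion in the same illustrative style (a sketch, not from this file):

#include <cmath>

// floor(x) = trunc(x), minus one when x is negative and not already integral.
static double FloorSketch(double X) {
  double T = std::trunc(X);
  return T + ((X < 0.0 && X != T) ? -1.0 : 0.0);
}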
9025
9026LegalizerHelper::LegalizeResult
9027LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
9028 const unsigned NumOps = MI.getNumOperands();
9029 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
9030 unsigned PartSize = Src0Ty.getSizeInBits();
9031
9032 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
9033 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
9034
9035 for (unsigned I = 2; I != NumOps; ++I) {
9036 const unsigned Offset = (I - 1) * PartSize;
9037
9038 Register SrcReg = MI.getOperand(I).getReg();
9039 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
9040
9041 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
9042 MRI.createGenericVirtualRegister(WideTy);
9043
9044 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
9045 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
9046 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
9047 ResultReg = NextResult;
9048 }
9049
9050 if (DstTy.isPointer()) {
9051 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
9052 DstTy.getAddressSpace())) {
9053 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
9054 return UnableToLegalize;
9055 }
9056
9057 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
9058 }
9059
9060 MI.eraseFromParent();
9061 return Legalized;
9062}
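For two 16-bit parts merged into an i32, the zext/shl/or chain above amounts to the following sketch (names illustrative):

#include <cstdint>

static uint32_t MergeTwoParts(uint16_t Part0, uint16_t Part1) {
  uint32_t Result = Part0;                       // zext of the first part
  Result |= static_cast<uint32_t>(Part1) << 16;  // next part shifted into place
  return Result;
}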
9063
9064LegalizerHelper::LegalizeResult
9065LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
9066 const unsigned NumDst = MI.getNumOperands() - 1;
9067 Register SrcReg = MI.getOperand(NumDst).getReg();
9068 Register Dst0Reg = MI.getOperand(0).getReg();
9069 LLT DstTy = MRI.getType(Dst0Reg);
9070 if (DstTy.isPointer())
9071 return UnableToLegalize; // TODO
9072
9073 SrcReg = coerceToScalar(SrcReg);
9074 if (!SrcReg)
9075 return UnableToLegalize;
9076
9077 // Expand scalarizing unmerge as bitcast to integer and shift.
9078 LLT IntTy = MRI.getType(SrcReg);
9079
9080 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
9081
9082 const unsigned DstSize = DstTy.getSizeInBits();
9083 unsigned Offset = DstSize;
9084 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
9085 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
9086 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
9087 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
9088 }
9089
9090 MI.eraseFromParent();
9091 return Legalized;
9092}
9093
9094/// Lower a vector extract or insert by writing the vector to a stack temporary
9095/// and reloading the element or vector.
9096///
9097/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
9098/// =>
9099/// %stack_temp = G_FRAME_INDEX
9100/// G_STORE %vec, %stack_temp
9101/// %idx = clamp(%idx, %vec.getNumElements())
9102/// %element_ptr = G_PTR_ADD %stack_temp, %idx
9103/// %dst = G_LOAD %element_ptr
9104LegalizerHelper::LegalizeResult
9105LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
9106 Register DstReg = MI.getOperand(0).getReg();
9107 Register SrcVec = MI.getOperand(1).getReg();
9108 Register InsertVal;
9109 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
9110 InsertVal = MI.getOperand(2).getReg();
9111
9112 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
9113
9114 LLT VecTy = MRI.getType(SrcVec);
9115 LLT EltTy = VecTy.getElementType();
9116 unsigned NumElts = VecTy.getNumElements();
9117
9118 int64_t IdxVal;
9119 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
9120 SmallVector<Register, 8> SrcRegs;
9121 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
9122
9123 if (InsertVal) {
9124 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
9125 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
9126 } else {
9127 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
9128 }
9129
9130 MI.eraseFromParent();
9131 return Legalized;
9132 }
9133
9134 if (!EltTy.isByteSized()) { // Not implemented.
9135 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
9136 return UnableToLegalize;
9137 }
9138
9139 unsigned EltBytes = EltTy.getSizeInBytes();
9140 Align VecAlign = getStackTemporaryAlignment(VecTy);
9141 Align EltAlign;
9142
9143 MachinePointerInfo PtrInfo;
9144 auto StackTemp = createStackTemporary(
9145 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
9146 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
9147
9148 // Get the pointer to the element, and be sure not to hit undefined behavior
9149 // if the index is out of bounds.
9150 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
9151
9152 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
9153 int64_t Offset = IdxVal * EltBytes;
9154 PtrInfo = PtrInfo.getWithOffset(Offset);
9155 EltAlign = commonAlignment(VecAlign, Offset);
9156 } else {
9157 // We lose information with a variable offset.
9158 EltAlign = getStackTemporaryAlignment(EltTy);
9159 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
9160 }
9161
9162 if (InsertVal) {
9163 // Write the inserted element
9164 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
9165
9166 // Reload the whole vector.
9167 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
9168 } else {
9169 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
9170 }
9171
9172 MI.eraseFromParent();
9173 return Legalized;
9174}
9175
9176LegalizerHelper::LegalizeResult
9177LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
9178 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
9179 MI.getFirst3RegLLTs();
9180 LLT IdxTy = LLT::scalar(32);
9181
9182 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
9183 Register Undef;
9184 SmallVector<Register, 32> BuildVec;
9185 LLT EltTy = DstTy.getScalarType();
9186
9187 DenseMap<unsigned, Register> CachedExtract;
9188
9189 for (int Idx : Mask) {
9190 if (Idx < 0) {
9191 if (!Undef.isValid())
9192 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
9193 BuildVec.push_back(Undef);
9194 continue;
9195 }
9196
9197 assert(!Src0Ty.isScalar() && "Unexpected scalar G_SHUFFLE_VECTOR");
9198
9199 int NumElts = Src0Ty.getNumElements();
9200 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
9201 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
9202 auto [It, Inserted] = CachedExtract.try_emplace(Idx);
9203 if (Inserted) {
9204 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
9205 It->second =
9206 MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK).getReg(0);
9207 }
9208 BuildVec.push_back(It->second);
9209 }
9210
9211 assert(DstTy.isVector() && "Unexpected scalar G_SHUFFLE_VECTOR");
9212 MIRBuilder.buildBuildVector(DstReg, BuildVec);
9213 MI.eraseFromParent();
9214 return Legalized;
9215}
9216
9217LegalizerHelper::LegalizeResult
9218LegalizerHelper::lowerVECTOR_COMPRESS(MachineInstr &MI) {
9219 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
9220 MI.getFirst4RegLLTs();
9221
9222 if (VecTy.isScalableVector())
9223 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
9224
9225 Align VecAlign = getStackTemporaryAlignment(VecTy);
9226 MachinePointerInfo PtrInfo;
9227 Register StackPtr =
9228 createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
9229 PtrInfo)
9230 .getReg(0);
9231 MachinePointerInfo ValPtrInfo =
9232 MachinePointerInfo::getUnknownStack(*MI.getMF());
9233
9234 LLT IdxTy = LLT::scalar(32);
9235 LLT ValTy = VecTy.getElementType();
9236 Align ValAlign = getStackTemporaryAlignment(ValTy);
9237
9238 auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
9239
9240 bool HasPassthru =
9241 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
9242
9243 if (HasPassthru)
9244 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
9245
9246 Register LastWriteVal;
9247 std::optional<APInt> PassthruSplatVal =
9248 isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
9249
9250 if (PassthruSplatVal.has_value()) {
9251 LastWriteVal =
9252 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9253 } else if (HasPassthru) {
9254 auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9255 Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9256 {LLT::scalar(32)}, {Popcount});
9257
9258 Register LastElmtPtr =
9259 getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
9260 LastWriteVal =
9261 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9262 .getReg(0);
9263 }
9264
9265 unsigned NumElmts = VecTy.getNumElements();
9266 for (unsigned I = 0; I < NumElmts; ++I) {
9267 auto Idx = MIRBuilder.buildConstant(IdxTy, I);
9268 auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
9269 Register ElmtPtr =
9270 getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9271 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9272
9273 LLT MaskITy = MaskTy.getElementType();
9274 auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9275 if (MaskITy.getSizeInBits() > 1)
9276 MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
9277
9278 MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
9279 OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
9280
9281 if (HasPassthru && I == NumElmts - 1) {
9282 auto EndOfVector =
9283 MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
9284 auto AllLanesSelected = MIRBuilder.buildICmp(
9285 CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
9286 OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9287 {OutPos, EndOfVector});
9288 ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9289
9290 LastWriteVal =
9291 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9292 .getReg(0);
9293 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9294 }
9295 }
9296
9297 // TODO: Use StackPtr's FrameIndex alignment.
9298 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9299
9300 MI.eraseFromParent();
9301 return Legalized;
9302}
9303
9304Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
9305 Register AllocSize,
9306 Align Alignment,
9307 LLT PtrTy) {
9308 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
9309
9310 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
9311 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
9312
9313 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
9314 // have to generate an extra instruction to negate the alloc and then use
9315 // G_PTR_ADD to add the negative offset.
9316 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
9317 if (Alignment > Align(1)) {
9318 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
9319 AlignMask.negate();
9320 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
9321 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
9322 }
9323
9324 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
9325}
9326
9327LegalizerHelper::LegalizeResult
9328LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
9329 const auto &MF = *MI.getMF();
9330 const auto &TFI = *MF.getSubtarget().getFrameLowering();
9331 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
9332 return UnableToLegalize;
9333
9334 Register Dst = MI.getOperand(0).getReg();
9335 Register AllocSize = MI.getOperand(1).getReg();
9336 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
9337
9338 LLT PtrTy = MRI.getType(Dst);
9339 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9340 Register SPTmp =
9341 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
9342
9343 MIRBuilder.buildCopy(SPReg, SPTmp);
9344 MIRBuilder.buildCopy(Dst, SPTmp);
9345
9346 MI.eraseFromParent();
9347 return Legalized;
9348}
9349
9350LegalizerHelper::LegalizeResult
9351LegalizerHelper::lowerStackSave(MachineInstr &MI) {
9352 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9353 if (!StackPtr)
9354 return UnableToLegalize;
9355
9356 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
9357 MI.eraseFromParent();
9358 return Legalized;
9359}
9360
9361LegalizerHelper::LegalizeResult
9362LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
9363 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9364 if (!StackPtr)
9365 return UnableToLegalize;
9366
9367 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
9368 MI.eraseFromParent();
9369 return Legalized;
9370}
9371
9372LegalizerHelper::LegalizeResult
9373LegalizerHelper::lowerExtract(MachineInstr &MI) {
9374 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9375 unsigned Offset = MI.getOperand(2).getImm();
9376
9377 // Extract sub-vector or one element
9378 if (SrcTy.isVector()) {
9379 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9380 unsigned DstSize = DstTy.getSizeInBits();
9381
9382 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9383 (Offset + DstSize <= SrcTy.getSizeInBits())) {
9384 // Unmerge and allow access to each Src element for the artifact combiner.
9385 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9386
9387 // Take element(s) we need to extract and copy it (merge them).
9388 SmallVector<Register, 8> SubVectorElts;
9389 for (unsigned Idx = Offset / SrcEltSize;
9390 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
9391 SubVectorElts.push_back(Unmerge.getReg(Idx));
9392 }
9393 if (SubVectorElts.size() == 1)
9394 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9395 else
9396 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9397
9398 MI.eraseFromParent();
9399 return Legalized;
9400 }
9401 }
9402
9403 if (DstTy.isScalar() &&
9404 (SrcTy.isScalar() ||
9405 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9406 LLT SrcIntTy = SrcTy;
9407 if (!SrcTy.isScalar()) {
9408 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
9409 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
9410 }
9411
9412 if (Offset == 0)
9413 MIRBuilder.buildTrunc(DstReg, SrcReg);
9414 else {
9415 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
9416 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9417 MIRBuilder.buildTrunc(DstReg, Shr);
9418 }
9419
9420 MI.eraseFromParent();
9421 return Legalized;
9422 }
9423
9424 return UnableToLegalize;
9425}
9426
9427LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
9428 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
9429 uint64_t Offset = MI.getOperand(3).getImm();
9430
9431 LLT DstTy = MRI.getType(Src);
9432 LLT InsertTy = MRI.getType(InsertSrc);
9433
9434 // Insert sub-vector or one element
9435 if (DstTy.isVector() && !InsertTy.isPointer()) {
9436 LLT EltTy = DstTy.getElementType();
9437 unsigned EltSize = EltTy.getSizeInBits();
9438 unsigned InsertSize = InsertTy.getSizeInBits();
9439
9440 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9441 (Offset + InsertSize <= DstTy.getSizeInBits())) {
9442 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
9443 SmallVector<Register, 8> DstElts;
9444 unsigned Idx = 0;
9445 // Elements from Src before insert start Offset
9446 for (; Idx < Offset / EltSize; ++Idx) {
9447 DstElts.push_back(UnmergeSrc.getReg(Idx));
9448 }
9449
9450 // Replace elements in Src with elements from InsertSrc
9451 if (InsertTy.getSizeInBits() > EltSize) {
9452 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9453 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
9454 ++Idx, ++i) {
9455 DstElts.push_back(UnmergeInsertSrc.getReg(i));
9456 }
9457 } else {
9458 DstElts.push_back(InsertSrc);
9459 ++Idx;
9460 }
9461
9462 // Remaining elements from Src after insert
9463 for (; Idx < DstTy.getNumElements(); ++Idx) {
9464 DstElts.push_back(UnmergeSrc.getReg(Idx));
9465 }
9466
9467 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9468 MI.eraseFromParent();
9469 return Legalized;
9470 }
9471 }
9472
9473 if (InsertTy.isVector() ||
9474 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
9475 return UnableToLegalize;
9476
9477 const DataLayout &DL = MIRBuilder.getDataLayout();
9478 if ((DstTy.isPointer() &&
9479 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
9480 (InsertTy.isPointer() &&
9481 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
9482 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9483 return UnableToLegalize;
9484 }
9485
9486 LLT IntDstTy = DstTy;
9487
9488 if (!DstTy.isScalar()) {
9489 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
9490 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9491 }
9492
9493 if (!InsertTy.isScalar()) {
9494 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
9495 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9496 }
9497
9498 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
9499 if (Offset != 0) {
9500 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
9501 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9502 }
9503
9504 APInt MaskVal = APInt::getBitsSetWithWrap(
9505 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
9506
9507 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
9508 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9509 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9510
9511 MIRBuilder.buildCast(Dst, Or);
9512 MI.eraseFromParent();
9513 return Legalized;
9514}
9515
9516LegalizerHelper::LegalizeResult
9517LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
9518 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9519 MI.getFirst4RegLLTs();
9520 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
9521
9522 LLT Ty = Dst0Ty;
9523 LLT BoolTy = Dst1Ty;
9524
9525 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9526
9527 if (IsAdd)
9528 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
9529 else
9530 MIRBuilder.buildSub(NewDst0, LHS, RHS);
9531
9532 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
9533
9534 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9535
9536 // For an addition, the result should be less than one of the operands (LHS)
9537 // if and only if the other operand (RHS) is negative, otherwise there will
9538 // be overflow.
9539 // For a subtraction, the result should be less than one of the operands
9540 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
9541 // otherwise there will be overflow.
9542 auto ResultLowerThanLHS =
9543 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
9544 auto ConditionRHS = MIRBuilder.buildICmp(
9545 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
9546
9547 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
9548
9549 MIRBuilder.buildCopy(Dst0, NewDst0);
9550 MI.eraseFromParent();
9551
9552 return Legalized;
9553}
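A scalar model of the overflow test above for 32-bit signed addition (an illustrative sketch; it relies on two's complement narrowing, which C++20 guarantees):

#include <cstdint>

// Overflow is "result < LHS" XOR "RHS < 0"; the add is done on the unsigned
// type so the wrap-around itself is well defined.
static bool SAddOverflows(int32_t LHS, int32_t RHS, int32_t &Res) {
  uint32_t Wide = static_cast<uint32_t>(LHS) + static_cast<uint32_t>(RHS);
  Res = static_cast<int32_t>(Wide);
  bool ResultLowerThanLHS = Res < LHS;
  bool RHSIsNegative = RHS < 0;
  return ResultLowerThanLHS != RHSIsNegative;
}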
9554
9556 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9557 const LLT Ty = MRI.getType(Res);
9558
9559 // sum = LHS + RHS + zext(CarryIn)
9560 auto Tmp = MIRBuilder.buildAdd(Ty, LHS, RHS);
9561 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9562 auto Sum = MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
9563 MIRBuilder.buildCopy(Res, Sum);
9564
9565 // OvOut = icmp slt ((sum ^ lhs) & (sum ^ rhs)), 0
9566 auto AX = MIRBuilder.buildXor(Ty, Sum, LHS);
9567 auto BX = MIRBuilder.buildXor(Ty, Sum, RHS);
9568 auto T = MIRBuilder.buildAnd(Ty, AX, BX);
9569
9570 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9571 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9572
9573 MI.eraseFromParent();
9574 return Legalized;
9575}
9576
9578 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9579 const LLT Ty = MRI.getType(Res);
9580
9581 // Diff = LHS - (RHS + zext(CarryIn))
9582 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9583 auto RHSPlusCI = MIRBuilder.buildAdd(Ty, RHS, CarryZ);
9584 auto Diff = MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
9585 MIRBuilder.buildCopy(Res, Diff);
9586
9587 // ov = msb((LHS ^ RHS) & (LHS ^ Diff))
9588 auto X1 = MIRBuilder.buildXor(Ty, LHS, RHS);
9589 auto X2 = MIRBuilder.buildXor(Ty, LHS, Diff);
9590 auto T = MIRBuilder.buildAnd(Ty, X1, X2);
9591 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9592 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9593
9594 MI.eraseFromParent();
9595 return Legalized;
9596}
9597
9598LegalizerHelper::LegalizeResult
9599LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
9600 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9601 LLT Ty = MRI.getType(Res);
9602 bool IsSigned;
9603 bool IsAdd;
9604 unsigned BaseOp;
9605 switch (MI.getOpcode()) {
9606 default:
9607 llvm_unreachable("unexpected addsat/subsat opcode");
9608 case TargetOpcode::G_UADDSAT:
9609 IsSigned = false;
9610 IsAdd = true;
9611 BaseOp = TargetOpcode::G_ADD;
9612 break;
9613 case TargetOpcode::G_SADDSAT:
9614 IsSigned = true;
9615 IsAdd = true;
9616 BaseOp = TargetOpcode::G_ADD;
9617 break;
9618 case TargetOpcode::G_USUBSAT:
9619 IsSigned = false;
9620 IsAdd = false;
9621 BaseOp = TargetOpcode::G_SUB;
9622 break;
9623 case TargetOpcode::G_SSUBSAT:
9624 IsSigned = true;
9625 IsAdd = false;
9626 BaseOp = TargetOpcode::G_SUB;
9627 break;
9628 }
9629
9630 if (IsSigned) {
9631 // sadd.sat(a, b) ->
9632 // hi = 0x7fffffff - smax(a, 0)
9633 // lo = 0x80000000 - smin(a, 0)
9634 // a + smin(smax(lo, b), hi)
9635 // ssub.sat(a, b) ->
9636 // lo = smax(a, -1) - 0x7fffffff
9637 // hi = smin(a, -1) - 0x80000000
9638 // a - smin(smax(lo, b), hi)
9639 // TODO: AMDGPU can use a "median of 3" instruction here:
9640 // a +/- med3(lo, b, hi)
9641 uint64_t NumBits = Ty.getScalarSizeInBits();
9642 auto MaxVal =
9643 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
9644 auto MinVal =
9645 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9646 MachineInstrBuilder Hi, Lo;
9647 if (IsAdd) {
9648 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9649 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
9650 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
9651 } else {
9652 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
9653 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
9654 MaxVal);
9655 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
9656 MinVal);
9657 }
9658 auto RHSClamped =
9659 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
9660 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
9661 } else {
9662 // uadd.sat(a, b) -> a + umin(~a, b)
9663 // usub.sat(a, b) -> a - umin(a, b)
9664 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
9665 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
9666 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9667 }
9668
9669 MI.eraseFromParent();
9670 return Legalized;
9671}
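The unsigned saturating cases reduce to a single min, as the comments say; an illustrative standalone sketch:

#include <algorithm>
#include <cstdint>

// uadd.sat(a, b) = a + umin(~a, b): ~a is exactly the headroom left below
// the maximum, so the sum never wraps. usub.sat(a, b) = a - umin(a, b).
static uint32_t UAddSat(uint32_t A, uint32_t B) {
  return A + std::min<uint32_t>(~A, B);
}
static uint32_t USubSat(uint32_t A, uint32_t B) {
  return A - std::min<uint32_t>(A, B);
}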
9672
9673LegalizerHelper::LegalizeResult
9674LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
9675 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9676 LLT Ty = MRI.getType(Res);
9677 LLT BoolTy = Ty.changeElementSize(1);
9678 bool IsSigned;
9679 bool IsAdd;
9680 unsigned OverflowOp;
9681 switch (MI.getOpcode()) {
9682 default:
9683 llvm_unreachable("unexpected addsat/subsat opcode");
9684 case TargetOpcode::G_UADDSAT:
9685 IsSigned = false;
9686 IsAdd = true;
9687 OverflowOp = TargetOpcode::G_UADDO;
9688 break;
9689 case TargetOpcode::G_SADDSAT:
9690 IsSigned = true;
9691 IsAdd = true;
9692 OverflowOp = TargetOpcode::G_SADDO;
9693 break;
9694 case TargetOpcode::G_USUBSAT:
9695 IsSigned = false;
9696 IsAdd = false;
9697 OverflowOp = TargetOpcode::G_USUBO;
9698 break;
9699 case TargetOpcode::G_SSUBSAT:
9700 IsSigned = true;
9701 IsAdd = false;
9702 OverflowOp = TargetOpcode::G_SSUBO;
9703 break;
9704 }
9705
9706 auto OverflowRes =
9707 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9708 Register Tmp = OverflowRes.getReg(0);
9709 Register Ov = OverflowRes.getReg(1);
9710 MachineInstrBuilder Clamp;
9711 if (IsSigned) {
9712 // sadd.sat(a, b) ->
9713 // {tmp, ov} = saddo(a, b)
9714 // ov ? (tmp >>s 31) + 0x80000000 : r
9715 // ssub.sat(a, b) ->
9716 // {tmp, ov} = ssubo(a, b)
9717 // ov ? (tmp >>s 31) + 0x80000000 : r
9718 uint64_t NumBits = Ty.getScalarSizeInBits();
9719 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
9720 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9721 auto MinVal =
9722 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9723 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
9724 } else {
9725 // uadd.sat(a, b) ->
9726 // {tmp, ov} = uaddo(a, b)
9727 // ov ? 0xffffffff : tmp
9728 // usub.sat(a, b) ->
9729 // {tmp, ov} = usubo(a, b)
9730 // ov ? 0 : tmp
9731 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9732 }
9733 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
9734
9735 MI.eraseFromParent();
9736 return Legalized;
9737}
9738
9739LegalizerHelper::LegalizeResult
9740LegalizerHelper::lowerShlSat(MachineInstr &MI) {
9741 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
9742 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
9743 "Expected shlsat opcode!");
9744 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
9745 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9746 LLT Ty = MRI.getType(Res);
9747 LLT BoolTy = Ty.changeElementSize(1);
9748
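// Shift, then shift back with the matching arithmetic/logical shift; if the
// round trip does not reproduce LHS, the shift overflowed and the result is
// replaced with the saturating value selected below.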
9749 unsigned BW = Ty.getScalarSizeInBits();
9750 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
9751 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
9752 : MIRBuilder.buildLShr(Ty, Result, RHS);
9753
9754 MachineInstrBuilder SatVal;
9755 if (IsSigned) {
9756 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
9757 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
9758 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
9759 MIRBuilder.buildConstant(Ty, 0));
9760 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
9761 } else {
9762 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
9763 }
9764 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
9765 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
9766
9767 MI.eraseFromParent();
9768 return Legalized;
9769}
9770
9772 auto [Dst, Src] = MI.getFirst2Regs();
9773 const LLT Ty = MRI.getType(Src);
9774 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
9775 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
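// e.g. for s32 0xAABBCCDD: the first OR below produces 0xDD0000AA, and the
// loop then ORs in 0x00CC0000 and 0x0000BB00 to give 0xDDCCBBAA.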
9776
9777 // Swap most and least significant byte, set remaining bytes in Res to zero.
9778 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
9779 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
9780 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9781 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
9782
9783 // Set i-th high/low byte in Res to i-th low/high byte from Src.
9784 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
9785 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
9786 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
9787 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
9788 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
9789 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
9790 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
9791 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
9792 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
9793 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
9794 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9795 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
9796 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
9797 }
9798 Res.getInstr()->getOperand(0).setReg(Dst);
9799
9800 MI.eraseFromParent();
9801 return Legalized;
9802}
9803
9804//{ (Src & Mask) >> N } | { (Src << N) & Mask }
9805 static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
9806 MachineInstrBuilder Src, const APInt &Mask) {
9807 const LLT Ty = Dst.getLLTTy(*B.getMRI());
9808 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
9809 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
9810 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
9811 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
9812 return B.buildOr(Dst, LHS, RHS);
9813}
9814
9817 auto [Dst, Src] = MI.getFirst2Regs();
9818 const LLT SrcTy = MRI.getType(Src);
9819 unsigned Size = SrcTy.getScalarSizeInBits();
9820 unsigned VSize = SrcTy.getSizeInBits();
9821
9822 if (Size >= 8) {
9823 if (SrcTy.isVector() && (VSize % 8 == 0) &&
9824 (LI.isLegal({TargetOpcode::G_BITREVERSE,
9825 {LLT::fixed_vector(VSize / 8, 8),
9826 LLT::fixed_vector(VSize / 8, 8)}}))) {
9827 // If bitreverse is legal for i8 vector of the same size, then cast
9828 // to i8 vector type.
9829 // e.g. v4s32 -> v16s8
9830 LLT VTy = LLT::fixed_vector(VSize / 8, 8);
9831 auto BSWAP = MIRBuilder.buildBSwap(SrcTy, Src);
9832 auto Cast = MIRBuilder.buildBitcast(VTy, BSWAP);
9833 auto RBIT = MIRBuilder.buildBitReverse(VTy, Cast);
9834 MIRBuilder.buildBitcast(Dst, RBIT);
9835 } else {
9836 MachineInstrBuilder BSWAP =
9837 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
9838
9839 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
9840 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
9841 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
9842 MachineInstrBuilder Swap4 = SwapN(4, SrcTy, MIRBuilder, BSWAP,
9843 APInt::getSplat(Size, APInt(8, 0xF0)));
9844
9845 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
9846 // [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
9847 // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
9848 MachineInstrBuilder Swap2 = SwapN(2, SrcTy, MIRBuilder, Swap4,
9849 APInt::getSplat(Size, APInt(8, 0xCC)));
9850
9851 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
9852 // 6|7
9853 // [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
9854 // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
9855 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
9856 }
9857 } else {
9858 // Expand bitreverse for types smaller than 8 bits.
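// Each iteration moves bit J of Src into bit I of the result, so bit Size-1
// lands in bit 0, bit Size-2 in bit 1, and so on.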
9859 MachineInstrBuilder Tmp;
9860 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
9861 MachineInstrBuilder Tmp2;
9862 if (I < J) {
9863 auto ShAmt = MIRBuilder.buildConstant(SrcTy, J - I);
9864 Tmp2 = MIRBuilder.buildShl(SrcTy, Src, ShAmt);
9865 } else {
9866 auto ShAmt = MIRBuilder.buildConstant(SrcTy, I - J);
9867 Tmp2 = MIRBuilder.buildLShr(SrcTy, Src, ShAmt);
9868 }
9869
9870 auto Mask = MIRBuilder.buildConstant(SrcTy, 1ULL << J);
9871 Tmp2 = MIRBuilder.buildAnd(SrcTy, Tmp2, Mask);
9872 if (I == 0)
9873 Tmp = Tmp2;
9874 else
9875 Tmp = MIRBuilder.buildOr(SrcTy, Tmp, Tmp2);
9876 }
9877 MIRBuilder.buildCopy(Dst, Tmp);
9878 }
9879
9880 MI.eraseFromParent();
9881 return Legalized;
9882}
9883
9886 MachineFunction &MF = MIRBuilder.getMF();
9887
9888 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
9889 int NameOpIdx = IsRead ? 1 : 0;
9890 int ValRegIndex = IsRead ? 0 : 1;
9891
9892 Register ValReg = MI.getOperand(ValRegIndex).getReg();
9893 const LLT Ty = MRI.getType(ValReg);
9894 const MDString *RegStr = cast<MDString>(
9895 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
9896
9897 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
9898 if (!PhysReg) {
9899 const Function &Fn = MF.getFunction();
9900 Fn.getContext().diagnose(DiagnosticInfoGenericWithLoc(
9901 "invalid register \"" + Twine(RegStr->getString().data()) + "\" for " +
9902 (IsRead ? "llvm.read_register" : "llvm.write_register"),
9903 Fn, MI.getDebugLoc()));
9904 if (IsRead)
9905 MIRBuilder.buildUndef(ValReg);
9906
9907 MI.eraseFromParent();
9908 return Legalized;
9909 }
9910
9911 if (IsRead)
9912 MIRBuilder.buildCopy(ValReg, PhysReg);
9913 else
9914 MIRBuilder.buildCopy(PhysReg, ValReg);
9915
9916 MI.eraseFromParent();
9917 return Legalized;
9918}
9919
9922 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
9923 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
9924 Register Result = MI.getOperand(0).getReg();
9925 LLT OrigTy = MRI.getType(Result);
9926 auto SizeInBits = OrigTy.getScalarSizeInBits();
9927 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
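// The high half of the product of two N-bit values is obtained by extending
// both operands to 2N bits, multiplying, and shifting the product right by N.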
9928
9929 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
9930 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
9931 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
9932 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
9933
9934 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
9935 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
9936 MIRBuilder.buildTrunc(Result, Shifted);
9937
9938 MI.eraseFromParent();
9939 return Legalized;
9940}
9941
9944 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9945 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
9946
9947 if (Mask == fcNone) {
9948 MIRBuilder.buildConstant(DstReg, 0);
9949 MI.eraseFromParent();
9950 return Legalized;
9951 }
9952 if (Mask == fcAllFlags) {
9953 MIRBuilder.buildConstant(DstReg, 1);
9954 MI.eraseFromParent();
9955 return Legalized;
9956 }
9957
9958 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
9959 // version
9960
9961 unsigned BitSize = SrcTy.getScalarSizeInBits();
9962 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
9963
9964 LLT IntTy = SrcTy.changeElementType(LLT::scalar(BitSize));
9965 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
9966
9967 // Various masks.
9968 APInt SignBit = APInt::getSignMask(BitSize);
9969 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9970 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9971 APInt ExpMask = Inf;
9972 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9973 APInt QNaNBitMask =
9974 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9975 APInt InversionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
9976
9977 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
9978 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
9979 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
9980 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
9981 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
9982
9983 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
9984 auto Sign =
9985 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
9986
9987 auto Res = MIRBuilder.buildConstant(DstTy, 0);
9988 // Clang doesn't support capture of structured bindings:
9989 LLT DstTyCopy = DstTy;
9990 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
9991 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
9992 };
9993
9994 // Tests that involve more than one class should be processed first.
9995 if ((Mask & fcFinite) == fcFinite) {
9996 // finite(V) ==> abs(V) u< exp_mask
9997 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9998 ExpMaskC));
9999 Mask &= ~fcFinite;
10000 } else if ((Mask & fcFinite) == fcPosFinite) {
10001 // finite(V) && V > 0 ==> V u< exp_mask
10002 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
10003 ExpMaskC));
10004 Mask &= ~fcPosFinite;
10005 } else if ((Mask & fcFinite) == fcNegFinite) {
10006 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
10007 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
10008 ExpMaskC);
10009 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
10010 appendToRes(And);
10011 Mask &= ~fcNegFinite;
10012 }
10013
10014 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
10015 // fcZero | fcSubnormal => test all exponent bits are 0
10016 // TODO: Handle sign bit specific cases
10017 // TODO: Handle inverted case
10018 if (PartialCheck == (fcZero | fcSubnormal)) {
10019 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
10020 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10021 ExpBits, ZeroC));
10022 Mask &= ~PartialCheck;
10023 }
10024 }
10025
10026 // Check for individual classes.
10027 if (FPClassTest PartialCheck = Mask & fcZero) {
10028 if (PartialCheck == fcPosZero)
10029 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10030 AsInt, ZeroC));
10031 else if (PartialCheck == fcZero)
10032 appendToRes(
10033 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
10034 else // fcNegZero
10035 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10036 AsInt, SignBitC));
10037 }
10038
10039 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
10040 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
10041 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
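// Subnormals have a zero exponent and a nonzero mantissa, so abs(V) lies in
// [1, AllOneMantissa]; subtracting 1 turns that range check into a single
// unsigned compare (0 wraps around and fails it).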
10042 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
10043 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
10044 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
10045 auto SubnormalRes =
10046 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
10047 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
10048 if (PartialCheck == fcNegSubnormal)
10049 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
10050 appendToRes(SubnormalRes);
10051 }
10052
10053 if (FPClassTest PartialCheck = Mask & fcInf) {
10054 if (PartialCheck == fcPosInf)
10055 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10056 AsInt, InfC));
10057 else if (PartialCheck == fcInf)
10058 appendToRes(
10059 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
10060 else { // fcNegInf
10061 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
10062 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
10063 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10064 AsInt, NegInfC));
10065 }
10066 }
10067
10068 if (FPClassTest PartialCheck = Mask & fcNan) {
10069 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
10070 if (PartialCheck == fcNan) {
10071 // isnan(V) ==> abs(V) u> int(inf)
10072 appendToRes(
10073 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
10074 } else if (PartialCheck == fcQNan) {
10075 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
10076 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
10077 InfWithQnanBitC));
10078 } else { // fcSNan
10079 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
10080 // abs(V) u< (unsigned(Inf) | quiet_bit)
10081 auto IsNan =
10082 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
10083 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
10084 Abs, InfWithQnanBitC);
10085 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
10086 }
10087 }
10088
10089 if (FPClassTest PartialCheck = Mask & fcNormal) {
10090 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
10091 // (max_exp-1))
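// Subtracting the exponent LSB from abs(V) maps exponent 1 to 0 and the
// all-ones (inf/nan) exponent to max_exp-1, so one unsigned compare rejects
// both the zero/subnormal and the inf/nan exponents.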
10092 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
10093 auto ExpMinusOne = MIRBuilder.buildSub(
10094 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
10095 APInt MaxExpMinusOne = ExpMask - ExpLSB;
10096 auto NormalRes =
10097 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
10098 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
10099 if (PartialCheck == fcNegNormal)
10100 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
10101 else if (PartialCheck == fcPosNormal) {
10102 auto PosSign = MIRBuilder.buildXor(
10103 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InversionMask));
10104 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
10105 }
10106 appendToRes(NormalRes);
10107 }
10108
10109 MIRBuilder.buildCopy(DstReg, Res);
10110 MI.eraseFromParent();
10111 return Legalized;
10112}
10113
10115 // Implement G_SELECT in terms of XOR, AND, OR.
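// With the mask sign-extended to all ones or all zeroes per element, the
// select becomes Res = (Op1 & Mask) | (Op2 & ~Mask).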
10116 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
10117 MI.getFirst4RegLLTs();
10118
10119 bool IsEltPtr = DstTy.isPointerOrPointerVector();
10120 if (IsEltPtr) {
10121 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
10122 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
10123 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
10124 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
10125 DstTy = NewTy;
10126 }
10127
10128 if (MaskTy.isScalar()) {
10129 // Turn the scalar condition into a vector condition mask if needed.
10130
10131 Register MaskElt = MaskReg;
10132
10133 // The condition was potentially zero extended before, but we want a sign
10134 // extended boolean.
10135 if (MaskTy != LLT::scalar(1))
10136 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
10137
10138 // Continue the sign extension (or truncate) to match the data type.
10139 MaskElt =
10140 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
10141
10142 if (DstTy.isVector()) {
10143 // Generate a vector splat idiom.
10144 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
10145 MaskReg = ShufSplat.getReg(0);
10146 } else {
10147 MaskReg = MaskElt;
10148 }
10149 MaskTy = DstTy;
10150 } else if (!DstTy.isVector()) {
10151 // Cannot handle the case that mask is a vector and dst is a scalar.
10152 return UnableToLegalize;
10153 }
10154
10155 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
10156 return UnableToLegalize;
10157 }
10158
10159 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
10160 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
10161 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
10162 if (IsEltPtr) {
10163 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
10164 MIRBuilder.buildIntToPtr(DstReg, Or);
10165 } else {
10166 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
10167 }
10168 MI.eraseFromParent();
10169 return Legalized;
10170}
10171
10173 // Split DIVREM into individual instructions.
10174 unsigned Opcode = MI.getOpcode();
10175
10176 MIRBuilder.buildInstr(
10177 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
10178 : TargetOpcode::G_UDIV,
10179 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10180 MIRBuilder.buildInstr(
10181 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
10182 : TargetOpcode::G_UREM,
10183 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10184 MI.eraseFromParent();
10185 return Legalized;
10186}
10187
10190 // Expand %res = G_ABS %a into:
10191 // %v1 = G_ASHR %a, scalar_size-1
10192 // %v2 = G_ADD %a, %v1
10193 // %res = G_XOR %v2, %v1
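// e.g. for s32 and %a = -5: %v1 = -1, %v2 = -6, and -6 ^ -1 = 5; when %a is
// non-negative, %v1 = 0 and the value passes through unchanged.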
10194 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
10195 Register OpReg = MI.getOperand(1).getReg();
10196 auto ShiftAmt =
10197 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
10198 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
10199 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
10200 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
10201 MI.eraseFromParent();
10202 return Legalized;
10203}
10204
10207 // Expand %res = G_ABS %a into:
10208 // %v1 = G_CONSTANT 0
10209 // %v2 = G_SUB %v1, %a
10210 // %res = G_SMAX %a, %v2
10211 Register SrcReg = MI.getOperand(1).getReg();
10212 LLT Ty = MRI.getType(SrcReg);
10213 auto Zero = MIRBuilder.buildConstant(Ty, 0);
10214 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
10215 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
10216 MI.eraseFromParent();
10217 return Legalized;
10218}
10219
10222 Register SrcReg = MI.getOperand(1).getReg();
10223 Register DestReg = MI.getOperand(0).getReg();
10224 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
10225 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
10226 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
10227 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
10228 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
10229 MI.eraseFromParent();
10230 return Legalized;
10231}
10232
10235 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10236 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10237 "Expected G_ABDS or G_ABDU instruction");
10238
10239 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10240 LLT Ty = MRI.getType(LHS);
10241
10242 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10243 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10244 Register LHSSub = MIRBuilder.buildSub(Ty, LHS, RHS).getReg(0);
10245 Register RHSSub = MIRBuilder.buildSub(Ty, RHS, LHS).getReg(0);
10246 CmpInst::Predicate Pred = (MI.getOpcode() == TargetOpcode::G_ABDS)
10247 ? CmpInst::ICMP_SGT
10248 : CmpInst::ICMP_UGT;
10249 auto ICmp = MIRBuilder.buildICmp(Pred, LLT::scalar(1), LHS, RHS);
10250 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
10251
10252 MI.eraseFromParent();
10253 return Legalized;
10254}
10255
10258 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10259 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10260 "Expected G_ABDS or G_ABDU instruction");
10261
10262 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10263 LLT Ty = MRI.getType(LHS);
10264
10265 // abds(lhs, rhs) -> sub(smax(lhs, rhs), smin(lhs, rhs))
10266 // abdu(lhs, rhs) -> sub(umax(lhs, rhs), umin(lhs, rhs))
10267 Register MaxReg, MinReg;
10268 if (MI.getOpcode() == TargetOpcode::G_ABDS) {
10269 MaxReg = MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10270 MinReg = MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10271 } else {
10272 MaxReg = MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10273 MinReg = MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10274 }
10275 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10276
10277 MI.eraseFromParent();
10278 return Legalized;
10279}
10280
10282 Register SrcReg = MI.getOperand(1).getReg();
10283 Register DstReg = MI.getOperand(0).getReg();
10284
10285 LLT Ty = MRI.getType(DstReg);
10286
10287 // Reset sign bit
10288 MIRBuilder.buildAnd(
10289 DstReg, SrcReg,
10290 MIRBuilder.buildConstant(
10291 Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits())));
10292
10293 MI.eraseFromParent();
10294 return Legalized;
10295}
10296
10299 Register SrcReg = MI.getOperand(1).getReg();
10300 LLT SrcTy = MRI.getType(SrcReg);
10301 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
10302
10303 // The source could be a scalar if the IR type was <1 x sN>.
10304 if (SrcTy.isScalar()) {
10305 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
10306 return UnableToLegalize; // FIXME: handle extension.
10307 // This can be just a plain copy.
10308 Observer.changingInstr(MI);
10309 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
10310 Observer.changedInstr(MI);
10311 return Legalized;
10312 }
10313 return UnableToLegalize;
10314}
10315
10317 MachineFunction &MF = *MI.getMF();
10318 const DataLayout &DL = MIRBuilder.getDataLayout();
10319 LLVMContext &Ctx = MF.getFunction().getContext();
10320 Register ListPtr = MI.getOperand(1).getReg();
10321 LLT PtrTy = MRI.getType(ListPtr);
10322
10323 // ListPtr is a pointer to the head of the list. Get the address
10324 // of the head of the list.
10325 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
10326 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
10327 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
10328 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10329
10330 const Align A(MI.getOperand(2).getImm());
10331 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
10332 if (A > TLI.getMinStackArgumentAlignment()) {
10333 Register AlignAmt =
10334 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
10335 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10336 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
10337 VAList = AndDst.getReg(0);
10338 }
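// The two instructions above align the list pointer up: add A - 1, then clear
// the low Log2(A) bits to round down to a multiple of A.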
10339
10340 // Increment the pointer, VAList, to the next vaarg
10341 // The list should be bumped by the size of the element in the current head
10342 // of the list.
10343 Register Dst = MI.getOperand(0).getReg();
10344 LLT LLTTy = MRI.getType(Dst);
10345 Type *Ty = getTypeForLLT(LLTTy, Ctx);
10346 auto IncAmt =
10347 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
10348 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10349
10350 // Store the incremented VAList to the legalized pointer
10351 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
10352 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
10353 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10354 // Load the actual argument out of the pointer VAList
10355 Align EltAlignment = DL.getABITypeAlign(Ty);
10356 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
10357 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
10358 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10359
10360 MI.eraseFromParent();
10361 return Legalized;
10362}
10363
10364 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
10365 // On Darwin, -Os means optimize for size without hurting performance, so
10366 // only really optimize for size when -Oz (MinSize) is used.
10367 if (MF.getTarget().getTargetTriple().isOSDarwin())
10368 return MF.getFunction().hasMinSize();
10369 return MF.getFunction().hasOptSize();
10370}
10371
10372// Returns a list of types to use for memory op lowering in MemOps. A partial
10373// port of findOptimalMemOpLowering in TargetLowering.
10374static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
10375 unsigned Limit, const MemOp &Op,
10376 unsigned DstAS, unsigned SrcAS,
10377 const AttributeList &FuncAttributes,
10378 const TargetLowering &TLI) {
10379 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
10380 return false;
10381
10382 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
10383
10384 if (Ty == LLT()) {
10385 // Use the largest scalar type whose alignment constraints are satisfied.
10386 // We only need to check DstAlign here as SrcAlign is always greater or
10387 // equal to DstAlign (or zero).
10388 Ty = LLT::scalar(64);
10389 if (Op.isFixedDstAlign())
10390 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
10391 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
10392 Ty = LLT::scalar(Ty.getSizeInBytes());
10393 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
10394 // FIXME: check for the largest legal type we can load/store to.
10395 }
10396
10397 unsigned NumMemOps = 0;
10398 uint64_t Size = Op.size();
10399 while (Size) {
10400 unsigned TySize = Ty.getSizeInBytes();
10401 while (TySize > Size) {
10402 // For now, only use non-vector loads / stores for the left-over pieces.
10403 LLT NewTy = Ty;
10404 // FIXME: check for mem op safety and legality of the types. Not all of
10405 // SDAGisms map cleanly to GISel concepts.
10406 if (NewTy.isVector())
10407 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
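// Shrink to the largest power-of-two width strictly smaller than the current
// size (e.g. s64 -> s32, s48 -> s32).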
10408 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
10409 unsigned NewTySize = NewTy.getSizeInBytes();
10410 assert(NewTySize > 0 && "Could not find appropriate type");
10411
10412 // If the new LLT cannot cover all of the remaining bits, then consider
10413 // issuing a (or a pair of) unaligned and overlapping load / store.
10414 unsigned Fast;
10415 // Need to get a VT equivalent for allowsMisalignedMemoryAccesses().
10416 MVT VT = getMVTForLLT(Ty);
10417 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
10418 TLI.allowsMisalignedMemoryAccesses(
10419 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
10420 MachineMemOperand::MONone, &Fast) &&
10421 Fast)
10422 TySize = Size;
10423 else {
10424 Ty = NewTy;
10425 TySize = NewTySize;
10426 }
10427 }
10428
10429 if (++NumMemOps > Limit)
10430 return false;
10431
10432 MemOps.push_back(Ty);
10433 Size -= TySize;
10434 }
10435
10436 return true;
10437}
10438
10439// Get a vectorized representation of the memset value operand, GISel edition.
10440 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
10441 MachineRegisterInfo &MRI = *MIB.getMRI();
10442 unsigned NumBits = Ty.getScalarSizeInBits();
10443 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10444 if (!Ty.isVector() && ValVRegAndVal) {
10445 APInt Scalar = ValVRegAndVal->Value.trunc(8);
10446 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
10447 return MIB.buildConstant(Ty, SplatVal).getReg(0);
10448 }
10449
10450 // Extend the byte value to the larger type, and then multiply by a magic
10451 // value 0x010101... in order to replicate it across every byte.
10452 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
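// e.g. for s32 and a byte value of 0xAB: 0xAB * 0x01010101 = 0xABABABAB.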
10453 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10454 return MIB.buildConstant(Ty, 0).getReg(0);
10455 }
10456
10457 LLT ExtType = Ty.getScalarType();
10458 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
10459 if (NumBits > 8) {
10460 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
10461 auto MagicMI = MIB.buildConstant(ExtType, Magic);
10462 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
10463 }
10464
10465 // For vector types create a G_BUILD_VECTOR.
10466 if (Ty.isVector())
10467 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
10468
10469 return Val;
10470}
10471
10472 LegalizerHelper::LegalizeResult
10473 LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
10474 uint64_t KnownLen, Align Alignment,
10475 bool IsVolatile) {
10476 auto &MF = *MI.getParent()->getParent();
10477 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10478 auto &DL = MF.getDataLayout();
10479 LLVMContext &C = MF.getFunction().getContext();
10480
10481 assert(KnownLen != 0 && "Have a zero length memset length!");
10482
10483 bool DstAlignCanChange = false;
10484 MachineFrameInfo &MFI = MF.getFrameInfo();
10485 bool OptSize = shouldLowerMemFuncForSize(MF);
10486
10487 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10488 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10489 DstAlignCanChange = true;
10490
10491 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
10492 std::vector<LLT> MemOps;
10493
10494 const auto &DstMMO = **MI.memoperands_begin();
10495 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10496
10497 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10498 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
10499
10500 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
10501 MemOp::Set(KnownLen, DstAlignCanChange,
10502 Alignment,
10503 /*IsZeroMemset=*/IsZeroVal,
10504 /*IsVolatile=*/IsVolatile),
10505 DstPtrInfo.getAddrSpace(), ~0u,
10506 MF.getFunction().getAttributes(), TLI))
10507 return UnableToLegalize;
10508
10509 if (DstAlignCanChange) {
10510 // Get an estimate of the type from the LLT.
10511 Type *IRTy = getTypeForLLT(MemOps[0], C);
10512 Align NewAlign = DL.getABITypeAlign(IRTy);
10513 if (NewAlign > Alignment) {
10514 Alignment = NewAlign;
10515 unsigned FI = FIDef->getOperand(1).getIndex();
10516 // Give the stack frame object a larger alignment if needed.
10517 if (MFI.getObjectAlign(FI) < Alignment)
10518 MFI.setObjectAlignment(FI, Alignment);
10519 }
10520 }
10521
10522 MachineIRBuilder MIB(MI);
10523 // Find the largest store and generate the bit pattern for it.
10524 LLT LargestTy = MemOps[0];
10525 for (unsigned i = 1; i < MemOps.size(); i++)
10526 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
10527 LargestTy = MemOps[i];
10528
10529 // The memset stored value is always defined as an s8, so in order to make it
10530 // work with larger store types we need to repeat the bit pattern across the
10531 // wider type.
10532 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
10533
10534 if (!MemSetValue)
10535 return UnableToLegalize;
10536
10537 // Generate the stores. For each store type in the list, we generate the
10538 // matching store of that type to the destination address.
10539 LLT PtrTy = MRI.getType(Dst);
10540 unsigned DstOff = 0;
10541 unsigned Size = KnownLen;
10542 for (unsigned I = 0; I < MemOps.size(); I++) {
10543 LLT Ty = MemOps[I];
10544 unsigned TySize = Ty.getSizeInBytes();
10545 if (TySize > Size) {
10546 // Issuing an unaligned load / store pair that overlaps with the previous
10547 // pair. Adjust the offset accordingly.
10548 assert(I == MemOps.size() - 1 && I != 0);
10549 DstOff -= TySize - Size;
10550 }
10551
10552 // If this store is smaller than the largest store see whether we can get
10553 // the smaller value for free with a truncate.
10554 Register Value = MemSetValue;
10555 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
10556 MVT VT = getMVTForLLT(Ty);
10557 MVT LargestVT = getMVTForLLT(LargestTy);
10558 if (!LargestTy.isVector() && !Ty.isVector() &&
10559 TLI.isTruncateFree(LargestVT, VT))
10560 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10561 else
10562 Value = getMemsetValue(Val, Ty, MIB);
10563 if (!Value)
10564 return UnableToLegalize;
10565 }
10566
10567 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
10568
10569 Register Ptr = Dst;
10570 if (DstOff != 0) {
10571 auto Offset =
10572 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
10573 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst, Offset).getReg(0);
10574 }
10575
10576 MIB.buildStore(Value, Ptr, *StoreMMO);
10577 DstOff += Ty.getSizeInBytes();
10578 Size -= TySize;
10579 }
10580
10581 MI.eraseFromParent();
10582 return Legalized;
10583}
10584
10585 LegalizerHelper::LegalizeResult
10586 LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
10587 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10588
10589 auto [Dst, Src, Len] = MI.getFirst3Regs();
10590
10591 const auto *MMOIt = MI.memoperands_begin();
10592 const MachineMemOperand *MemOp = *MMOIt;
10593 bool IsVolatile = MemOp->isVolatile();
10594
10595 // See if this is a constant length copy
10596 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10597 // FIXME: support dynamically sized G_MEMCPY_INLINE
10598 assert(LenVRegAndVal &&
10599 "inline memcpy with dynamic size is not yet supported");
10600 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10601 if (KnownLen == 0) {
10602 MI.eraseFromParent();
10603 return Legalized;
10604 }
10605
10606 const auto &DstMMO = **MI.memoperands_begin();
10607 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10608 Align DstAlign = DstMMO.getBaseAlign();
10609 Align SrcAlign = SrcMMO.getBaseAlign();
10610
10611 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
10612 IsVolatile);
10613}
10614
10615 LegalizerHelper::LegalizeResult
10616 LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
10617 uint64_t KnownLen, Align DstAlign,
10618 Align SrcAlign, bool IsVolatile) {
10619 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10620 return lowerMemcpy(MI, Dst, Src, KnownLen,
10621 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
10622 IsVolatile);
10623}
10624
10625 LegalizerHelper::LegalizeResult
10626 LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
10627 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
10628 Align SrcAlign, bool IsVolatile) {
10629 auto &MF = *MI.getParent()->getParent();
10630 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10631 auto &DL = MF.getDataLayout();
10632 LLVMContext &C = MF.getFunction().getContext();
10633
10634 assert(KnownLen != 0 && "Have a zero length memcpy length!");
10635
10636 bool DstAlignCanChange = false;
10637 MachineFrameInfo &MFI = MF.getFrameInfo();
10638 Align Alignment = std::min(DstAlign, SrcAlign);
10639
10640 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10641 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10642 DstAlignCanChange = true;
10643
10644 // FIXME: infer better src pointer alignment like SelectionDAG does here.
10645 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
10646 // if the memcpy is in a tail call position.
10647
10648 std::vector<LLT> MemOps;
10649
10650 const auto &DstMMO = **MI.memoperands_begin();
10651 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10652 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10653 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10654
10655 if (!findGISelOptimalMemOpLowering(
10656 MemOps, Limit,
10657 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10658 IsVolatile),
10659 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10660 MF.getFunction().getAttributes(), TLI))
10661 return UnableToLegalize;
10662
10663 if (DstAlignCanChange) {
10664 // Get an estimate of the type from the LLT.
10665 Type *IRTy = getTypeForLLT(MemOps[0], C);
10666 Align NewAlign = DL.getABITypeAlign(IRTy);
10667
10668 // Don't promote to an alignment that would require dynamic stack
10669 // realignment.
10670 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10671 if (!TRI->hasStackRealignment(MF))
10672 if (MaybeAlign StackAlign = DL.getStackAlignment())
10673 NewAlign = std::min(NewAlign, *StackAlign);
10674
10675 if (NewAlign > Alignment) {
10676 Alignment = NewAlign;
10677 unsigned FI = FIDef->getOperand(1).getIndex();
10678 // Give the stack frame object a larger alignment if needed.
10679 if (MFI.getObjectAlign(FI) < Alignment)
10680 MFI.setObjectAlignment(FI, Alignment);
10681 }
10682 }
10683
10684 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
10685
10686 MachineIRBuilder MIB(MI);
10687 // Now we need to emit a pair of loads and stores for each of the types we've
10688 // collected. I.e. for each type, generate a load from the source pointer of
10689 // that type width, and then generate a corresponding store to the dest buffer
10690 // of that value loaded. This can result in a sequence of loads and stores of
10691 // mixed types, depending on what the target specifies as good types to use.
10692 unsigned CurrOffset = 0;
10693 unsigned Size = KnownLen;
10694 for (auto CopyTy : MemOps) {
10695 // Issuing an unaligned load / store pair that overlaps with the previous
10696 // pair. Adjust the offset accordingly.
10697 if (CopyTy.getSizeInBytes() > Size)
10698 CurrOffset -= CopyTy.getSizeInBytes() - Size;
10699
10700 // Construct MMOs for the accesses.
10701 auto *LoadMMO =
10702 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10703 auto *StoreMMO =
10704 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10705
10706 // Create the load.
10707 Register LoadPtr = Src;
10708 Register Offset;
10709 if (CurrOffset != 0) {
10710 LLT SrcTy = MRI.getType(Src);
10711 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
10712 .getReg(0);
10713 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10714 }
10715 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
10716
10717 // Create the store.
10718 Register StorePtr = Dst;
10719 if (CurrOffset != 0) {
10720 LLT DstTy = MRI.getType(Dst);
10721 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10722 }
10723 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
10724 CurrOffset += CopyTy.getSizeInBytes();
10725 Size -= CopyTy.getSizeInBytes();
10726 }
10727
10728 MI.eraseFromParent();
10729 return Legalized;
10730}
10731
10732 LegalizerHelper::LegalizeResult
10733 LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
10734 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
10735 bool IsVolatile) {
10736 auto &MF = *MI.getParent()->getParent();
10737 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10738 auto &DL = MF.getDataLayout();
10739 LLVMContext &C = MF.getFunction().getContext();
10740
10741 assert(KnownLen != 0 && "Have a zero length memmove length!");
10742
10743 bool DstAlignCanChange = false;
10744 MachineFrameInfo &MFI = MF.getFrameInfo();
10745 bool OptSize = shouldLowerMemFuncForSize(MF);
10746 Align Alignment = std::min(DstAlign, SrcAlign);
10747
10748 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10749 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10750 DstAlignCanChange = true;
10751
10752 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
10753 std::vector<LLT> MemOps;
10754
10755 const auto &DstMMO = **MI.memoperands_begin();
10756 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10757 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10758 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10759
10760 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
10761 // to a bug in its findOptimalMemOpLowering implementation. For now do the
10762 // same thing here.
10763 if (!findGISelOptimalMemOpLowering(
10764 MemOps, Limit,
10765 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10766 /*IsVolatile*/ true),
10767 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10768 MF.getFunction().getAttributes(), TLI))
10769 return UnableToLegalize;
10770
10771 if (DstAlignCanChange) {
10772 // Get an estimate of the type from the LLT.
10773 Type *IRTy = getTypeForLLT(MemOps[0], C);
10774 Align NewAlign = DL.getABITypeAlign(IRTy);
10775
10776 // Don't promote to an alignment that would require dynamic stack
10777 // realignment.
10778 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10779 if (!TRI->hasStackRealignment(MF))
10780 if (MaybeAlign StackAlign = DL.getStackAlignment())
10781 NewAlign = std::min(NewAlign, *StackAlign);
10782
10783 if (NewAlign > Alignment) {
10784 Alignment = NewAlign;
10785 unsigned FI = FIDef->getOperand(1).getIndex();
10786 // Give the stack frame object a larger alignment if needed.
10787 if (MFI.getObjectAlign(FI) < Alignment)
10788 MFI.setObjectAlignment(FI, Alignment);
10789 }
10790 }
10791
10792 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
10793
10794 MachineIRBuilder MIB(MI);
10795 // Memmove requires that we perform the loads first before issuing the stores.
10796 // Apart from that, this loop is pretty much doing the same thing as the
10797 // memcpy codegen function.
10798 unsigned CurrOffset = 0;
10799 SmallVector<Register, 16> LoadVals;
10800 for (auto CopyTy : MemOps) {
10801 // Construct MMO for the load.
10802 auto *LoadMMO =
10803 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10804
10805 // Create the load.
10806 Register LoadPtr = Src;
10807 if (CurrOffset != 0) {
10808 LLT SrcTy = MRI.getType(Src);
10809 auto Offset =
10810 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
10811 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10812 }
10813 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
10814 CurrOffset += CopyTy.getSizeInBytes();
10815 }
10816
10817 CurrOffset = 0;
10818 for (unsigned I = 0; I < MemOps.size(); ++I) {
10819 LLT CopyTy = MemOps[I];
10820 // Now store the values loaded.
10821 auto *StoreMMO =
10822 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10823
10824 Register StorePtr = Dst;
10825 if (CurrOffset != 0) {
10826 LLT DstTy = MRI.getType(Dst);
10827 auto Offset =
10828 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
10829 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10830 }
10831 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
10832 CurrOffset += CopyTy.getSizeInBytes();
10833 }
10834 MI.eraseFromParent();
10835 return Legalized;
10836}
10837
10840 const unsigned Opc = MI.getOpcode();
10841 // This combine is fairly complex so it's not written with a separate
10842 // matcher function.
10843 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
10844 Opc == TargetOpcode::G_MEMSET) &&
10845 "Expected memcpy like instruction");
10846
10847 auto MMOIt = MI.memoperands_begin();
10848 const MachineMemOperand *MemOp = *MMOIt;
10849
10850 Align DstAlign = MemOp->getBaseAlign();
10851 Align SrcAlign;
10852 auto [Dst, Src, Len] = MI.getFirst3Regs();
10853
10854 if (Opc != TargetOpcode::G_MEMSET) {
10855 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
10856 MemOp = *(++MMOIt);
10857 SrcAlign = MemOp->getBaseAlign();
10858 }
10859
10860 // See if this is a constant length copy
10861 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10862 if (!LenVRegAndVal)
10863 return UnableToLegalize;
10864 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10865
10866 if (KnownLen == 0) {
10867 MI.eraseFromParent();
10868 return Legalized;
10869 }
10870
10871 if (MaxLen && KnownLen > MaxLen)
10872 return UnableToLegalize;
10873
10874 bool IsVolatile = MemOp->isVolatile();
10875 if (Opc == TargetOpcode::G_MEMCPY) {
10876 auto &MF = *MI.getParent()->getParent();
10877 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10878 bool OptSize = shouldLowerMemFuncForSize(MF);
10879 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
10880 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
10881 IsVolatile);
10882 }
10883 if (Opc == TargetOpcode::G_MEMMOVE)
10884 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
10885 if (Opc == TargetOpcode::G_MEMSET)
10886 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
10887 return UnableToLegalize;
10888}
unsigned const MachineRegisterInfo * MRI
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S16
constexpr LLT S1
constexpr LLT S32
constexpr LLT S64
AMDGPU Register Bank Select
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition Utils.h:75
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t High
R600 Clause Merge
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1410
APInt bitcastToAPInt() const
Definition APFloat.h:1416
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1201
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1161
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1172
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1549
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1521
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1183
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1677
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
void negate()
Negate this APInt in place.
Definition APInt.h:1477
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:996
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:874
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition APInt.h:271
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:685
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
bool isSigned() const
Definition InstrTypes.h:930
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
const APFloat & getValueAPF() const
Definition Constants.h:325
This is the shared class of boolean and integer constants.
Definition Constants.h:87
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool isBigEndian() const
Definition DataLayout.h:215
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
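A short sketch of the notify-before/notify-after pattern these two callbacks support, as used throughout LegalizerHelper when an instruction is mutated in place (replaceDef is a hypothetical helper; the registers are assumed to be valid).

#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

static void replaceDef(GISelChangeObserver &Observer, MachineInstr &MI,
                       Register NewDstReg) {
  Observer.changingInstr(MI);          // MI is about to be mutated
  MI.getOperand(0).setReg(NewDstReg);  // e.g. retarget the def operand
  Observer.changedInstr(MI);           // MI has been mutated
}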
Represents an insert subvector.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a three-way compare.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr LLT changeVectorElementType(LLT NewEltTy) const
Returns a vector with the same number of elements but the new element type.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e. the number of whole bytes needed to hold the size in bits.
constexpr LLT changeVectorElementCount(ElementCount EC) const
Return a vector with the same element type and the new element count.
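A minimal sketch (not from the generated listing) showing how the LLT constructors and the change* helpers above are typically combined when splitting or widening values; lltExamples is a hypothetical function name.

#include "llvm/CodeGenTypes/LowLevelType.h"
#include <cassert>
using namespace llvm;

static void lltExamples() {
  LLT S32 = LLT::scalar(32);             // 32-bit "bag of bits"
  LLT V4S32 = LLT::fixed_vector(4, 32);  // <4 x s32>
  assert(V4S32.getSizeInBits().getFixedValue() == 128);
  // Same element type, half the lanes: <2 x s32>.
  LLT V2S32 = V4S32.changeElementCount(ElementCount::getFixed(2));
  LLT Elt = V4S32.getElementType();      // s32
  (void)S32; (void)V2S32; (void)Elt;
}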
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSSUBE(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's type to WideTy using the specified ExtOpcode, replacing the operand's vreg in place.
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTLS(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B, const LibcallLoweringInfo *Libcalls=nullptr)
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
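A sketch of how a caller might interpret these result codes; the real driver loop lives in the Legalizer pass, and legalizeOne is a hypothetical wrapper.

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;

static bool legalizeOne(LegalizerHelper &Helper, MachineInstr &MI,
                        LostDebugLocObserver &LocObserver) {
  switch (Helper.legalizeInstrStep(MI, LocObserver)) {
  case LegalizerHelper::AlreadyLegal:
  case LegalizerHelper::Legalized:
    return true;   // nothing to do, or MI was rewritten into legal pieces
  case LegalizerHelper::UnableToLegalize:
    return false;  // caller reports the failure
  }
  llvm_unreachable("covered switch");
}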
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI LegalizeResult conversionLibcall(MachineInstr &MI, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, bool IsSigned=false) const
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult createMemLibcall(MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Create a libcall to memcpy et al.
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult createLibcall(const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr) const
Helper function that creates a libcall to the given Name using the given calling convention CC.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's type to WideTy, truncating it back with TruncOpcode, and replacing the operand's vreg in place.
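A fragment (assumed to run inside a LegalizerHelper member, so Observer and the helpers are in scope) showing the usual pairing of widenScalarSrc and widenScalarDst for a simple binary op such as G_ADD, where operand 0 is the def and operands 1 and 2 are the sources.

Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); // extend LHS use
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); // extend RHS use
widenScalarDst(MI, WideTy);                            // widen def, trunc back
Observer.changedInstr(MI);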
LLVM_ABI LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver) const
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI)
Tracks which library functions to use for a particular subtarget.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition MCInstrInfo.h:97
A single uniqued string.
Definition Metadata.h:721
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:624
Machine Value Type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
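A small illustrative sketch (not from the listing) of chaining the build* methods above: it emits (Src << 8) | 0xff, assuming the builder already has an insertion point and Src is an s32 virtual register; buildExample is a hypothetical name.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

static Register buildExample(MachineIRBuilder &B, Register Src) {
  LLT S32 = LLT::scalar(32);
  auto C8 = B.buildConstant(S32, 8);       // G_CONSTANT 8
  auto Shl = B.buildShl(S32, Src, C8);     // G_SHL Src, 8
  auto Mask = B.buildConstant(S32, 0xff);  // G_CONSTANT 255
  return B.buildOr(S32, Shl, Mask).getReg(0);
}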
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
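A sketch tying the memory-operand pieces together: allocate an MMO with the getMachineMemOperand overload indexed above and feed it to buildLoad. PtrInfo and Addr are assumed to describe a valid location; emitLoad is a hypothetical helper.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
using namespace llvm;

static Register emitLoad(MachineIRBuilder &B, Register Addr,
                         MachinePointerInfo PtrInfo) {
  MachineFunction &MF = B.getMF();
  LLT S32 = LLT::scalar(32);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, S32, Align(4));
  return B.buildLoad(S32, Addr, *MMO).getReg(0);  // G_LOAD with a 4-byte MMO
}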
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:413
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Definition Triple.h:632
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Definition Type.cpp:289
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:285
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
Definition Type.cpp:288
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:284
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:282
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the same way as for normal integer types.
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is legal.
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes added by doing so.
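An illustrative fragment (it would live in a target's <Target>LegalizerInfo constructor, not in this file) showing how a target usually requests these actions through the rule-set builder; the opcodes and type bounds are placeholders.

const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
getActionDefinitionsBuilder({TargetOpcode::G_ADD, TargetOpcode::G_SUB})
    .legalFor({S32, S64})       // Legal for these types
    .widenScalarToNextPow2(0)   // WidenScalar to a power-of-2 width
    .clampScalar(0, S32, S64);  // NarrowScalar/WidenScalar into [s32, s64]
getActionDefinitionsBuilder(TargetOpcode::G_FREM)
    .libcallFor({S32, S64});    // Libcall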
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
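A sketch of the lookup that conversionLibcall relies on for an FP truncation, combining getApproximateEVTForLLT with RTLIB::getFPROUND as indexed on this page; fptruncLibcall is a hypothetical helper, and the header that declares the RTLIB routines varies across LLVM versions.

// #include "llvm/CodeGen/RuntimeLibcalls.h"  // RTLIB declarations (path may differ)
static RTLIB::Libcall fptruncLibcall(LLT SrcTy, LLT DstTy, LLVMContext &Ctx) {
  return RTLIB::getFPROUND(getApproximateEVTForLLT(SrcTy, Ctx),
                           getApproximateEVTForLLT(DstTy, Ctx));
  // RTLIB::UNKNOWN_LIBCALL means no suitable runtime routine exists.
}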
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:829
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:2042
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:654
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1667
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:295
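Two equivalent ways to ask whether a virtual register is defined by a G_CONSTANT, using the m_ICst matcher and getIConstantVRegVal entries above; isConstantReg is a hypothetical helper.

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
using namespace MIPatternMatch;

static bool isConstantReg(Register Reg, const MachineRegisterInfo &MRI) {
  APInt Cst;
  if (mi_match(Reg, MRI, m_ICst(Cst)))       // pattern-match form
    return true;
  return getIConstantVRegVal(Reg, MRI).has_value();  // direct query form
}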
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition MathExtras.h:223
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2198
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1571
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1628
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1150
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition Utils.cpp:1195
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition Utils.cpp:509
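A short sketch of extractParts in use: split an s128 value into four s32 pieces and rebuild it with buildMergeLikeInstr (splitAndRejoin is a hypothetical helper; Src is assumed to be an s128 vreg).

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
using namespace llvm;

static Register splitAndRejoin(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                               Register Src /* s128 */) {
  SmallVector<Register, 4> Parts;
  extractParts(Src, LLT::scalar(32), 4, Parts, B, MRI);          // 4 x s32
  return B.buildMergeLikeInstr(LLT::scalar(128), Parts).getReg(0); // back to s128
}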
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
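A few worked values for the arithmetic helpers indexed above (isPowerOf2_64, Log2_32, PowerOf2Ceil, alignTo); mathHelperExamples is a hypothetical function used only to hold the assertions.

#include "llvm/Support/Alignment.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

static void mathHelperExamples() {
  assert(isPowerOf2_64(64) && !isPowerOf2_64(48));
  assert(Log2_32(32) == 5);             // floor(log2(32))
  assert(PowerOf2Ceil(40) == 64);       // next power of two >= 40
  assert(alignTo(10, Align(8)) == 16);  // round 10 up to a multiple of 8
}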
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1883
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition MathExtras.h:232
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:434
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition Utils.h:349
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1945
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition Utils.cpp:1283
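A quick illustration of the scalar behaviour of the two helpers above, under the assumption that for scalar inputs they reduce to the least common multiple and greatest common divisor of the bit widths.

#include "llvm/CodeGen/GlobalISel/Utils.h"
#include <cassert>
using namespace llvm;

static void lcmGcdExamples() {
  assert(getLCMType(LLT::scalar(64), LLT::scalar(32)) == LLT::scalar(64));
  assert(getGCDType(LLT::scalar(64), LLT::scalar(48)) == LLT::scalar(16));
}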
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:330
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition Utils.cpp:612
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
bool isKnownNeverZero() const
Return true if it's known this can never be a zero.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
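A sketch combining CreateStackObject with getFixedStack: allocate a stack temporary and describe it with a MachinePointerInfo, as the stack-based lowerings on this page do; makeStackSlot is a hypothetical helper and the size/alignment are placeholders.

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
using namespace llvm;

static MachinePointerInfo makeStackSlot(MachineFunction &MF) {
  int FI = MF.getFrameInfo().CreateStackObject(/*Size=*/16, Align(8),
                                               /*isSpillSlot=*/false);
  return MachinePointerInfo::getFixedStack(MF, FI);
}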
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.