LLVM 23.0.0git
LegalizerHelper.cpp
Go to the documentation of this file.
1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
36#include "llvm/Support/Debug.h"
40#include <numeric>
41#include <optional>
42
43#define DEBUG_TYPE "legalizer"
44
45using namespace llvm;
46using namespace LegalizeActions;
47using namespace MIPatternMatch;
48
49/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
50///
51/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
52/// with any leftover piece as type \p LeftoverTy
53///
54/// Returns -1 in the first element of the pair if the breakdown is not
55/// satisfiable.
56static std::pair<int, int>
57getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
58 assert(!LeftoverTy.isValid() && "this is an out argument");
59
60 unsigned Size = OrigTy.getSizeInBits();
61 unsigned NarrowSize = NarrowTy.getSizeInBits();
62 unsigned NumParts = Size / NarrowSize;
63 unsigned LeftoverSize = Size - NumParts * NarrowSize;
64 assert(Size > NarrowSize);
65
66 if (LeftoverSize == 0)
67 return {NumParts, 0};
68
69 if (NarrowTy.isVector()) {
70 unsigned EltSize = OrigTy.getScalarSizeInBits();
71 if (LeftoverSize % EltSize != 0)
72 return {-1, -1};
73 LeftoverTy = OrigTy.changeElementCount(
74 ElementCount::getFixed(LeftoverSize / EltSize));
75 } else {
76 LeftoverTy = LLT::integer(LeftoverSize);
77 }
78
79 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
80 return std::make_pair(NumParts, NumLeftover);
81}
82
// NOTE(review): the signature line of this helper was lost in extraction —
// confirm against upstream. The body maps a scalar LLT bit-width to the
// matching IR floating-point type, or returns null when there is none.

  if (!Ty.isScalar())
    return nullptr;

  switch (Ty.getSizeInBits()) {
  case 16:
    return Type::getHalfTy(Ctx);
  case 32:
    return Type::getFloatTy(Ctx);
  case 64:
    return Type::getDoubleTy(Ctx);
  case 80:
    return Type::getX86_FP80Ty(Ctx);
  case 128:
    return Type::getFP128Ty(Ctx);
  default:
    // No builtin FP type exists for other widths.
    return nullptr;
  }
}
103
// NOTE(review): the leading signature lines of this constructor were lost in
// extraction — confirm against upstream. This is the variant that derives the
// LegalizerInfo and TargetLowering from the function's subtarget.
                                 MachineIRBuilder &Builder,
                                 const LibcallLoweringInfo *Libcalls)
    : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()),
      TLI(*MF.getSubtarget().getTargetLowering()), Libcalls(Libcalls) {}
111
// NOTE(review): the leading signature lines of this constructor were lost in
// extraction — confirm against upstream. This variant takes an explicit
// LegalizerInfo (LI) and VT instead of querying the subtarget for them.
                                 const LibcallLoweringInfo *Libcalls,
    : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
      TLI(*MF.getSubtarget().getTargetLowering()), Libcalls(Libcalls), VT(VT) {}
119
// NOTE(review): the function header was lost in extraction — confirm against
// upstream; this is the tail of the parameter list. The body looks up the
// legalization action for the instruction and dispatches to the matching
// LegalizerHelper strategy, returning the resulting LegalizeResult.
                                        LostDebugLocObserver &LocObserver) {
  LLVM_DEBUG(dbgs() << "\nLegalizing: " << MI);

  // Build any new instructions at MI's position with its debug location.
  MIRBuilder.setInstrAndDebugLoc(MI);

  // Intrinsics are legalized via a dedicated target hook rather than the
  // action table.
  if (isa<GIntrinsic>(MI))
    return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI, LocObserver);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Bitcast:
    LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
    return bitcast(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case MoreElements:
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
  // NOTE(review): the ": UnableToLegalize;" continuation of the ternary
  // above was lost in extraction.
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}
164
165void LegalizerHelper::insertParts(Register DstReg,
166 LLT ResultTy, LLT PartTy,
167 ArrayRef<Register> PartRegs,
168 LLT LeftoverTy,
169 ArrayRef<Register> LeftoverRegs) {
170 if (!LeftoverTy.isValid()) {
171 assert(LeftoverRegs.empty());
172
173 if (!ResultTy.isVector()) {
174 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
175 return;
176 }
177
178 if (PartTy.isVector())
179 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
180 else
181 MIRBuilder.buildBuildVector(DstReg, PartRegs);
182 return;
183 }
184
185 // Merge sub-vectors with different number of elements and insert into DstReg.
186 if (ResultTy.isVector()) {
187 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
188 SmallVector<Register, 8> AllRegs(PartRegs);
189 AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
190 return mergeMixedSubvectors(DstReg, AllRegs);
191 }
192
193 SmallVector<Register> GCDRegs;
194 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
195 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
196 extractGCDType(GCDRegs, GCDTy, PartReg);
197 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
198 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
199}
200
// Unmerge the vector register \p Reg into scalar element registers and append
// them to \p Elts.
void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
                                       Register Reg) {
  LLT Ty = MRI.getType(Reg);
  // NOTE(review): the declaration of RegElts (the per-element register
  // accumulator used below) was lost in extraction — confirm against upstream.
  extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
               MIRBuilder, MRI);
  Elts.append(RegElts);
}
209
/// Merge \p PartRegs with different types into \p DstReg.
void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
                                           ArrayRef<Register> PartRegs) {
  // NOTE(review): the declaration of AllElts (the scalar-element accumulator
  // used below) was lost in extraction — confirm against upstream.
  // Flatten every part except the last into individual scalar elements.
  for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
    appendVectorElts(AllElts, PartRegs[i]);

  // The final (leftover) part may be a lone scalar rather than a vector.
  Register Leftover = PartRegs[PartRegs.size() - 1];
  if (!MRI.getType(Leftover).isVector())
    AllElts.push_back(Leftover);
  else
    appendVectorElts(AllElts, Leftover);

  // Rebuild the destination from the accumulated scalar elements.
  MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
}
225
/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
// NOTE(review): the first line of the signature was lost in extraction —
// confirm against upstream; the remaining parameter is the unmerge
// instruction whose defs are collected.
                              const MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

  const int StartIdx = Regs.size();
  // Every operand except the final one (the source) is a result def.
  const int NumResults = MI.getNumOperands() - 1;
  Regs.resize(Regs.size() + NumResults);
  for (int I = 0; I != NumResults; ++I)
    Regs[StartIdx + I] = MI.getOperand(I).getReg();
}
237
238void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
239 LLT GCDTy, Register SrcReg) {
240 LLT SrcTy = MRI.getType(SrcReg);
241 if (SrcTy == GCDTy) {
242 // If the source already evenly divides the result type, we don't need to do
243 // anything.
244 Parts.push_back(SrcReg);
245 } else {
246 // Need to split into common type sized pieces.
247 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
248 getUnmergeResults(Parts, *Unmerge);
249 }
250}
251
252LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
253 LLT NarrowTy, Register SrcReg) {
254 LLT SrcTy = MRI.getType(SrcReg);
255 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
256 extractGCDType(Parts, GCDTy, SrcReg);
257 return GCDTy;
258}
259
// Merge GCD-typed pieces (padding as needed per \p PadStrategy) up to NarrowTy
// pieces covering the LCM of DstTy and NarrowTy; results are handed back
// through the register list and the LCM type is returned.
LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         // NOTE(review): the parameter line
                                         // declaring the register list (used
                                         // as VRegs below) was lost in
                                         // extraction — confirm upstream.
                                         unsigned PadStrategy) {
  LLT LCMTy = getLCMType(DstTy, NarrowTy);

  // How many NarrowTy pieces cover the LCM type, and how many GCD pieces make
  // up each NarrowTy piece.
  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
  int NumOrigSrc = VRegs.size();

  Register PadReg;

  // Get a value we can use to pad the source value if the sources won't evenly
  // cover the result type.
  if (NumOrigSrc < NumParts * NumSubParts) {
    if (PadStrategy == TargetOpcode::G_ZEXT)
      PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
      PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    else {
      assert(PadStrategy == TargetOpcode::G_SEXT);

      // Shift the sign bit of the low register through the high register.
      auto ShiftAmt =
          MIRBuilder.buildConstant(LLT::integer(64), GCDTy.getSizeInBits() - 1);
      PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
    }
  }

  // Registers for the final merge to be produced.
  SmallVector<Register, 4> Remerge(NumParts);

  // Registers needed for intermediate merges, which will be merged into a
  // source for Remerge.
  SmallVector<Register, 4> SubMerge(NumSubParts);

  // Once we've fully read off the end of the original source bits, we can reuse
  // the same high bits for remaining padding elements.
  Register AllPadReg;

  // Build merges to the LCM type to cover the original result type.
  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;

    // Build the requested merges to the requested type.
    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      if (Idx >= NumOrigSrc) {
        SubMerge[J] = PadReg;
        continue;
      }

      SubMerge[J] = VRegs[Idx];

      // There are meaningful bits here we can't reuse later.
      AllMergePartsArePadding = false;
    }

    // If we've filled up a complete piece with padding bits, we can directly
    // emit the natural sized constant if applicable, rather than a merge of
    // smaller constants.
    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
        AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
      else if (PadStrategy == TargetOpcode::G_ZEXT)
        AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);

      // If this is a sign extension, we can't materialize a trivial constant
      // with the right type and have to produce a merge.
    }

    if (AllPadReg) {
      // Avoid creating additional instructions if we're just adding additional
      // copies of padding bits.
      Remerge[I] = AllPadReg;
      continue;
    }

    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];
    else
      Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);

    // In the sign extend padding case, re-use the first all-signbit merge.
    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];
  }

  // Hand the merged pieces back to the caller through the register list.
  VRegs = std::move(Remerge);
  return LCMTy;
}
350
351void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
352 ArrayRef<Register> RemergeRegs) {
353 LLT DstTy = MRI.getType(DstReg);
354
355 // Create the merge to the widened source, and extract the relevant bits into
356 // the result.
357
358 if (DstTy == LCMTy) {
359 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
360 return;
361 }
362
363 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
364 if (DstTy.isScalar() && LCMTy.isScalar()) {
365 MIRBuilder.buildTrunc(DstReg, Remerge);
366 return;
367 }
368
369 if (LCMTy.isVector()) {
370 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
371 SmallVector<Register, 8> UnmergeDefs(NumDefs);
372 UnmergeDefs[0] = DstReg;
373 for (unsigned I = 1; I != NumDefs; ++I)
374 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
375
376 MIRBuilder.buildUnmerge(UnmergeDefs,
377 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
378 return;
379 }
380
381 llvm_unreachable("unhandled case");
382}
383
// Map a generic opcode plus operand bit-width to the matching RTLIB libcall
// enumerator. Integer libcalls come in 32/64/128-bit variants; floating-point
// libcalls additionally have an 80-bit (x87) variant.
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
#define RTLIBCASE_INT(LibcallPrefix)                                           \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

#define RTLIBCASE(LibcallPrefix)                                               \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 80:                                                                   \
      return RTLIB::LibcallPrefix##80;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

  switch (Opcode) {
  case TargetOpcode::G_LROUND:
    RTLIBCASE(LROUND_F);
  case TargetOpcode::G_LLROUND:
    RTLIBCASE(LLROUND_F);
  case TargetOpcode::G_MUL:
    RTLIBCASE_INT(MUL_I);
  case TargetOpcode::G_SDIV:
    RTLIBCASE_INT(SDIV_I);
  case TargetOpcode::G_UDIV:
    RTLIBCASE_INT(UDIV_I);
  case TargetOpcode::G_SREM:
    RTLIBCASE_INT(SREM_I);
  case TargetOpcode::G_UREM:
    RTLIBCASE_INT(UREM_I);
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    RTLIBCASE_INT(CTLZ_I);
  case TargetOpcode::G_FADD:
    RTLIBCASE(ADD_F);
  case TargetOpcode::G_FSUB:
    RTLIBCASE(SUB_F);
  case TargetOpcode::G_FMUL:
    RTLIBCASE(MUL_F);
  case TargetOpcode::G_FDIV:
    RTLIBCASE(DIV_F);
  case TargetOpcode::G_FEXP:
    RTLIBCASE(EXP_F);
  case TargetOpcode::G_FEXP2:
    RTLIBCASE(EXP2_F);
  case TargetOpcode::G_FEXP10:
    RTLIBCASE(EXP10_F);
  case TargetOpcode::G_FREM:
    RTLIBCASE(REM_F);
  case TargetOpcode::G_FPOW:
    RTLIBCASE(POW_F);
  case TargetOpcode::G_FPOWI:
    RTLIBCASE(POWI_F);
  case TargetOpcode::G_FMA:
    RTLIBCASE(FMA_F);
  case TargetOpcode::G_FSIN:
    RTLIBCASE(SIN_F);
  case TargetOpcode::G_FCOS:
    RTLIBCASE(COS_F);
  case TargetOpcode::G_FTAN:
    RTLIBCASE(TAN_F);
  case TargetOpcode::G_FASIN:
    RTLIBCASE(ASIN_F);
  case TargetOpcode::G_FACOS:
    RTLIBCASE(ACOS_F);
  case TargetOpcode::G_FATAN:
    RTLIBCASE(ATAN_F);
  case TargetOpcode::G_FATAN2:
    RTLIBCASE(ATAN2_F);
  case TargetOpcode::G_FSINH:
    RTLIBCASE(SINH_F);
  case TargetOpcode::G_FCOSH:
    RTLIBCASE(COSH_F);
  case TargetOpcode::G_FTANH:
    RTLIBCASE(TANH_F);
  case TargetOpcode::G_FSINCOS:
    RTLIBCASE(SINCOS_F);
  case TargetOpcode::G_FMODF:
    RTLIBCASE(MODF_F);
  case TargetOpcode::G_FLOG10:
    RTLIBCASE(LOG10_F);
  case TargetOpcode::G_FLOG:
    RTLIBCASE(LOG_F);
  case TargetOpcode::G_FLOG2:
    RTLIBCASE(LOG2_F);
  case TargetOpcode::G_FLDEXP:
    RTLIBCASE(LDEXP_F);
  case TargetOpcode::G_FCEIL:
    RTLIBCASE(CEIL_F);
  case TargetOpcode::G_FFLOOR:
    RTLIBCASE(FLOOR_F);
  case TargetOpcode::G_FMINNUM:
    RTLIBCASE(FMIN_F);
  case TargetOpcode::G_FMAXNUM:
    RTLIBCASE(FMAX_F);
  case TargetOpcode::G_FMINIMUMNUM:
    RTLIBCASE(FMINIMUM_NUM_F);
  case TargetOpcode::G_FMAXIMUMNUM:
    RTLIBCASE(FMAXIMUM_NUM_F);
  case TargetOpcode::G_FSQRT:
    RTLIBCASE(SQRT_F);
  case TargetOpcode::G_FRINT:
    RTLIBCASE(RINT_F);
  case TargetOpcode::G_FNEARBYINT:
    RTLIBCASE(NEARBYINT_F);
  case TargetOpcode::G_INTRINSIC_TRUNC:
    RTLIBCASE(TRUNC_F);
  case TargetOpcode::G_INTRINSIC_ROUND:
    RTLIBCASE(ROUND_F);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    RTLIBCASE(ROUNDEVEN_F);
  case TargetOpcode::G_INTRINSIC_LRINT:
    RTLIBCASE(LRINT_F);
  case TargetOpcode::G_INTRINSIC_LLRINT:
    RTLIBCASE(LLRINT_F);
  }
  llvm_unreachable("Unknown libcall function");
#undef RTLIBCASE_INT
#undef RTLIBCASE
}
519
/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
// NOTE(review): the first signature lines were lost in extraction — confirm
// against upstream; the body reads the instruction (MI) and result arg-info
// in addition to the two visible parameters.
                            const TargetInstrInfo &TII,
                            MachineRegisterInfo &MRI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const Function &F = MBB.getParent()->getFunction();

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore NoAlias and NonNull because they don't affect the
  // call sequence.
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)
          .hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
      CallerAttrs.hasRetAttr(Attribute::SExt))
    return false;

  // Only tail call if the following instruction is a standard return or if we
  // have a `thisreturn` callee, and a sequence like:
  //
  //   G_MEMCPY %0, %1, %2
  //   $x0 = COPY %0
  //   RET_ReallyLR implicit $x0
  auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
  if (Next != MBB.instr_end() && Next->isCopy()) {
    if (MI.getOpcode() == TargetOpcode::G_BZERO)
      return false;

    // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
    // memcpy/etc routines return the same parameter. For other it will be the
    // returned value.
    Register VReg = MI.getOperand(0).getReg();
    if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
      return false;

    Register PReg = Next->getOperand(0).getReg();
    if (!PReg.isPhysical())
      return false;

    auto Ret = next_nodbg(Next, MBB.instr_end());
    if (Ret == MBB.instr_end() || !Ret->isReturn())
      return false;

    if (Ret->getNumImplicitOperands() != 1)
      return false;

    if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
      return false;

    // Skip over the COPY that we just validated.
    Next = Ret;
  }

  if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
    return false;

  return true;
}
585
// Emit a call to the named library function with the given result and
// arguments, lowering it as a tail call when MI is provided and the call is
// in tail position.
// NOTE(review): the return-type/opening signature line and the argument-list
// parameter line were lost in extraction — confirm against upstream.
    const char *Name, const CallLowering::ArgInfo &Result,
    LostDebugLocObserver &LocObserver, MachineInstr *MI) const {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();

  // NOTE(review): the declaration of Info (the call-lowering descriptor
  // populated below) was lost in extraction.
  Info.CallConv = CC;
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = Result;
  // Only consider a tail call when the libcall result type matches (or is
  // void relative to) the caller's return type and MI sits in tail position.
  if (MI)
    Info.IsTailCall =
        (Result.Ty->isVoidTy() ||
         Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
        isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
                                *MIRBuilder.getMRI());

  llvm::append_range(Info.OrigArgs, Args);
  if (!CLI.lowerCall(MIRBuilder, Info))
  // NOTE(review): the failure-return line following this condition was lost
  // in extraction.

  if (MI && Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");

    // Check debug locations before removing the return.
    LocObserver.checkpoint(true);

    // We must have a return following the call (or debug insts) to get past
    // isLibCallInTailPosition.
    do {
      MachineInstr *Next = MI->getNextNode();
      assert(Next &&
             (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      // We lowered a tail call, so the call is now the return from the block.
      // Delete the old return.
      Next->eraseFromParent();
    } while (MI->getNextNode());

    // We expect to lose the debug location from the return.
    LocObserver.checkpoint(false);
  }
  // NOTE(review): the final success-return line was lost in extraction.
}
630
// Resolve an RTLIB libcall enumerator to the target's implementation (name
// and calling convention) and emit the call via the name-based overload.
// NOTE(review): the opening signature line and the argument-list/observer
// parameter line were lost in extraction — confirm against upstream.
    RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result,
    MachineInstr *MI) const {
  if (!Libcalls)
  // NOTE(review): the early-return line for a null Libcalls table was lost in
  // extraction.

  RTLIB::LibcallImpl LibcallImpl = Libcalls->getLibcallImpl(Libcall);
  if (LibcallImpl == RTLIB::Unsupported)
  // NOTE(review): the early-return line for an unsupported libcall and the
  // declaration of Name (queried from the libcall table) were lost in
  // extraction.

  const CallingConv::ID CC = Libcalls->getLibcallImplCallingConv(LibcallImpl);
  return createLibcall(Name.data(), Result, Args, CC, LocObserver, MI);
}
646
// Useful for libcalls where all operands have the same type.
// NOTE(review): the opening signature lines were lost in extraction —
// confirm against upstream; the body reads the instruction MI in addition to
// the visible parameters.
                                 unsigned Size, Type *OpType,
                                 LostDebugLocObserver &LocObserver) const {
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);

  // FIXME: What does the original arg index mean here?
  // NOTE(review): the declaration of Args (the argument accumulator) was lost
  // in extraction.
  // Every operand after the def becomes a call argument of the same type.
  for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
    Args.push_back({MO.getReg(), OpType, 0});
  return createLibcall(Libcall, {MI.getOperand(0).getReg(), OpType, 0}, Args,
                       LocObserver, &MI);
}
661
// Lower G_FSINCOS to a sincos-style libcall: the routine writes both results
// through pointers, so two stack temporaries are created and loaded back into
// the instruction's destination registers.
LegalizerHelper::LegalizeResult LegalizerHelper::emitSincosLibcall(
    MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType,
    LostDebugLocObserver &LocObserver) {
  MachineFunction &MF = *MI.getMF();
  // NOTE(review): a declaration line (presumably binding MRI) was lost in
  // extraction — confirm against upstream.

  Register DstSin = MI.getOperand(0).getReg();
  Register DstCos = MI.getOperand(1).getReg();
  Register Src = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstSin);

  int MemSize = DstTy.getSizeInBytes();
  Align Alignment = getStackTemporaryAlignment(DstTy);
  // NOTE(review): the declaration of DL (the data layout used below) was lost
  // in extraction.
  unsigned AddrSpace = DL.getAllocaAddrSpace();
  MachinePointerInfo PtrInfo;

  // One stack slot per out-parameter of the sincos routine.
  Register StackPtrSin =
      createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
          .getReg(0);
  Register StackPtrCos =
      createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
          .getReg(0);

  auto &Ctx = MF.getFunction().getContext();
  auto LibcallResult = createLibcall(
      getRTLibDesc(MI.getOpcode(), Size), {{0}, Type::getVoidTy(Ctx), 0},
      {{Src, OpType, 0},
       {StackPtrSin, PointerType::get(Ctx, AddrSpace), 1},
       {StackPtrCos, PointerType::get(Ctx, AddrSpace), 2}},
      LocObserver, &MI);

  if (LibcallResult != LegalizeResult::Legalized)
  // NOTE(review): the failure-return line and the declarations of the two
  // load memory operands (sin/cos) were lost in extraction.
      PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
      PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);

  // Read both results back out of the stack temporaries.
  MIRBuilder.buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
  MIRBuilder.buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
  MI.eraseFromParent();

  // NOTE(review): the final success-return line was lost in extraction.
}
708
// Lower G_FMODF to a modf-style libcall: the fractional part is the call's
// return value and the integral part is written through a pointer, so one
// stack temporary is created and loaded back into the second destination.
// NOTE(review): the return-type line of the signature was lost in extraction.
LegalizerHelper::emitModfLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
                                 unsigned Size, Type *OpType,
                                 LostDebugLocObserver &LocObserver) {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  Register DstFrac = MI.getOperand(0).getReg();
  Register DstInt = MI.getOperand(1).getReg();
  Register Src = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstFrac);

  int MemSize = DstTy.getSizeInBytes();
  Align Alignment = getStackTemporaryAlignment(DstTy);
  const DataLayout &DL = MIRBuilder.getDataLayout();
  unsigned AddrSpace = DL.getAllocaAddrSpace();
  MachinePointerInfo PtrInfo;

  // Stack slot for the integral-part out-parameter.
  Register StackPtrInt =
      createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
          .getReg(0);

  auto &Ctx = MF.getFunction().getContext();
  auto LibcallResult = createLibcall(
      getRTLibDesc(MI.getOpcode(), Size), {DstFrac, OpType, 0},
      {{Src, OpType, 0}, {StackPtrInt, PointerType::get(Ctx, AddrSpace), 1}},
      LocObserver, &MI);

  if (LibcallResult != LegalizeResult::Legalized)
  // NOTE(review): the failure-return line and the declaration of the load
  // memory operand were lost in extraction.
      PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);

  // Read the integral part back out of the stack temporary.
  MIRBuilder.buildLoad(DstInt, StackPtrInt, *LoadMMOInt);
  MI.eraseFromParent();

  // NOTE(review): the final success-return line was lost in extraction.
}
748
749static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
750 Type *FromType) {
751 auto ToMVT = MVT::getVT(ToType);
752 auto FromMVT = MVT::getVT(FromType);
753
754 switch (Opcode) {
755 case TargetOpcode::G_FPEXT:
756 return RTLIB::getFPEXT(FromMVT, ToMVT);
757 case TargetOpcode::G_FPTRUNC:
758 return RTLIB::getFPROUND(FromMVT, ToMVT);
759 case TargetOpcode::G_FPTOSI:
760 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
761 case TargetOpcode::G_FPTOUI:
762 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
763 case TargetOpcode::G_SITOFP:
764 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
765 case TargetOpcode::G_UITOFP:
766 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
767 }
768 llvm_unreachable("Unsupported libcall function");
769}
770
// Lower a type-conversion instruction (FPEXT/FPTRUNC/FPTO*/xxTOFP) to the
// corresponding RTLIB conversion libcall.
// NOTE(review): the return-type/name line of the signature was lost in
// extraction — confirm against upstream.
    MachineInstr &MI, Type *ToType, Type *FromType,
    LostDebugLocObserver &LocObserver, bool IsSigned) const {
  CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
  if (FromType->isIntegerTy()) {
    // The target decides whether a narrow integer argument is sign- or
    // zero-extended for the libcall ABI.
    if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
      Arg.Flags[0].setSExt();
    else
      Arg.Flags[0].setZExt();
  }

  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  return createLibcall(Libcall, {MI.getOperand(0).getReg(), ToType, 0}, Arg,
                       LocObserver, &MI);
}
786
// Lower G_MEMCPY / G_MEMMOVE / G_MEMSET / G_BZERO to the corresponding
// runtime library call, tail-calling when the instruction's trailing 'tail'
// immediate is set and the call sits in tail position.
// NOTE(review): the opening signature lines were lost in extraction —
// confirm against upstream.
                                                 LostDebugLocObserver &LocObserver) const {
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
  // NOTE(review): the declaration of Args (the argument accumulator) was lost
  // in extraction.

  // Add all the args, except for the last which is an imm denoting 'tail'.
  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
    Register Reg = MI.getOperand(i).getReg();

    // Need derive an IR type for call lowering.
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy, 0});
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  RTLIB::Libcall RTLibcall;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_BZERO:
    RTLibcall = RTLIB::BZERO;
    break;
  case TargetOpcode::G_MEMCPY:
    RTLibcall = RTLIB::MEMCPY;
    // The dst pointer is returned unchanged by the routine.
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMMOVE:
    RTLibcall = RTLIB::MEMMOVE;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMSET:
    RTLibcall = RTLIB::MEMSET;
    Args[0].Flags[0].setReturned();
    break;
  default:
    llvm_unreachable("unsupported opcode");
  }

  if (!Libcalls) // FIXME: Should be mandatory
  // NOTE(review): the early-return line was lost in extraction.

  RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);

  // Unsupported libcall on the target.
  if (RTLibcallImpl == RTLIB::Unsupported) {
    LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
                      << MIRBuilder.getTII().getName(Opc) << "\n");
    // NOTE(review): the return line inside this branch was lost in
    // extraction.
  }

  // NOTE(review): the declaration of Info (the call-lowering descriptor) was
  // lost in extraction.
  Info.CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);

  StringRef LibcallName =
  // NOTE(review): the initializer of LibcallName was lost in extraction.
  Info.Callee = MachineOperand::CreateES(LibcallName.data());
  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
  Info.IsTailCall =
      MI.getOperand(MI.getNumOperands() - 1).getImm() &&
      isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);

  llvm::append_range(Info.OrigArgs, Args);
  if (!CLI.lowerCall(MIRBuilder, Info))
  // NOTE(review): the failure-return line was lost in extraction.

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");

    // Check debug locations before removing the return.
    LocObserver.checkpoint(true);

    // We must have a return following the call (or debug insts) to get past
    // isLibCallInTailPosition.
    do {
      MachineInstr *Next = MI.getNextNode();
      assert(Next &&
             (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      // We lowered a tail call, so the call is now the return from the block.
      // Delete the old return.
      Next->eraseFromParent();
    } while (MI.getNextNode());

    // We expect to lose the debug location from the return.
    LocObserver.checkpoint(false);
  }

  // NOTE(review): the final success-return line was lost in extraction.
}
881
// Map an atomic G_* instruction to the matching OUTLINE_ATOMIC_* libcall,
// selected by memory size (1/2/4/8/16 bytes) and merged memory ordering.
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  auto &AtomicMI = cast<GMemOperation>(MI);
  auto &MMO = AtomicMI.getMMO();
  auto Ordering = MMO.getMergedOrdering();
  LLT MemType = MMO.getMemoryType();
  uint64_t MemSize = MemType.getSizeInBytes();
  // No outline-atomic helpers exist for vector memory types.
  if (MemType.isVector())
    return RTLIB::UNKNOWN_LIBCALL;

// LCALLS expands to the four ordering variants for one size; LCALL5 covers
// all five supported sizes.
#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
#define LCALL5(A) \
  LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
  switch (Opc) {
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_XCHG: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB: {
    // SUB is implemented on top of LDADD with a negated operand by the
    // caller.
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_AND: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_OR: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_XOR: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  default:
    return RTLIB::UNKNOWN_LIBCALL;
  }
#undef LCALLS
#undef LCALL5
}
928
// NOTE(review): the opening signature lines of this function were lost in
// extraction — confirm against upstream. The body lowers an atomic
// cmpxchg/RMW instruction to its outline-atomic library call.
  auto &Ctx = MIRBuilder.getContext();

  Type *RetTy;
  SmallVector<Register> RetRegs;
  // NOTE(review): the declaration of Args (the argument accumulator) was lost
  // in extraction.
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    // NOTE(review): the declaration of Success (the extra result of the
    // WITH_SUCCESS form) was lost in extraction.
    LLT SuccessLLT;
    auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
        MI.getFirst4RegLLTs();
    RetRegs.push_back(Ret);
    RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
    if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
      // Re-read the operands including the success flag and widen the return
      // type to a {value, success} struct.
      std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
               NewLLT) = MI.getFirst5RegLLTs();
      RetRegs.push_back(Success);
      RetTy = StructType::get(
          Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
    }
    Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
    Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
    Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
    break;
  }
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR: {
    auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
    RetRegs.push_back(Ret);
    RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
    // AND maps to LDCLR (clear-bits), so pass the complement; SUB maps to
    // LDADD, so pass the negation.
    if (Opc == TargetOpcode::G_ATOMICRMW_AND)
      Val =
          MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
              .getReg(0);
    else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
      Val =
          MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
              .getReg(0);
    Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
    Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
    break;
  }
  default:
    llvm_unreachable("unsupported opcode");
  }

  if (!Libcalls) // FIXME: Should be mandatory
  // NOTE(review): the early-return line was lost in extraction.

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
  RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);

  // Unsupported libcall on the target.
  if (RTLibcallImpl == RTLIB::Unsupported) {
    LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
                      << MIRBuilder.getTII().getName(Opc) << "\n");
    // NOTE(review): the return line inside this branch was lost in
    // extraction.
  }

  // NOTE(review): the declaration of Info (the call-lowering descriptor) was
  // lost in extraction.
  Info.CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);

  StringRef LibcallName =
  // NOTE(review): the initializer of LibcallName was lost in extraction.
  Info.Callee = MachineOperand::CreateES(LibcallName.data());
  Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);

  llvm::append_range(Info.OrigArgs, Args);
  if (!CLI.lowerCall(MIRBuilder, Info))
  // NOTE(review): the failure-return and final success-return lines were lost
  // in extraction.
}
1011
// Map an FP-environment/mode opcode to the corresponding fegetenv/fesetenv/
// fegetmode/fesetmode runtime library call.
static RTLIB::Libcall
// NOTE(review): the signature line, and part of the first declaration
// (presumably its type), were lost in extraction — confirm against upstream.
  RTLibcall;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_GET_FPENV:
    RTLibcall = RTLIB::FEGETENV;
    break;
  case TargetOpcode::G_SET_FPENV:
  case TargetOpcode::G_RESET_FPENV:
    // RESET is implemented as a SET of the default environment.
    RTLibcall = RTLIB::FESETENV;
    break;
  case TargetOpcode::G_GET_FPMODE:
    RTLibcall = RTLIB::FEGETMODE;
    break;
  case TargetOpcode::G_SET_FPMODE:
  case TargetOpcode::G_RESET_FPMODE:
    RTLibcall = RTLIB::FESETMODE;
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }
  return RTLibcall;
}
1035
1036 // Some library functions that read FP state (fegetmode, fegetenv) write the
1037 // state into a region in memory. IR intrinsics that do the same operations
1038 // (get_fpmode, get_fpenv) return the state as integer value. To implement these
1039 // intrinsics via the library functions, we need to use temporary variable,
1040 // for example:
1041 //
1042 // %0:_(s32) = G_GET_FPMODE
1043 //
1044 // is transformed to:
1045 //
1046 // %1:_(p0) = G_FRAME_INDEX %stack.0
1047 // BL &fegetmode
1048 // %0:_(s32) = G_LOAD % 1
1049 //
// \param MI           the G_GET_FPENV / G_GET_FPMODE instruction to lower.
// \param LocObserver  forwarded to createLibcall for debug-loc bookkeeping.
// NOTE(review): the return-type line of this definition (original line 1050,
// presumably LegalizerHelper::LegalizeResult) and the trailing return
// statement (original line 1082) are missing from this rendering — the
// visible body ends after the G_LOAD is built; confirm against upstream.
1051 LegalizerHelper::createGetStateLibcall(MachineInstr &MI,
1052 LostDebugLocObserver &LocObserver) {
1053 const DataLayout &DL = MIRBuilder.getDataLayout();
1054 auto &MF = MIRBuilder.getMF();
1055 auto &MRI = *MIRBuilder.getMRI();
1056 auto &Ctx = MF.getFunction().getContext();
1057
1058 // Create temporary, where library function will put the read state.
// The stack slot is sized/aligned from the destination LLT so the whole
// state value fits and the later typed load is legal.
1059 Register Dst = MI.getOperand(0).getReg();
1060 LLT StateTy = MRI.getType(Dst);
1061 TypeSize StateSize = StateTy.getSizeInBytes();
1062 Align TempAlign = getStackTemporaryAlignment(StateTy);
1063 MachinePointerInfo TempPtrInfo;
1064 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1065
1066 // Create a call to library function, with the temporary as an argument.
// The libcall returns void; its single argument is a pointer (in the
// alloca address space) to the stack temporary the callee writes into.
1067 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1068 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1069 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1070 auto Res = createLibcall(
1071 RTLibcall, CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1072 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}), LocObserver,
1073 nullptr);
// Propagate failure from the call lowering unchanged.
1074 if (Res != LegalizerHelper::Legalized)
1075 return Res;
1076
1077 // Create a load from the temporary.
// Read the state the library call just stored back into the original
// destination register of MI.
1078 MachineMemOperand *MMO = MF.getMachineMemOperand(
1079 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
1080 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1081
1083 }
1084
1085 // Similar to `createGetStateLibcall` the function calls a library function
1086 // using transient space in stack. In this case the library function reads
1087 // content of memory region.
// Lowers G_SET_FPENV / G_SET_FPMODE: the new state value is spilled to a
// stack temporary and the fesetenv/fesetmode-style routine is called with a
// pointer to it.
// \param MI           the G_SET_FPENV / G_SET_FPMODE instruction to lower.
// \param LocObserver  forwarded to createLibcall for debug-loc bookkeeping.
// NOTE(review): the return-type line of this definition (original line 1088)
// is missing from this rendering — presumably
// LegalizerHelper::LegalizeResult; confirm against upstream.
1089 LegalizerHelper::createSetStateLibcall(MachineInstr &MI,
1090 LostDebugLocObserver &LocObserver) {
1091 const DataLayout &DL = MIRBuilder.getDataLayout();
1092 auto &MF = MIRBuilder.getMF();
1093 auto &MRI = *MIRBuilder.getMRI();
1094 auto &Ctx = MF.getFunction().getContext();
1095
1096 // Create temporary, where library function will get the new state.
// Operand 0 is the value operand here (these opcodes have no def), and its
// LLT determines the temporary's size and alignment.
1097 Register Src = MI.getOperand(0).getReg();
1098 LLT StateTy = MRI.getType(Src);
1099 TypeSize StateSize = StateTy.getSizeInBytes();
1100 Align TempAlign = getStackTemporaryAlignment(StateTy);
1101 MachinePointerInfo TempPtrInfo;
1102 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1103
1104 // Put the new state into the temporary.
1105 MachineMemOperand *MMO = MF.getMachineMemOperand(
1106 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
1107 MIRBuilder.buildStore(Src, Temp, *MMO);
1108
1109 // Create a call to library function, with the temporary as an argument.
// Void-returning call; the callee reads the state through the pointer.
1110 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1111 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1112 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1113 return createLibcall(RTLibcall,
1114 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1115 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1116 LocObserver, nullptr);
1117 }
1118
1119 /// Returns the corresponding libcall for the given Pred and
1120 /// the ICMP predicate that should be generated to compare with #0
1121 /// after the libcall.
// The RTLIBCASE_CMP macro expands, per operand size (32/64/128 bits), to a
// return of the size-suffixed soft-float comparison libcall plus the integer
// predicate used on the libcall's i32 result against zero.
// NOTE(review): this rendering is missing the parameter-list line (original
// line 1123) and the RTLIBCASE_CMP invocation lines for each visible case
// label (original lines 1140, 1142, 1144, 1146, 1148, 1150, 1152) — as
// shown, the case labels appear to fall through to `default`, which is an
// extraction artifact, not the real control flow; confirm against upstream.
1122 static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1124 #define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1125 do { \
1126 switch (Size) { \
1127 case 32: \
1128 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1129 case 64: \
1130 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1131 case 128: \
1132 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1133 default: \
1134 llvm_unreachable("unexpected size"); \
1135 } \
1136 } while (0)
1137
// Predicates with no direct libcall mapping (UEQ, ONE, ULT, ...) fall to the
// default and are synthesized from combinations in createFCMPLibcall.
1138 switch (Pred) {
1139 case CmpInst::FCMP_OEQ:
1141 case CmpInst::FCMP_UNE:
1143 case CmpInst::FCMP_OGE:
1145 case CmpInst::FCMP_OLT:
1147 case CmpInst::FCMP_OLE:
1149 case CmpInst::FCMP_OGT:
1151 case CmpInst::FCMP_UNO:
1153 default:
1154 return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
1155 }
1156 }
1157
// Lowers a G_FCMP to soft-float comparison libcall(s) followed by an integer
// compare of the libcall's i32 result against zero. Predicates with a direct
// libcall mapping use one call; UEQ/ONE are built from two calls combined
// with OR/AND; ULT/UGE/UGT/ULE/ORD use the inverse-predicate trick.
// Only scalar operands of 32, 64 or 128 bits are supported.
// NOTE(review): the return-type line of this definition (original line 1158)
// and the `getFCMPLibcallDesc(...)` call lines (original lines 1219, 1223,
// 1240, 1245, 1269) are missing from this rendering; the initializers below
// visibly end at `=` — confirm against upstream.
1159 LegalizerHelper::createFCMPLibcall(MachineInstr &MI,
1160 LostDebugLocObserver &LocObserver) {
1161 auto &MF = MIRBuilder.getMF();
1162 auto &Ctx = MF.getFunction().getContext();
1163 const GFCmp *Cmp = cast<GFCmp>(&MI);
1164
// Both operands must be the same supported scalar FP size.
1165 LLT OpLLT = MRI.getType(Cmp->getLHSReg());
1166 unsigned Size = OpLLT.getSizeInBits();
1167 if ((Size != 32 && Size != 64 && Size != 128) ||
1168 OpLLT != MRI.getType(Cmp->getRHSReg()))
1169 return UnableToLegalize;
1170
1171 Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
1172
1173 // DstReg type is s32
1174 const Register DstReg = Cmp->getReg(0);
1175 LLT DstTy = MRI.getType(DstReg);
1176 const auto Cond = Cmp->getCond();
1177
1178 // Reference:
1179 // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
1180 // Generates a libcall followed by ICMP.
// Helper: emits one comparison libcall into a fresh i32 temp, then an ICMP
// of that temp against 0 into Res. Returns the ICMP's result register, or an
// invalid Register on failure.
1181 const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
1182 const CmpInst::Predicate ICmpPred,
1183 const DstOp &Res) -> Register {
1184 // FCMP libcall always returns an i32, and needs an ICMP with #0.
1185 LLT TempLLT = LLT::integer(32);
1186 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1187 // Generate libcall, holding result in Temp
1188 const auto Status = createLibcall(
1189 Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
1190 {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
1191 LocObserver, &MI);
1192 if (!Status)
1193 return {};
1194
1195 // Compare temp with #0 to get the final result.
1196 return MIRBuilder
1197 .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
1198 .getReg(0);
1199 };
1200
1201 // Simple case if we have a direct mapping from predicate to libcall
1202 if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
1203 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1204 ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
1205 if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
1206 return Legalized;
1207 }
1208 return UnableToLegalize;
1209 }
1210
1211 // No direct mapping found, should be generated as combination of libcalls.
1212
1213 switch (Cond) {
1214 case CmpInst::FCMP_UEQ: {
1215 // FCMP_UEQ: unordered or equal
1216 // Convert into (FCMP_OEQ || FCMP_UNO).
1217
1218 const auto [OeqLibcall, OeqPred] =
1220 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1221
1222 const auto [UnoLibcall, UnoPred] =
1224 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
// Either sub-compare failing to lower aborts the whole legalization.
1225 if (Oeq && Uno)
1226 MIRBuilder.buildOr(DstReg, Oeq, Uno);
1227 else
1228 return UnableToLegalize;
1229
1230 break;
1231 }
1232 case CmpInst::FCMP_ONE: {
1233 // FCMP_ONE: ordered and operands are unequal
1234 // Convert into (!FCMP_OEQ && !FCMP_UNO).
1235
1236 // We inverse the predicate instead of generating a NOT
1237 // to save one instruction.
1238 // On AArch64 isel can even select two cmp into a single ccmp.
1239 const auto [OeqLibcall, OeqPred] =
1241 const auto NotOeq =
1242 BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
1243
1244 const auto [UnoLibcall, UnoPred] =
1246 const auto NotUno =
1247 BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
1248
1249 if (NotOeq && NotUno)
1250 MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
1251 else
1252 return UnableToLegalize;
1253
1254 break;
1255 }
1256 case CmpInst::FCMP_ULT:
1257 case CmpInst::FCMP_UGE:
1258 case CmpInst::FCMP_UGT:
1259 case CmpInst::FCMP_ULE:
1260 case CmpInst::FCMP_ORD: {
1261 // Convert into: !(inverse(Pred))
1262 // E.g. FCMP_ULT becomes !FCMP_OGE
1263 // This is equivalent to the following, but saves some instructions.
1264 // MIRBuilder.buildNot(
1265 // PredTy,
1266 // MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
1267 // Op1, Op2));
1268 const auto [InversedLibcall, InversedPred] =
1270 if (!BuildLibcall(InversedLibcall,
1271 CmpInst::getInversePredicate(InversedPred), DstReg))
1272 return UnableToLegalize;
1273 break;
1274 }
1275 default:
1276 return UnableToLegalize;
1277 }
1278
1279 return Legalized;
1280 }
1281
1282 // The function is used to legalize operations that set default environment
1283 // state. In C library a call like `fesetmode(FE_DFL_MODE)` is used for that.
1284 // On most targets supported in glibc FE_DFL_MODE is defined as
1285 // `((const femode_t *) -1)`. Such assumption is used here. If for some target
1286 // it is not true, the target must provide custom lowering.
// Lowers G_RESET_FPENV / G_RESET_FPMODE by calling the corresponding "set"
// routine with the all-ones pointer value standing in for FE_DFL_ENV /
// FE_DFL_MODE.
// \param MI           the G_RESET_FPENV / G_RESET_FPMODE instruction.
// \param LocObserver  forwarded to createLibcall for debug-loc bookkeeping.
// NOTE(review): the return-type line (original line 1287) is missing from
// this rendering — presumably LegalizerHelper::LegalizeResult; confirm
// against upstream.
1288 LegalizerHelper::createResetStateLibcall(MachineInstr &MI,
1289 LostDebugLocObserver &LocObserver) {
1290 const DataLayout &DL = MIRBuilder.getDataLayout();
1291 auto &MF = MIRBuilder.getMF();
1292 auto &Ctx = MF.getFunction().getContext();
1293
1294 // Create an argument for the library function.
// Build the magic `(pointer)-1` constant: materialize -1 as an integer of
// pointer width, then int-to-ptr it into the globals address space.
1295 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1296 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1297 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1298 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1299 auto DefValue = MIRBuilder.buildConstant(LLT::integer(PtrSize), -1LL);
1300 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1301 MIRBuilder.buildIntToPtr(Dest, DefValue);
1302
// Same fesetenv/fesetmode selection as the non-reset "set" path.
1303 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1304 return createLibcall(
1305 RTLibcall, CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1306 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}), LocObserver, &MI);
1307 }
1308
// Top-level libcall-legalization dispatcher: maps each supported generic
// opcode to the helper that replaces it with a runtime-library call, then
// erases the original instruction. Returns UnableToLegalize for unsupported
// opcodes/types, otherwise propagates the helper's status.
// NOTE(review): this rendering is missing the definition's signature lines
// (original lines 1309-1310, presumably
// `LegalizerHelper::LegalizeResult LegalizerHelper::libcall(MachineInstr
// &MI, LostDebugLocObserver &LocObserver) {`) and several interior
// statement-start lines flagged below; confirm against upstream.
1311 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1312
1313 switch (MI.getOpcode()) {
1314 default:
1315 return UnableToLegalize;
// Integer ops: lowered via simpleLibcall with an IR integer type of the
// result width.
1316 case TargetOpcode::G_MUL:
1317 case TargetOpcode::G_SDIV:
1318 case TargetOpcode::G_UDIV:
1319 case TargetOpcode::G_SREM:
1320 case TargetOpcode::G_UREM:
1321 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1322 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1323 unsigned Size = LLTy.getSizeInBits();
1324 Type *HLTy = IntegerType::get(Ctx, Size);
1325 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1326 if (Status != Legalized)
1327 return Status;
1328 break;
1329 }
// Unary/binary FP math ops: lowered via simpleLibcall with the matching IR
// float type; only f32/f64/f80/f128 widths are supported.
1330 case TargetOpcode::G_FADD:
1331 case TargetOpcode::G_FSUB:
1332 case TargetOpcode::G_FMUL:
1333 case TargetOpcode::G_FDIV:
1334 case TargetOpcode::G_FMA:
1335 case TargetOpcode::G_FPOW:
1336 case TargetOpcode::G_FREM:
1337 case TargetOpcode::G_FCOS:
1338 case TargetOpcode::G_FSIN:
1339 case TargetOpcode::G_FTAN:
1340 case TargetOpcode::G_FACOS:
1341 case TargetOpcode::G_FASIN:
1342 case TargetOpcode::G_FATAN:
1343 case TargetOpcode::G_FATAN2:
1344 case TargetOpcode::G_FCOSH:
1345 case TargetOpcode::G_FSINH:
1346 case TargetOpcode::G_FTANH:
1347 case TargetOpcode::G_FLOG10:
1348 case TargetOpcode::G_FLOG:
1349 case TargetOpcode::G_FLOG2:
1350 case TargetOpcode::G_FEXP:
1351 case TargetOpcode::G_FEXP2:
1352 case TargetOpcode::G_FEXP10:
1353 case TargetOpcode::G_FCEIL:
1354 case TargetOpcode::G_FFLOOR:
1355 case TargetOpcode::G_FMINNUM:
1356 case TargetOpcode::G_FMAXNUM:
1357 case TargetOpcode::G_FMINIMUMNUM:
1358 case TargetOpcode::G_FMAXIMUMNUM:
1359 case TargetOpcode::G_FSQRT:
1360 case TargetOpcode::G_FRINT:
1361 case TargetOpcode::G_FNEARBYINT:
1362 case TargetOpcode::G_INTRINSIC_TRUNC:
1363 case TargetOpcode::G_INTRINSIC_ROUND:
1364 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1365 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1366 unsigned Size = LLTy.getSizeInBits();
1367 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1368 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1369 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1370 return UnableToLegalize;
1371 }
1372 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1373 if (Status != Legalized)
1374 return Status;
1375 break;
1376 }
// sincos/modf produce two results, so they need dedicated emitters; note
// these cases return directly and skip the shared eraseFromParent below —
// presumably the emitters erase MI themselves (TODO confirm).
1377 case TargetOpcode::G_FSINCOS: {
1378 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1379 unsigned Size = LLTy.getSizeInBits();
1380 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1381 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1382 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1383 return UnableToLegalize;
1384 }
1385 return emitSincosLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1386 }
1387 case TargetOpcode::G_FMODF: {
1388 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1389 unsigned Size = LLTy.getSizeInBits();
1390 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1391 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1392 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1393 return UnableToLegalize;
1394 }
1395 return emitModfLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1396 }
// FP -> long/long-long rounding conversions: FP source type from operand 1,
// integer result type from operand 0.
1397 case TargetOpcode::G_LROUND:
1398 case TargetOpcode::G_LLROUND:
1399 case TargetOpcode::G_INTRINSIC_LRINT:
1400 case TargetOpcode::G_INTRINSIC_LLRINT: {
1401 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1402 unsigned Size = LLTy.getSizeInBits();
1403 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1404 Type *ITy = IntegerType::get(
1405 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1406 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1407 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1408 return UnableToLegalize;
1409 }
1410 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
// NOTE(review): the statement-start line (original 1411, presumably
// `LegalizeResult Status =`) is missing from this rendering.
1412 createLibcall(Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1413 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1414 if (Status != Legalized)
1415 return Status;
1416 MI.eraseFromParent();
1417 return Legalized;
1418 }
// powi/ldexp take a mixed (FP, int) argument list; the integer exponent is
// passed sign-extended.
1419 case TargetOpcode::G_FPOWI:
1420 case TargetOpcode::G_FLDEXP: {
1421 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1422 unsigned Size = LLTy.getSizeInBits();
1423 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1424 Type *ITy = IntegerType::get(
1425 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1426 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1427 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1428 return UnableToLegalize;
1429 }
1430 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
// NOTE(review): the args-declaration line (original 1431, presumably
// `SmallVector<CallLowering::ArgInfo, 3> Args = {`) is missing here.
1432 {MI.getOperand(1).getReg(), HLTy, 0},
1433 {MI.getOperand(2).getReg(), ITy, 1}};
1434 Args[1].Flags[0].setSExt();
// NOTE(review): original line 1435 (presumably
// `LegalizeResult Status = createLibcall(`) is missing here.
1436 Libcall, {MI.getOperand(0).getReg(), HLTy, 0}, Args, LocObserver, &MI);
1437 if (Status != Legalized)
1438 return Status;
1439 break;
1440 }
// FP <-> FP width conversions via conversionLibcall.
1441 case TargetOpcode::G_FPEXT:
1442 case TargetOpcode::G_FPTRUNC: {
1443 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1444 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1445 if (!FromTy || !ToTy)
1446 return UnableToLegalize;
1447 LegalizeResult Status = conversionLibcall(MI, ToTy, FromTy, LocObserver);
1448 if (Status != Legalized)
1449 return Status;
1450 break;
1451 }
1452 case TargetOpcode::G_FCMP: {
1453 LegalizeResult Status = createFCMPLibcall(MI, LocObserver);
1454 if (Status != Legalized)
1455 return Status;
1456 MI.eraseFromParent();
1457 return Status;
1458 }
1459 case TargetOpcode::G_FPTOSI:
1460 case TargetOpcode::G_FPTOUI: {
1461 // FIXME: Support other types
1462 Type *FromTy =
1463 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1464 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1465 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1466 return UnableToLegalize;
// NOTE(review): original line 1467 (presumably `LegalizeResult Status =
// conversionLibcall(MI, Type::getIntNTy(Ctx, ToSize),`) is missing here.
1468 FromTy, LocObserver);
1469 if (Status != Legalized)
1470 return Status;
1471 break;
1472 }
1473 case TargetOpcode::G_SITOFP:
1474 case TargetOpcode::G_UITOFP: {
1475 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1476 Type *ToTy =
1477 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1478 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1479 return UnableToLegalize;
1480 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
// NOTE(review): original line 1481 (presumably `LegalizeResult Status =
// conversionLibcall(`) is missing here.
1482 MI, ToTy, Type::getIntNTy(Ctx, FromSize), LocObserver, IsSigned);
1483 if (Status != Legalized)
1484 return Status;
1485 break;
1486 }
// Atomic RMW / cmpxchg: delegated to the outline-atomics helper.
1487 case TargetOpcode::G_ATOMICRMW_XCHG:
1488 case TargetOpcode::G_ATOMICRMW_ADD:
1489 case TargetOpcode::G_ATOMICRMW_SUB:
1490 case TargetOpcode::G_ATOMICRMW_AND:
1491 case TargetOpcode::G_ATOMICRMW_OR:
1492 case TargetOpcode::G_ATOMICRMW_XOR:
1493 case TargetOpcode::G_ATOMIC_CMPXCHG:
1494 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
// NOTE(review): original line 1495 (the statement producing `Status`, the
// call into the atomic-libcall helper) is missing from this rendering.
1496 if (Status != Legalized)
1497 return Status;
1498 break;
1499 }
// Memory intrinsics erase MI inside this case and return early.
1500 case TargetOpcode::G_BZERO:
1501 case TargetOpcode::G_MEMCPY:
1502 case TargetOpcode::G_MEMMOVE:
1503 case TargetOpcode::G_MEMSET: {
1504 LegalizeResult Result =
1505 createMemLibcall(*MIRBuilder.getMRI(), MI, LocObserver);
1506 if (Result != Legalized)
1507 return Result;
1508 MI.eraseFromParent();
1509 return Result;
1510 }
// FP environment/mode state handling (see the create*StateLibcall helpers).
1511 case TargetOpcode::G_GET_FPENV:
1512 case TargetOpcode::G_GET_FPMODE: {
1513 LegalizeResult Result = createGetStateLibcall(MI, LocObserver);
1514 if (Result != Legalized)
1515 return Result;
1516 break;
1517 }
1518 case TargetOpcode::G_SET_FPENV:
1519 case TargetOpcode::G_SET_FPMODE: {
1520 LegalizeResult Result = createSetStateLibcall(MI, LocObserver);
1521 if (Result != Legalized)
1522 return Result;
1523 break;
1524 }
1525 case TargetOpcode::G_RESET_FPENV:
1526 case TargetOpcode::G_RESET_FPMODE: {
1527 LegalizeResult Result = createResetStateLibcall(MI, LocObserver);
1528 if (Result != Legalized)
1529 return Result;
1530 break;
1531 }
1532 }
1533
// All cases that reach here (via break) succeeded; remove the original
// instruction now that the libcall sequence has replaced it.
1534 MI.eraseFromParent();
1535 return Legalized;
1536 }
1537
1539 unsigned TypeIdx,
1540 LLT NarrowTy) {
1541 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1542 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1543
1544 switch (MI.getOpcode()) {
1545 default:
1546 return UnableToLegalize;
1547 case TargetOpcode::G_IMPLICIT_DEF: {
1548 Register DstReg = MI.getOperand(0).getReg();
1549 LLT DstTy = MRI.getType(DstReg);
1550
1551 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1552 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1553 // FIXME: Although this would also be legal for the general case, it causes
1554 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1555 // combines not being hit). This seems to be a problem related to the
1556 // artifact combiner.
1557 if (SizeOp0 % NarrowSize != 0) {
1558 LLT ImplicitTy = DstTy.changeElementType(NarrowTy);
1559 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1560 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1561
1562 MI.eraseFromParent();
1563 return Legalized;
1564 }
1565
1566 int NumParts = SizeOp0 / NarrowSize;
1567
1569 for (int i = 0; i < NumParts; ++i)
1570 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1571
1572 if (DstTy.isVector())
1573 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1574 else
1575 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1576 MI.eraseFromParent();
1577 return Legalized;
1578 }
1579 case TargetOpcode::G_CONSTANT: {
1580 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1581 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1582 unsigned TotalSize = Ty.getSizeInBits();
1583 unsigned NarrowSize = NarrowTy.getSizeInBits();
1584 int NumParts = TotalSize / NarrowSize;
1585
1586 SmallVector<Register, 4> PartRegs;
1587 for (int I = 0; I != NumParts; ++I) {
1588 unsigned Offset = I * NarrowSize;
1589 auto K = MIRBuilder.buildConstant(NarrowTy,
1590 Val.lshr(Offset).trunc(NarrowSize));
1591 PartRegs.push_back(K.getReg(0));
1592 }
1593
1594 LLT LeftoverTy;
1595 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1596 SmallVector<Register, 1> LeftoverRegs;
1597 if (LeftoverBits != 0) {
1598 LeftoverTy = LLT::scalar(LeftoverBits);
1599 auto K = MIRBuilder.buildConstant(
1600 LeftoverTy,
1601 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1602 LeftoverRegs.push_back(K.getReg(0));
1603 }
1604
1605 insertParts(MI.getOperand(0).getReg(),
1606 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1607
1608 MI.eraseFromParent();
1609 return Legalized;
1610 }
1611 case TargetOpcode::G_SEXT:
1612 case TargetOpcode::G_ZEXT:
1613 case TargetOpcode::G_ANYEXT:
1614 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1615 case TargetOpcode::G_TRUNC: {
1616 if (TypeIdx != 1)
1617 return UnableToLegalize;
1618
1619 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1620 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1621 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1622 return UnableToLegalize;
1623 }
1624
1625 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1626 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1627 MI.eraseFromParent();
1628 return Legalized;
1629 }
1630 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1631 case TargetOpcode::G_FREEZE: {
1632 if (TypeIdx != 0)
1633 return UnableToLegalize;
1634
1635 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1636 // Should widen scalar first
1637 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1638 return UnableToLegalize;
1639
1640 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1642 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1643 Parts.push_back(
1644 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1645 .getReg(0));
1646 }
1647
1648 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1649 MI.eraseFromParent();
1650 return Legalized;
1651 }
1652 case TargetOpcode::G_ADD:
1653 case TargetOpcode::G_SUB:
1654 case TargetOpcode::G_SADDO:
1655 case TargetOpcode::G_SSUBO:
1656 case TargetOpcode::G_SADDE:
1657 case TargetOpcode::G_SSUBE:
1658 case TargetOpcode::G_UADDO:
1659 case TargetOpcode::G_USUBO:
1660 case TargetOpcode::G_UADDE:
1661 case TargetOpcode::G_USUBE:
1662 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1663 case TargetOpcode::G_MUL:
1664 case TargetOpcode::G_UMULH:
1665 return narrowScalarMul(MI, NarrowTy);
1666 case TargetOpcode::G_EXTRACT:
1667 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1668 case TargetOpcode::G_INSERT:
1669 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1670 case TargetOpcode::G_LOAD: {
1671 auto &LoadMI = cast<GLoad>(MI);
1672 Register DstReg = LoadMI.getDstReg();
1673 LLT DstTy = MRI.getType(DstReg);
1674 if (DstTy.isVector())
1675 return UnableToLegalize;
1676
1677 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1678 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1679 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1680 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1681 LoadMI.eraseFromParent();
1682 return Legalized;
1683 }
1684
1685 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1686 }
1687 case TargetOpcode::G_ZEXTLOAD:
1688 case TargetOpcode::G_SEXTLOAD:
1689 case TargetOpcode::G_FPEXTLOAD: {
1690 auto &LoadMI = cast<GExtLoad>(MI);
1691 Register DstReg = LoadMI.getDstReg();
1692 Register PtrReg = LoadMI.getPointerReg();
1693
1694 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1695 auto &MMO = LoadMI.getMMO();
1696 unsigned MemSize = MMO.getSizeInBits().getValue();
1697
1698 if (MemSize == NarrowSize) {
1699 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1700 } else if (MemSize < NarrowSize) {
1701 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1702 } else if (MemSize > NarrowSize) {
1703 // FIXME: Need to split the load.
1704 return UnableToLegalize;
1705 }
1706
1707 if (isa<GZExtLoad>(LoadMI))
1708 MIRBuilder.buildZExt(DstReg, TmpReg);
1709 else if (isa<GSExtLoad>(LoadMI))
1710 MIRBuilder.buildSExt(DstReg, TmpReg);
1711 else
1712 MIRBuilder.buildFPExt(DstReg, TmpReg);
1713
1714 LoadMI.eraseFromParent();
1715 return Legalized;
1716 }
1717 case TargetOpcode::G_STORE: {
1718 auto &StoreMI = cast<GStore>(MI);
1719
1720 Register SrcReg = StoreMI.getValueReg();
1721 LLT SrcTy = MRI.getType(SrcReg);
1722 if (SrcTy.isVector())
1723 return UnableToLegalize;
1724
1725 int NumParts = SizeOp0 / NarrowSize;
1726 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1727 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1728 if (SrcTy.isVector() && LeftoverBits != 0)
1729 return UnableToLegalize;
1730
1731 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1732 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1733 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1734 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1735 StoreMI.eraseFromParent();
1736 return Legalized;
1737 }
1738
1739 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1740 }
1741 case TargetOpcode::G_FPTRUNCSTORE: {
1742 auto &StoreMI = cast<GFPTruncStore>(MI);
1743 Register SrcReg = StoreMI.getValueReg();
1744 Register PtrReg = StoreMI.getPointerReg();
1745
1746 auto &MMO = StoreMI.getMMO();
1747 unsigned MemSize = MMO.getSizeInBits().getValue();
1748 if (MemSize > NarrowSize) {
1749 return UnableToLegalize;
1750 }
1751
1752 auto TmpReg = MIRBuilder.buildFPTrunc(NarrowTy, SrcReg);
1753 if (MemSize == NarrowSize) {
1754 MIRBuilder.buildStore(TmpReg, PtrReg, MMO);
1755 } else if (MemSize < NarrowSize) {
1756 MIRBuilder.buildStoreInstr(TargetOpcode::G_FPTRUNCSTORE, TmpReg, PtrReg,
1757 MMO);
1758 }
1759
1760 StoreMI.eraseFromParent();
1761 return Legalized;
1762 }
1763 case TargetOpcode::G_SELECT:
1764 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1765 case TargetOpcode::G_AND:
1766 case TargetOpcode::G_OR:
1767 case TargetOpcode::G_XOR: {
1768 // Legalize bitwise operation:
1769 // A = BinOp<Ty> B, C
1770 // into:
1771 // B1, ..., BN = G_UNMERGE_VALUES B
1772 // C1, ..., CN = G_UNMERGE_VALUES C
1773 // A1 = BinOp<Ty/N> B1, C2
1774 // ...
1775 // AN = BinOp<Ty/N> BN, CN
1776 // A = G_MERGE_VALUES A1, ..., AN
1777 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1778 }
1779 case TargetOpcode::G_SHL:
1780 case TargetOpcode::G_LSHR:
1781 case TargetOpcode::G_ASHR:
1782 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1783 case TargetOpcode::G_CTLZ:
1784 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1785 case TargetOpcode::G_CTTZ:
1786 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1787 case TargetOpcode::G_CTLS:
1788 case TargetOpcode::G_CTPOP:
1789 if (TypeIdx == 1)
1790 switch (MI.getOpcode()) {
1791 case TargetOpcode::G_CTLZ:
1792 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1793 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1794 case TargetOpcode::G_CTTZ:
1795 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1796 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1797 case TargetOpcode::G_CTPOP:
1798 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1799 case TargetOpcode::G_CTLS:
1800 return narrowScalarCTLS(MI, TypeIdx, NarrowTy);
1801 default:
1802 return UnableToLegalize;
1803 }
1804
1805 Observer.changingInstr(MI);
1806 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1807 Observer.changedInstr(MI);
1808 return Legalized;
1809 case TargetOpcode::G_INTTOPTR:
1810 if (TypeIdx != 1)
1811 return UnableToLegalize;
1812
1813 Observer.changingInstr(MI);
1814 narrowScalarSrc(MI, NarrowTy, 1);
1815 Observer.changedInstr(MI);
1816 return Legalized;
1817 case TargetOpcode::G_PTRTOINT:
1818 if (TypeIdx != 0)
1819 return UnableToLegalize;
1820
1821 Observer.changingInstr(MI);
1822 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1823 Observer.changedInstr(MI);
1824 return Legalized;
1825 case TargetOpcode::G_PHI: {
1826 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1827 // NarrowSize.
1828 if (SizeOp0 % NarrowSize != 0)
1829 return UnableToLegalize;
1830
1831 unsigned NumParts = SizeOp0 / NarrowSize;
1832 SmallVector<Register, 2> DstRegs(NumParts);
1833 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1834 Observer.changingInstr(MI);
1835 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1836 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1837 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1838 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1839 SrcRegs[i / 2], MIRBuilder, MRI);
1840 }
1841 MachineBasicBlock &MBB = *MI.getParent();
1842 MIRBuilder.setInsertPt(MBB, MI);
1843 for (unsigned i = 0; i < NumParts; ++i) {
1844 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1846 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1847 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1848 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1849 }
1850 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1851 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1852 Observer.changedInstr(MI);
1853 MI.eraseFromParent();
1854 return Legalized;
1855 }
1856 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1857 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1858 if (TypeIdx != 2)
1859 return UnableToLegalize;
1860
1861 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1862 Observer.changingInstr(MI);
1863 narrowScalarSrc(MI, NarrowTy, OpIdx);
1864 Observer.changedInstr(MI);
1865 return Legalized;
1866 }
1867 case TargetOpcode::G_ICMP: {
1868 Register LHS = MI.getOperand(2).getReg();
1869 LLT SrcTy = MRI.getType(LHS);
1870 CmpInst::Predicate Pred =
1871 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1872
1873 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1874 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1875 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1876 LHSLeftoverRegs, MIRBuilder, MRI))
1877 return UnableToLegalize;
1878
1879 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1880 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1881 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1882 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1883 return UnableToLegalize;
1884
1885 // We now have the LHS and RHS of the compare split into narrow-type
1886 // registers, plus potentially some leftover type.
1887 Register Dst = MI.getOperand(0).getReg();
1888 LLT ResTy = MRI.getType(Dst);
1889 if (ICmpInst::isEquality(Pred)) {
1890 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1891 // them together. For each equal part, the result should be all 0s. For
1892 // each non-equal part, we'll get at least one 1.
1893 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1895 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1896 auto LHS = std::get<0>(LHSAndRHS);
1897 auto RHS = std::get<1>(LHSAndRHS);
1898 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1899 Xors.push_back(Xor);
1900 }
1901
1902 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1903 // to the desired narrow type so that we can OR them together later.
1904 SmallVector<Register, 4> WidenedXors;
1905 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1906 auto LHS = std::get<0>(LHSAndRHS);
1907 auto RHS = std::get<1>(LHSAndRHS);
1908 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1909 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1910 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1911 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1912 llvm::append_range(Xors, WidenedXors);
1913 }
1914
1915 // Now, for each part we broke up, we know if they are equal/not equal
1916 // based off the G_XOR. We can OR these all together and compare against
1917 // 0 to get the result.
1918 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1919 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1920 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1921 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1922 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1923 } else {
1924 Register CmpIn;
1925 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1926 Register CmpOut;
1927 CmpInst::Predicate PartPred;
1928
1929 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1930 PartPred = Pred;
1931 CmpOut = Dst;
1932 } else {
1933 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1934 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1935 }
1936
1937 if (!CmpIn) {
1938 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
1939 RHSPartRegs[I]);
1940 } else {
1941 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
1942 RHSPartRegs[I]);
1943 auto CmpEq = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1944 LHSPartRegs[I], RHSPartRegs[I]);
1945 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1946 }
1947
1948 CmpIn = CmpOut;
1949 }
1950
1951 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1952 Register CmpOut;
1953 CmpInst::Predicate PartPred;
1954
1955 if (I == E - 1) {
1956 PartPred = Pred;
1957 CmpOut = Dst;
1958 } else {
1959 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1960 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1961 }
1962
1963 if (!CmpIn) {
1964 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
1965 RHSLeftoverRegs[I]);
1966 } else {
1967 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
1968 RHSLeftoverRegs[I]);
1969 auto CmpEq =
1970 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1971 LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
1972 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1973 }
1974
1975 CmpIn = CmpOut;
1976 }
1977 }
1978 MI.eraseFromParent();
1979 return Legalized;
1980 }
1981 case TargetOpcode::G_FCMP:
1982 if (TypeIdx != 0)
1983 return UnableToLegalize;
1984
1985 Observer.changingInstr(MI);
1986 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1987 Observer.changedInstr(MI);
1988 return Legalized;
1989
1990 case TargetOpcode::G_SEXT_INREG: {
1991 if (TypeIdx != 0)
1992 return UnableToLegalize;
1993
1994 int64_t SizeInBits = MI.getOperand(2).getImm();
1995
1996 // So long as the new type has more bits than the bits we're extending we
1997 // don't need to break it apart.
1998 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1999 Observer.changingInstr(MI);
2000 // We don't lose any non-extension bits by truncating the src and
2001 // sign-extending the dst.
2002 MachineOperand &MO1 = MI.getOperand(1);
2003 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
2004 MO1.setReg(TruncMIB.getReg(0));
2005
2006 MachineOperand &MO2 = MI.getOperand(0);
2007 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
2008 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2009 MIRBuilder.buildSExt(MO2, DstExt);
2010 MO2.setReg(DstExt);
2011 Observer.changedInstr(MI);
2012 return Legalized;
2013 }
2014
2015 // Break it apart. Components below the extension point are unmodified. The
2016 // component containing the extension point becomes a narrower SEXT_INREG.
2017 // Components above it are ashr'd from the component containing the
2018 // extension point.
2019 if (SizeOp0 % NarrowSize != 0)
2020 return UnableToLegalize;
2021 int NumParts = SizeOp0 / NarrowSize;
2022
2023 // List the registers where the destination will be scattered.
2025 // List the registers where the source will be split.
2027
2028 // Create all the temporary registers.
2029 for (int i = 0; i < NumParts; ++i) {
2030 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
2031
2032 SrcRegs.push_back(SrcReg);
2033 }
2034
2035 // Explode the big arguments into smaller chunks.
2036 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
2037
2038 Register AshrCstReg =
2039 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
2040 .getReg(0);
2041 Register FullExtensionReg;
2042 Register PartialExtensionReg;
2043
2044 // Do the operation on each small part.
2045 for (int i = 0; i < NumParts; ++i) {
2046 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
2047 DstRegs.push_back(SrcRegs[i]);
2048 PartialExtensionReg = DstRegs.back();
2049 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
2050 assert(PartialExtensionReg &&
2051 "Expected to visit partial extension before full");
2052 if (FullExtensionReg) {
2053 DstRegs.push_back(FullExtensionReg);
2054 continue;
2055 }
2056 DstRegs.push_back(
2057 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
2058 .getReg(0));
2059 FullExtensionReg = DstRegs.back();
2060 } else {
2061 DstRegs.push_back(
2063 .buildInstr(
2064 TargetOpcode::G_SEXT_INREG, {NarrowTy},
2065 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
2066 .getReg(0));
2067 PartialExtensionReg = DstRegs.back();
2068 }
2069 }
2070
2071 // Gather the destination registers into the final destination.
2072 Register DstReg = MI.getOperand(0).getReg();
2073 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2074 MI.eraseFromParent();
2075 return Legalized;
2076 }
2077 case TargetOpcode::G_BSWAP:
2078 case TargetOpcode::G_BITREVERSE: {
2079 if (SizeOp0 % NarrowSize != 0)
2080 return UnableToLegalize;
2081
2082 Observer.changingInstr(MI);
2083 SmallVector<Register, 2> SrcRegs, DstRegs;
2084 unsigned NumParts = SizeOp0 / NarrowSize;
2085 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2086 MIRBuilder, MRI);
2087
2088 for (unsigned i = 0; i < NumParts; ++i) {
2089 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
2090 {SrcRegs[NumParts - 1 - i]});
2091 DstRegs.push_back(DstPart.getReg(0));
2092 }
2093
2094 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
2095
2096 Observer.changedInstr(MI);
2097 MI.eraseFromParent();
2098 return Legalized;
2099 }
2100 case TargetOpcode::G_PTR_ADD:
2101 case TargetOpcode::G_PTRMASK: {
2102 if (TypeIdx != 1)
2103 return UnableToLegalize;
2104 Observer.changingInstr(MI);
2105 narrowScalarSrc(MI, NarrowTy, 2);
2106 Observer.changedInstr(MI);
2107 return Legalized;
2108 }
2109 case TargetOpcode::G_FPTOUI:
2110 case TargetOpcode::G_FPTOSI:
2111 case TargetOpcode::G_FPTOUI_SAT:
2112 case TargetOpcode::G_FPTOSI_SAT:
2113 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
2114 case TargetOpcode::G_FPEXT:
2115 if (TypeIdx != 0)
2116 return UnableToLegalize;
2117 Observer.changingInstr(MI);
2118 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
2119 Observer.changedInstr(MI);
2120 return Legalized;
2121 case TargetOpcode::G_FLDEXP:
2122 case TargetOpcode::G_STRICT_FLDEXP:
2123 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
2124 case TargetOpcode::G_VSCALE: {
2125 Register Dst = MI.getOperand(0).getReg();
2126 LLT Ty = MRI.getType(Dst);
2127
2128 // Assume VSCALE(1) fits into a legal integer
2129 const APInt One(NarrowTy.getSizeInBits(), 1);
2130 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
2131 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
2132 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
2133 MIRBuilder.buildMul(Dst, ZExt, C);
2134
2135 MI.eraseFromParent();
2136 return Legalized;
2137 }
2138 }
2139}
2140
// Coerce \p Val to an integer scalar of the same total bit width so it can be
// used by integer-only operations. Scalars pass through unchanged; pointers
// are converted with G_PTRTOINT; vectors are bitcast to one wide integer
// (pointer vectors go through G_PTRTOINT first). Returns an invalid Register
// for pointers in non-integral address spaces, which must not be
// reinterpreted as integers.
2142  LLT Ty = MRI.getType(Val);
2143  if (Ty.isScalar())
2144    return Val;
2145
2146  const DataLayout &DL = MIRBuilder.getDataLayout();
2147  LLT NewTy = LLT::scalar(Ty.getSizeInBits());
2148  if (Ty.isPointer()) {
      // Non-integral address spaces forbid pointer<->integer reinterpretation.
2149    if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2150      return Register();
2151    return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2152  }
2153
2154  Register NewVal = Val;
2155
2156  assert(Ty.isVector());
      // Pointer elements must become integers before the vector can be bitcast
      // to a single scalar.
2157  if (Ty.isPointerVector())
2158    NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2159  return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2160}
2161
2163 unsigned OpIdx, unsigned ExtOpcode) {
  // Widen the use operand at \p OpIdx in place: build \p ExtOpcode (e.g.
  // G_ANYEXT/G_SEXT/G_ZEXT) of the current operand to \p WideTy and rewrite
  // the operand to use the extended register.
2164 MachineOperand &MO = MI.getOperand(OpIdx);
2165 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2166 MO.setReg(ExtB.getReg(0));
2167}
2168
2170 unsigned OpIdx) {
  // Narrow the use operand at \p OpIdx in place: truncate the current operand
  // to \p NarrowTy and rewrite the operand to use the truncated register.
2171 MachineOperand &MO = MI.getOperand(OpIdx);
2172 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
2173 MO.setReg(ExtB.getReg(0));
2174}
2175
2177 unsigned OpIdx, unsigned TruncOpcode) {
  // Widen the def operand at \p OpIdx: the instruction now defines a fresh
  // \p WideTy register, and \p TruncOpcode (e.g. G_TRUNC) is inserted *after*
  // MI to produce the original destination from the widened value.
2178 MachineOperand &MO = MI.getOperand(OpIdx);
2179 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
     // Insert the conversion after MI so it consumes MI's new wide def.
2180 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2181 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2182 MO.setReg(DstExt);
2183}
2184
2186 unsigned OpIdx, unsigned ExtOpcode) {
  // Narrow the def operand at \p OpIdx: the instruction now defines a fresh
  // \p NarrowTy register, and \p ExtOpcode (e.g. G_ZEXT/G_SEXT) is inserted
  // *after* MI to rebuild the original wider destination.
2187 MachineOperand &MO = MI.getOperand(OpIdx);
2188 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
     // Insert the extension after MI so it consumes MI's new narrow def.
2189 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2190 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2191 MO.setReg(DstTrunc);
2192}
2193
2195 unsigned OpIdx) {
  // Grow the vector def at \p OpIdx to \p WideTy: MI now defines a wider
  // vector, and the original destination is recovered after MI by deleting
  // the trailing (padding) elements.
2196 MachineOperand &MO = MI.getOperand(OpIdx);
     // Insert the recovery instruction after MI so it sees the widened def.
2197 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2198 Register Dst = MO.getReg();
2199 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2200 MO.setReg(DstExt);
2201 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2202}
2203
2205 unsigned OpIdx) {
  // Grow the vector use at \p OpIdx to \p MoreTy by padding the extra
  // trailing elements with undef, then rewrite the operand.
2206 MachineOperand &MO = MI.getOperand(OpIdx);
2207 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2208}
2209
  // Bitcast the use operand at \p OpIdx to \p CastTy and rewrite the operand
  // to use the cast result.
2211 MachineOperand &Op = MI.getOperand(OpIdx);
2212 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
2213}
2214
  // Bitcast the def operand at \p OpIdx: MI now defines a fresh \p CastTy
  // register, and a bitcast inserted *after* MI converts it back to the
  // original destination register.
2216 MachineOperand &MO = MI.getOperand(OpIdx);
2217 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
     // Insert the back-cast after MI so it consumes MI's new def.
2218 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2219 MIRBuilder.buildBitcast(MO, CastDst);
2220 MO.setReg(CastDst);
2221}
2222
// Widen the scalar source operands (TypeIdx 1) of a G_MERGE_VALUES to
// \p WideTy. If WideTy covers the whole destination, the sources are packed
// with zext/shl/or directly; otherwise the sources are re-sliced through a
// GCD type and re-merged at WideTy granularity (see the inline examples).
2224 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
2225                                         LLT WideTy) {
2226   if (TypeIdx != 1)
2227     return UnableToLegalize;
2228
2229   auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
2230   if (DstTy.isVector())
2231     return UnableToLegalize;
2232
2233   LLT SrcTy = MRI.getType(Src1Reg);
2234   const int DstSize = DstTy.getSizeInBits();
2235   const int SrcSize = SrcTy.getSizeInBits();
2236   const int WideSize = WideTy.getSizeInBits();
       // Number of WideTy pieces needed to cover the destination (rounded up).
2237   const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2238
2239   unsigned NumOps = MI.getNumOperands();
2240   unsigned NumSrc = MI.getNumOperands() - 1;
2241   unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2242
2243   if (WideSize >= DstSize) {
2244     // Directly pack the bits in the target type.
         // Start from the zero-extended first source; each later source is
         // zero-extended, shifted to its bit offset, and OR'd in.
2245     Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
2246
2247     for (unsigned I = 2; I != NumOps; ++I) {
2248       const unsigned Offset = (I - 1) * PartSize;
2249
2250       Register SrcReg = MI.getOperand(I).getReg();
2251       assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
2252
2253       auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
2254
           // The final OR can define DstReg directly only when no trailing
           // trunc/inttoptr is needed.
2255       Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
2256         MRI.createGenericVirtualRegister(WideTy);
2257
2258       auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
2259       auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
2260       MIRBuilder.buildOr(NextResult, ResultReg, Shl);
2261       ResultReg = NextResult;
2262     }
2263
2264     if (WideSize > DstSize)
2265       MIRBuilder.buildTrunc(DstReg, ResultReg);
2266     else if (DstTy.isPointer())
2267       MIRBuilder.buildIntToPtr(DstReg, ResultReg);
2268
2269     MI.eraseFromParent();
2270     return Legalized;
2271   }
2272
2273   // Unmerge the original values to the GCD type, and recombine to the next
2274   // multiple greater than the original type.
2275   //
2276   // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2277   // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2278   // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2279   // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2280   // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2281   // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2282   // %12:_(s12) = G_MERGE_VALUES %10, %11
2283   //
2284   // Padding with undef if necessary:
2285   //
2286   // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2287   // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2288   // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2289   // %7:_(s2) = G_IMPLICIT_DEF
2290   // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2291   // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2292   // %10:_(s12) = G_MERGE_VALUES %8, %9
2293
2294   const int GCD = std::gcd(SrcSize, WideSize);
2295   LLT GCDTy = LLT::scalar(GCD);
2296
2297   SmallVector<Register, 8> NewMergeRegs;
2298   SmallVector<Register, 8> Unmerges;
2299   LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
2300
2301   // Decompose the original operands if they don't evenly divide.
2302   for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
2303     Register SrcReg = MO.getReg();
2304     if (GCD == SrcSize) {
2305       Unmerges.push_back(SrcReg);
2306     } else {
2307       auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2308       for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2309         Unmerges.push_back(Unmerge.getReg(J));
2310     }
2311   }
2312
2313   // Pad with undef to the next size that is a multiple of the requested size.
     // NOTE(review): the bound here is NumMerge * WideSize (a bit count), not
     // a GCD-piece count (NumMerge * WideSize / GCD) — this matches upstream;
     // verify against the upstream tree before changing.
2314   if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
2315     Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
2316     for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
2317       Unmerges.push_back(UndefReg);
2318   }
2319
2320   const int PartsPerGCD = WideSize / GCD;
2321
2322   // Build merges of each piece.
2323   ArrayRef<Register> Slicer(Unmerges);
2324   for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2325     auto Merge =
2326         MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2327     NewMergeRegs.push_back(Merge.getReg(0));
2328   }
2329
2330   // A truncate may be necessary if the requested type doesn't evenly divide the
2331   // original result type.
2332   if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
2333     MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2334   } else {
2335     auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2336     MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2337   }
2338
2339   MI.eraseFromParent();
2340   return Legalized;
2341 }
2342
// Widen the scalar results (TypeIdx 0) of a G_UNMERGE_VALUES to \p WideTy.
// When WideTy covers the whole source, results are extracted with shift+trunc.
// Otherwise the source is anyext'd to the LCM of source and WideTy, unmerged
// at WideTy, and re-sliced/re-merged back to the original result type (see
// the inline example), padding with dead defs as needed.
2344 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
2345                                           LLT WideTy) {
2346   if (TypeIdx != 0)
2347     return UnableToLegalize;
2348
2349   int NumDst = MI.getNumOperands() - 1;
       // The single source is the last operand; all earlier operands are defs.
2350   Register SrcReg = MI.getOperand(NumDst).getReg();
2351   LLT SrcTy = MRI.getType(SrcReg);
2352   if (SrcTy.isVector())
2353     return UnableToLegalize;
2354
2355   Register Dst0Reg = MI.getOperand(0).getReg();
2356   LLT DstTy = MRI.getType(Dst0Reg);
2357   if (!DstTy.isScalar())
2358     return UnableToLegalize;
2359
2360   if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
2361     if (SrcTy.isPointer()) {
2362       const DataLayout &DL = MIRBuilder.getDataLayout();
2363       if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
2364         LLVM_DEBUG(
2365             dbgs() << "Not casting non-integral address space integer\n");
2366         return UnableToLegalize;
2367       }
2368
2369       SrcTy = LLT::scalar(SrcTy.getSizeInBits());
2370       SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2371     }
2372
2373     // Widen SrcTy to WideTy. This does not affect the result, but since the
2374     // user requested this size, it is probably better handled than SrcTy and
2375     // should reduce the total number of legalization artifacts.
2376     if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2377       SrcTy = WideTy;
2378       SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2379     }
2380
2381     // There's no unmerge type to target. Directly extract the bits from the
2382     // source type.
2383     unsigned DstSize = DstTy.getSizeInBits();
2384
2385     MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2386     for (int I = 1; I != NumDst; ++I) {
2387       auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2388       auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2389       MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2390     }
2391
2392     MI.eraseFromParent();
2393     return Legalized;
2394   }
2395
2396   // Extend the source to a wider type.
2397   LLT LCMTy = getLCMType(SrcTy, WideTy);
2398
2399   Register WideSrc = SrcReg;
2400   if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2401     // TODO: If this is an integral address space, cast to integer and anyext.
2402     if (SrcTy.isPointer()) {
2403       LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2404       return UnableToLegalize;
2405     }
2406
2407     WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2408   }
2409
2410   auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2411
2412   // Create a sequence of unmerges and merges to the original results. Since we
2413   // may have widened the source, we will need to pad the results with dead defs
2414   // to cover the source register.
2415   // e.g. widen s48 to s64:
2416   // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2417   //
2418   // =>
2419   // %4:_(s192) = G_ANYEXT %0:_(s96)
2420   // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2421   // ; unpack to GCD type, with extra dead defs
2422   // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2423   // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2424   // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
2425   // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2426   // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2427   const LLT GCDTy = getGCDType(WideTy, DstTy);
2428   const int NumUnmerge = Unmerge->getNumOperands() - 1;
2429   const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2430
2431   // Directly unmerge to the destination without going through a GCD type
2432   // if possible
2433   if (PartsPerRemerge == 1) {
2434     const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2435
2436     for (int I = 0; I != NumUnmerge; ++I) {
2437       auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2438
2439       for (int J = 0; J != PartsPerUnmerge; ++J) {
2440         int Idx = I * PartsPerUnmerge + J;
2441         if (Idx < NumDst)
2442           MIB.addDef(MI.getOperand(Idx).getReg());
2443         else {
2444           // Create dead def for excess components.
2445           MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2446         }
2447       }
2448
2449       MIB.addUse(Unmerge.getReg(I));
2450     }
2451   } else {
       // Slice every widened piece down to GCDTy, then merge runs of
       // PartsPerRemerge GCD pieces back into each original destination.
2452     SmallVector<Register, 16> Parts;
2453     for (int J = 0; J != NumUnmerge; ++J)
2454       extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2455
2456     SmallVector<Register, 8> RemergeParts;
2457     for (int I = 0; I != NumDst; ++I) {
2458       for (int J = 0; J < PartsPerRemerge; ++J) {
2459         const int Idx = I * PartsPerRemerge + J;
2460         RemergeParts.emplace_back(Parts[Idx]);
2461       }
2462
2463       MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2464       RemergeParts.clear();
2465     }
2466   }
2467
2468   MI.eraseFromParent();
2469   return Legalized;
2470 }
2471
// Widen a G_EXTRACT. For TypeIdx 0 (the result), scalar extracts are lowered
// to a shift in the (possibly widened) source type followed by a trunc.
// For TypeIdx 1 (the source), scalar sources are simply anyext'd; vector
// sources are handled only for whole-element extracts, rescaling the bit
// offset to the widened vector.
2473 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2474                                     LLT WideTy) {
2475   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2476   unsigned Offset = MI.getOperand(2).getImm();
2477
2478   if (TypeIdx == 0) {
2479     if (SrcTy.isVector() || DstTy.isVector())
2480       return UnableToLegalize;
2481
2482     SrcOp Src(SrcReg);
2483     if (SrcTy.isPointer()) {
2484       // Extracts from pointers can be handled only if they are really just
2485       // simple integers.
2486       const DataLayout &DL = MIRBuilder.getDataLayout();
2487       if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2488         return UnableToLegalize;
2489
2490       LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2491       Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2492       SrcTy = SrcAsIntTy;
2493     }
2494
2495     if (DstTy.isPointer())
2496       return UnableToLegalize;
2497
2498     if (Offset == 0) {
2499       // Avoid a shift in the degenerate case.
2500       MIRBuilder.buildTrunc(DstReg,
2501                             MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2502       MI.eraseFromParent();
2503       return Legalized;
2504     }
2505
2506     // Do a shift in the source type.
2507     LLT ShiftTy = SrcTy;
2508     if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2509       Src = MIRBuilder.buildAnyExt(WideTy, Src);
2510       ShiftTy = WideTy;
2511     }
2512
2513     auto LShr = MIRBuilder.buildLShr(
2514       ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2515     MIRBuilder.buildTrunc(DstReg, LShr);
2516     MI.eraseFromParent();
2517     return Legalized;
2518   }
2519
2520   if (SrcTy.isScalar()) {
2521     Observer.changingInstr(MI);
2522     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2523     Observer.changedInstr(MI);
2524     return Legalized;
2525   }
2526
2527   if (!SrcTy.isVector())
2528     return UnableToLegalize;
2529
       // Only element extracts from a vector source are supported.
2530   if (DstTy != SrcTy.getElementType())
2531     return UnableToLegalize;
2532
2533   if (Offset % SrcTy.getScalarSizeInBits() != 0)
2534     return UnableToLegalize;
2535
2536   Observer.changingInstr(MI);
2537   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2538
       // Rescale the bit offset for the widened source.
2539   MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2540                           Offset);
2541   widenScalarDst(MI, WideTy.getScalarType(), 0);
2542   Observer.changedInstr(MI);
2543   return Legalized;
2544 }
2545
// Widen a G_INSERT result (TypeIdx 0 only, scalar WideTy only): anyext the
// big source operand and widen the def, leaving the inserted value and the
// bit offset untouched.
2547 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2548                                    LLT WideTy) {
2549   if (TypeIdx != 0 || WideTy.isVector())
2550     return UnableToLegalize;
2551   Observer.changingInstr(MI);
2552   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2553   widenScalarDst(MI, WideTy);
2554   Observer.changedInstr(MI);
2555   return Legalized;
2556 }
2557
// Widen the overflowing add/sub family (G_[SU]ADDO/[SU]SUBO/[SU]ADDE/[SU]SUBE).
// TypeIdx 1 widens only the boolean carry operands. TypeIdx 0 performs the
// arithmetic in WideTy on sign- or zero-extended operands and recomputes the
// overflow flag by comparing the wide result against its trunc-then-re-extend.
2560 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2561                                            LLT WideTy) {
2562   unsigned Opcode;
2563   unsigned ExtOpcode;
       // Set only for the carry-in variants (G_[SU]ADDE/G_[SU]SUBE).
2564   std::optional<Register> CarryIn;
2565   switch (MI.getOpcode()) {
2566   default:
2567     llvm_unreachable("Unexpected opcode!");
2568   case TargetOpcode::G_SADDO:
2569     Opcode = TargetOpcode::G_ADD;
2570     ExtOpcode = TargetOpcode::G_SEXT;
2571     break;
2572   case TargetOpcode::G_SSUBO:
2573     Opcode = TargetOpcode::G_SUB;
2574     ExtOpcode = TargetOpcode::G_SEXT;
2575     break;
2576   case TargetOpcode::G_UADDO:
2577     Opcode = TargetOpcode::G_ADD;
2578     ExtOpcode = TargetOpcode::G_ZEXT;
2579     break;
2580   case TargetOpcode::G_USUBO:
2581     Opcode = TargetOpcode::G_SUB;
2582     ExtOpcode = TargetOpcode::G_ZEXT;
2583     break;
       // Signed carry ops are done as unsigned carry ops on sign-extended
       // operands; the overflow check below handles the signed semantics.
2584   case TargetOpcode::G_SADDE:
2585     Opcode = TargetOpcode::G_UADDE;
2586     ExtOpcode = TargetOpcode::G_SEXT;
2587     CarryIn = MI.getOperand(4).getReg();
2588     break;
2589   case TargetOpcode::G_SSUBE:
2590     Opcode = TargetOpcode::G_USUBE;
2591     ExtOpcode = TargetOpcode::G_SEXT;
2592     CarryIn = MI.getOperand(4).getReg();
2593     break;
2594   case TargetOpcode::G_UADDE:
2595     Opcode = TargetOpcode::G_UADDE;
2596     ExtOpcode = TargetOpcode::G_ZEXT;
2597     CarryIn = MI.getOperand(4).getReg();
2598     break;
2599   case TargetOpcode::G_USUBE:
2600     Opcode = TargetOpcode::G_USUBE;
2601     ExtOpcode = TargetOpcode::G_ZEXT;
2602     CarryIn = MI.getOperand(4).getReg();
2603     break;
2604   }
2605
2606   if (TypeIdx == 1) {
       // Only the boolean carry-out (and carry-in, if any) change type.
2607     unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2608
2609     Observer.changingInstr(MI);
2610     if (CarryIn)
2611       widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2612     widenScalarDst(MI, WideTy, 1);
2613
2614     Observer.changedInstr(MI);
2615     return Legalized;
2616   }
2617
2618   auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2619   auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2620   // Do the arithmetic in the larger type.
2621   Register NewOp;
2622   if (CarryIn) {
2623     LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2624     NewOp = MIRBuilder
2625                 .buildInstr(Opcode, {WideTy, CarryOutTy},
2626                             {LHSExt, RHSExt, *CarryIn})
2627                 .getReg(0);
2628   } else {
2629     NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2630   }
2631   LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2632   auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2633   auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2634   // There is no overflow if the ExtOp is the same as NewOp.
2635   MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2636   // Now trunc the NewOp to the original result.
2637   MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2638   MI.eraseFromParent();
2639   return Legalized;
2640 }
2640
// Widen the saturating add/sub/shift family (G_[SU]ADDSAT/[SU]SUBSAT/
// [SU]SHLSAT). The operands are extended and left-shifted so the saturation
// point of the narrow type lines up with the saturation point of WideTy;
// the wide result is shifted back down and truncated.
2642 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2643                                          LLT WideTy) {
2644   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2645                   MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2646                   MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2647   bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2648                  MI.getOpcode() == TargetOpcode::G_USHLSAT;
2649   // We can convert this to:
2650   //   1. Any extend iN to iM
2651   //   2. SHL by M-N
2652   //   3. [US][ADD|SUB|SHL]SAT
2653   //   4. L/ASHR by M-N
2654   //
2655   // It may be more efficient to lower this to a min and a max operation in
2656   //   the higher precision arithmetic if the promoted operation isn't legal,
2657   //   but this decision is up to the target's lowering request.
2658   Register DstReg = MI.getOperand(0).getReg();
2659
2660   unsigned NewBits = WideTy.getScalarSizeInBits();
2661   unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2662
2663   // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2664   // must not left shift the RHS to preserve the shift amount.
2665   auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2666   auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2667                      : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2668   auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2669   auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2670   auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2671
2672   auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2673                                         {ShiftL, ShiftR}, MI.getFlags());
2674
2675   // Use a shift that will preserve the number of sign bits when the trunc is
2676   // folded away.
2677   auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2678                          : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2679
2680   MIRBuilder.buildTrunc(DstReg, Result);
2681   MI.eraseFromParent();
2682   return Legalized;
2683 }
2684
// Widen G_UMULO/G_SMULO. TypeIdx 1 widens only the overflow flag. TypeIdx 0
// multiplies the zero/sign-extended operands in WideTy and derives the
// original overflow flag: overflow happened iff the high bits of the wide
// product don't zero/sign-extend the low bits, OR (when WideTy is narrower
// than twice the source width) the wide multiply itself overflowed.
2686 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2687                                  LLT WideTy) {
2688   if (TypeIdx == 1) {
2689     Observer.changingInstr(MI);
2690     widenScalarDst(MI, WideTy, 1);
2691     Observer.changedInstr(MI);
2692     return Legalized;
2693   }
2694
2695   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2696   auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2697   LLT SrcTy = MRI.getType(LHS);
2698   LLT OverflowTy = MRI.getType(OriginalOverflow);
2699   unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2700
2701   // To determine if the result overflowed in the larger type, we extend the
2702   // input to the larger type, do the multiply (checking if it overflows),
2703   // then also check the high bits of the result to see if overflow happened
2704   // there.
2705   unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2706   auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2707   auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2708
2709   // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2710   // so we don't need to check the overflow result of larger type Mulo.
2711   bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2712
2713   unsigned MulOpc =
2714       WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2715
2716   MachineInstrBuilder Mulo;
2717   if (WideMulCanOverflow)
2718     Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2719                                  {LeftOperand, RightOperand});
2720   else
2721     Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2722
2723   auto Mul = Mulo->getOperand(0);
2724   MIRBuilder.buildTrunc(Result, Mul);
2725
2726   MachineInstrBuilder ExtResult;
2727   // Overflow occurred if it occurred in the larger type, or if the high part
2728   // of the result does not zero/sign-extend the low part. Check this second
2729   // possibility first.
2730   if (IsSigned) {
2731     // For signed, overflow occurred when the high part does not sign-extend
2732     // the low part.
2733     ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2734   } else {
2735     // Unsigned overflow occurred when the high part does not zero-extend the
2736     // low part.
2737     ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2738   }
2739
2740   if (WideMulCanOverflow) {
2741     auto Overflow =
2742         MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2743     // Finally check if the multiplication in the larger type itself overflowed.
2744     MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2745   } else {
2746     MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2747   }
2748   MI.eraseFromParent();
2749   return Legalized;
2750 }
2751
2754 unsigned Opcode = MI.getOpcode();
2755 switch (Opcode) {
2756 default:
2757 return UnableToLegalize;
2758 case TargetOpcode::G_ATOMICRMW_XCHG:
2759 case TargetOpcode::G_ATOMICRMW_ADD:
2760 case TargetOpcode::G_ATOMICRMW_SUB:
2761 case TargetOpcode::G_ATOMICRMW_AND:
2762 case TargetOpcode::G_ATOMICRMW_OR:
2763 case TargetOpcode::G_ATOMICRMW_XOR:
2764 case TargetOpcode::G_ATOMICRMW_MIN:
2765 case TargetOpcode::G_ATOMICRMW_MAX:
2766 case TargetOpcode::G_ATOMICRMW_UMIN:
2767 case TargetOpcode::G_ATOMICRMW_UMAX:
2768 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2769 Observer.changingInstr(MI);
2770 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2771 widenScalarDst(MI, WideTy, 0);
2772 Observer.changedInstr(MI);
2773 return Legalized;
2774 case TargetOpcode::G_ATOMIC_CMPXCHG:
2775 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2776 Observer.changingInstr(MI);
2777 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2778 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2779 widenScalarDst(MI, WideTy, 0);
2780 Observer.changedInstr(MI);
2781 return Legalized;
2782 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2783 if (TypeIdx == 0) {
2784 Observer.changingInstr(MI);
2785 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2786 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2787 widenScalarDst(MI, WideTy, 0);
2788 Observer.changedInstr(MI);
2789 return Legalized;
2790 }
2791 assert(TypeIdx == 1 &&
2792 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2793 Observer.changingInstr(MI);
2794 widenScalarDst(MI, WideTy, 1);
2795 Observer.changedInstr(MI);
2796 return Legalized;
2797 case TargetOpcode::G_EXTRACT:
2798 return widenScalarExtract(MI, TypeIdx, WideTy);
2799 case TargetOpcode::G_INSERT:
2800 return widenScalarInsert(MI, TypeIdx, WideTy);
2801 case TargetOpcode::G_MERGE_VALUES:
2802 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2803 case TargetOpcode::G_UNMERGE_VALUES:
2804 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2805 case TargetOpcode::G_SADDO:
2806 case TargetOpcode::G_SSUBO:
2807 case TargetOpcode::G_UADDO:
2808 case TargetOpcode::G_USUBO:
2809 case TargetOpcode::G_SADDE:
2810 case TargetOpcode::G_SSUBE:
2811 case TargetOpcode::G_UADDE:
2812 case TargetOpcode::G_USUBE:
2813 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2814 case TargetOpcode::G_UMULO:
2815 case TargetOpcode::G_SMULO:
2816 return widenScalarMulo(MI, TypeIdx, WideTy);
2817 case TargetOpcode::G_SADDSAT:
2818 case TargetOpcode::G_SSUBSAT:
2819 case TargetOpcode::G_SSHLSAT:
2820 case TargetOpcode::G_UADDSAT:
2821 case TargetOpcode::G_USUBSAT:
2822 case TargetOpcode::G_USHLSAT:
2823 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2824 case TargetOpcode::G_CTTZ:
2825 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2826 case TargetOpcode::G_CTLZ:
2827 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2828 case TargetOpcode::G_CTLS:
2829 case TargetOpcode::G_CTPOP: {
2830 if (TypeIdx == 0) {
2831 Observer.changingInstr(MI);
2832 widenScalarDst(MI, WideTy, 0);
2833 Observer.changedInstr(MI);
2834 return Legalized;
2835 }
2836
2837 Register SrcReg = MI.getOperand(1).getReg();
2838
2839 // First extend the input.
2840 unsigned ExtOpc;
2841 switch (Opcode) {
2842 case TargetOpcode::G_CTTZ:
2843 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2844 case TargetOpcode::G_CTLZ_ZERO_UNDEF: // undef bits shifted out below
2845 ExtOpc = TargetOpcode::G_ANYEXT;
2846 break;
2847 case TargetOpcode::G_CTLS:
2848 ExtOpc = TargetOpcode::G_SEXT;
2849 break;
2850 default:
2851 ExtOpc = TargetOpcode::G_ZEXT;
2852 }
2853
2854 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2855 LLT CurTy = MRI.getType(SrcReg);
2856 unsigned NewOpc = Opcode;
2857 if (NewOpc == TargetOpcode::G_CTTZ) {
2858 // The count is the same in the larger type except if the original
2859 // value was zero. This can be handled by setting the bit just off
2860 // the top of the original type.
2861 auto TopBit = APInt::getOneBitSet(WideTy.getScalarSizeInBits(),
2862 CurTy.getScalarSizeInBits());
2863 MIBSrc = MIRBuilder.buildOr(
2864 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2865 // Now we know the operand is non-zero, use the more relaxed opcode.
2866 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2867 }
2868
2869 unsigned SizeDiff =
2870 WideTy.getScalarSizeInBits() - CurTy.getScalarSizeInBits();
2871
2872 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2873 // An optimization where the result is the CTLZ after the left shift by
2874 // (Difference in widety and current ty), that is,
2875 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2876 // Result = ctlz MIBSrc
2877 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2878 MIRBuilder.buildConstant(WideTy, SizeDiff));
2879 }
2880
2881 // Perform the operation at the larger size.
2882 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2883 // This is already the correct result for CTPOP and CTTZs
2884 if (Opcode == TargetOpcode::G_CTLZ || Opcode == TargetOpcode::G_CTLS) {
2885 // The correct result is NewOp - (Difference in widety and current ty).
2886 // At this stage SUB is guaranteed to be positive no-wrap,
2887 // that to be used in further KnownBits optimizations for CTLZ.
2888 MIBNewOp = MIRBuilder.buildSub(
2889 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff),
2890 Opcode == TargetOpcode::G_CTLZ
2891 ? std::optional<unsigned>(MachineInstr::NoUWrap)
2892 : std::nullopt);
2893 }
2894
2895 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2896 MI.eraseFromParent();
2897 return Legalized;
2898 }
2899 case TargetOpcode::G_BSWAP: {
2900 Observer.changingInstr(MI);
2901 Register DstReg = MI.getOperand(0).getReg();
2902
2903 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2904 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2905 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2906 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2907
2908 MI.getOperand(0).setReg(DstExt);
2909
2910 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2911
2912 LLT Ty = MRI.getType(DstReg);
2913 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2914 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2915 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2916
2917 MIRBuilder.buildTrunc(DstReg, ShrReg);
2918 Observer.changedInstr(MI);
2919 return Legalized;
2920 }
2921 case TargetOpcode::G_BITREVERSE: {
2922 Observer.changingInstr(MI);
2923
2924 Register DstReg = MI.getOperand(0).getReg();
2925 LLT Ty = MRI.getType(DstReg);
2926 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2927
2928 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2929 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2930 MI.getOperand(0).setReg(DstExt);
2931 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2932
2933 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2934 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2935 MIRBuilder.buildTrunc(DstReg, Shift);
2936 Observer.changedInstr(MI);
2937 return Legalized;
2938 }
2939 case TargetOpcode::G_FREEZE:
2940 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2941 Observer.changingInstr(MI);
2942 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2943 widenScalarDst(MI, WideTy);
2944 Observer.changedInstr(MI);
2945 return Legalized;
2946
2947 case TargetOpcode::G_ABS:
2948 Observer.changingInstr(MI);
2949 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2950 widenScalarDst(MI, WideTy);
2951 Observer.changedInstr(MI);
2952 return Legalized;
2953
2954 case TargetOpcode::G_ADD:
2955 case TargetOpcode::G_AND:
2956 case TargetOpcode::G_MUL:
2957 case TargetOpcode::G_OR:
2958 case TargetOpcode::G_XOR:
2959 case TargetOpcode::G_SUB:
2960 case TargetOpcode::G_SHUFFLE_VECTOR:
 2961     // Perform operation at larger width (any extension is fine here, high bits
 2962     // don't affect the result) and then truncate the result back to the
 2963     // original type.
2964 Observer.changingInstr(MI);
2965 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2966 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2967 widenScalarDst(MI, WideTy);
2968 Observer.changedInstr(MI);
2969 return Legalized;
2970
2971 case TargetOpcode::G_SBFX:
2972 case TargetOpcode::G_UBFX:
2973 Observer.changingInstr(MI);
2974
2975 if (TypeIdx == 0) {
2976 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2977 widenScalarDst(MI, WideTy);
2978 } else {
2979 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2980 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2981 }
2982
2983 Observer.changedInstr(MI);
2984 return Legalized;
2985
2986 case TargetOpcode::G_SHL:
2987 Observer.changingInstr(MI);
2988
2989 if (TypeIdx == 0) {
2990 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2991 widenScalarDst(MI, WideTy);
2992 } else {
2993 assert(TypeIdx == 1);
2994 // The "number of bits to shift" operand must preserve its value as an
2995 // unsigned integer:
2996 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2997 }
2998
2999 Observer.changedInstr(MI);
3000 return Legalized;
3001
3002 case TargetOpcode::G_ROTR:
3003 case TargetOpcode::G_ROTL:
3004 if (TypeIdx != 1)
3005 return UnableToLegalize;
3006
3007 Observer.changingInstr(MI);
3008 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3009 Observer.changedInstr(MI);
3010 return Legalized;
3011
3012 case TargetOpcode::G_SDIV:
3013 case TargetOpcode::G_SREM:
3014 case TargetOpcode::G_SMIN:
3015 case TargetOpcode::G_SMAX:
3016 case TargetOpcode::G_ABDS:
3017 Observer.changingInstr(MI);
3018 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
3019 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3020 widenScalarDst(MI, WideTy);
3021 Observer.changedInstr(MI);
3022 return Legalized;
3023
3024 case TargetOpcode::G_SDIVREM:
3025 Observer.changingInstr(MI);
3026 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3027 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
3028 widenScalarDst(MI, WideTy);
3029 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3030 widenScalarDst(MI, WideTy, 1);
3031 Observer.changedInstr(MI);
3032 return Legalized;
3033
3034 case TargetOpcode::G_ASHR:
3035 case TargetOpcode::G_LSHR:
3036 Observer.changingInstr(MI);
3037
3038 if (TypeIdx == 0) {
3039 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
3040 : TargetOpcode::G_ZEXT;
3041
3042 widenScalarSrc(MI, WideTy, 1, CvtOp);
3043 widenScalarDst(MI, WideTy);
3044 } else {
3045 assert(TypeIdx == 1);
3046 // The "number of bits to shift" operand must preserve its value as an
3047 // unsigned integer:
3048 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3049 }
3050
3051 Observer.changedInstr(MI);
3052 return Legalized;
3053 case TargetOpcode::G_UDIV:
3054 case TargetOpcode::G_UREM:
3055 case TargetOpcode::G_ABDU:
3056 Observer.changingInstr(MI);
3057 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3058 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3059 widenScalarDst(MI, WideTy);
3060 Observer.changedInstr(MI);
3061 return Legalized;
3062 case TargetOpcode::G_UDIVREM:
3063 Observer.changingInstr(MI);
3064 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3065 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3066 widenScalarDst(MI, WideTy);
3067 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3068 widenScalarDst(MI, WideTy, 1);
3069 Observer.changedInstr(MI);
3070 return Legalized;
3071 case TargetOpcode::G_UMIN:
3072 case TargetOpcode::G_UMAX: {
3073 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3074
3075 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3076 unsigned ExtOpc =
3077 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty, Ctx),
3078 getApproximateEVTForLLT(WideTy, Ctx))
3079 ? TargetOpcode::G_SEXT
3080 : TargetOpcode::G_ZEXT;
3081
3082 Observer.changingInstr(MI);
3083 widenScalarSrc(MI, WideTy, 1, ExtOpc);
3084 widenScalarSrc(MI, WideTy, 2, ExtOpc);
3085 widenScalarDst(MI, WideTy);
3086 Observer.changedInstr(MI);
3087 return Legalized;
3088 }
3089
3090 case TargetOpcode::G_SELECT:
3091 Observer.changingInstr(MI);
3092 if (TypeIdx == 0) {
3093 // Perform operation at larger width (any extension is fine here, high
3094 // bits don't affect the result) and then truncate the result back to the
3095 // original type.
3096 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3097 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
3098 widenScalarDst(MI, WideTy);
3099 } else {
3100 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
3101 // Explicit extension is required here since high bits affect the result.
3102 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
3103 }
3104 Observer.changedInstr(MI);
3105 return Legalized;
3106
3107 case TargetOpcode::G_FPEXT:
3108 if (TypeIdx != 1)
3109 return UnableToLegalize;
3110
3111 Observer.changingInstr(MI);
3112 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3113 Observer.changedInstr(MI);
3114 return Legalized;
3115 case TargetOpcode::G_FPTOSI:
3116 case TargetOpcode::G_FPTOUI:
3117 case TargetOpcode::G_INTRINSIC_LRINT:
3118 case TargetOpcode::G_INTRINSIC_LLRINT:
3119 case TargetOpcode::G_IS_FPCLASS:
3120 Observer.changingInstr(MI);
3121
3122 if (TypeIdx == 0)
3123 widenScalarDst(MI, WideTy);
3124 else
3125 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3126
3127 Observer.changedInstr(MI);
3128 return Legalized;
3129 case TargetOpcode::G_SITOFP:
3130 Observer.changingInstr(MI);
3131
3132 if (TypeIdx == 0)
3133 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3134 else
3135 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
3136
3137 Observer.changedInstr(MI);
3138 return Legalized;
3139 case TargetOpcode::G_UITOFP:
3140 Observer.changingInstr(MI);
3141
3142 if (TypeIdx == 0)
3143 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3144 else
3145 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3146
3147 Observer.changedInstr(MI);
3148 return Legalized;
3149 case TargetOpcode::G_FPTOSI_SAT:
3150 case TargetOpcode::G_FPTOUI_SAT:
3151 Observer.changingInstr(MI);
3152
3153 if (TypeIdx == 0) {
3154 Register OldDst = MI.getOperand(0).getReg();
3155 LLT Ty = MRI.getType(OldDst);
3156 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3157 Register NewDst;
3158 MI.getOperand(0).setReg(ExtReg);
3159 uint64_t ShortBits = Ty.getScalarSizeInBits();
3160 uint64_t WideBits = WideTy.getScalarSizeInBits();
3161 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3162 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3163 // z = i16 fptosi_sat(a)
3164 // ->
3165 // x = i32 fptosi_sat(a)
3166 // y = smin(x, 32767)
3167 // z = smax(y, -32768)
3168 auto MaxVal = MIRBuilder.buildConstant(
3169 WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
3170 auto MinVal = MIRBuilder.buildConstant(
3171 WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
3172 Register MidReg =
3173 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3174 NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3175 } else {
3176 // z = i16 fptoui_sat(a)
3177 // ->
3178 // x = i32 fptoui_sat(a)
3179 // y = smin(x, 65535)
3180 auto MaxVal = MIRBuilder.buildConstant(
3181 WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
3182 NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3183 }
3184 MIRBuilder.buildTrunc(OldDst, NewDst);
3185 } else
3186 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3187
3188 Observer.changedInstr(MI);
3189 return Legalized;
3190 case TargetOpcode::G_LOAD:
3191 case TargetOpcode::G_SEXTLOAD:
3192 case TargetOpcode::G_ZEXTLOAD:
3193 case TargetOpcode::G_FPEXTLOAD:
3194 Observer.changingInstr(MI);
3195 widenScalarDst(MI, WideTy);
3196 Observer.changedInstr(MI);
3197 return Legalized;
3198
3199 case TargetOpcode::G_STORE: {
3200 if (TypeIdx != 0)
3201 return UnableToLegalize;
3202
3203 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3204 assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
3205 if (!Ty.isScalar()) {
3206 // We need to widen the vector element type.
3207 Observer.changingInstr(MI);
3208 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
3209 // We also need to adjust the MMO to turn this into a truncating store.
3210 MachineMemOperand &MMO = **MI.memoperands_begin();
3211 MachineFunction &MF = MIRBuilder.getMF();
3212 auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
3213 MI.setMemRefs(MF, {NewMMO});
3214 Observer.changedInstr(MI);
3215 return Legalized;
3216 }
3217
3218 Observer.changingInstr(MI);
3219
3220 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3221 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3222 widenScalarSrc(MI, WideTy, 0, ExtType);
3223
3224 Observer.changedInstr(MI);
3225 return Legalized;
3226 }
3227 case TargetOpcode::G_FPTRUNCSTORE:
3228 if (TypeIdx != 0)
3229 return UnableToLegalize;
3230 Observer.changingInstr(MI);
3231 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_FPEXT);
3232 Observer.changedInstr(MI);
3233 return Legalized;
3234 case TargetOpcode::G_CONSTANT: {
3235 MachineOperand &SrcMO = MI.getOperand(1);
3236 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3237 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3238 MRI.getType(MI.getOperand(0).getReg()));
3239 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3240 ExtOpc == TargetOpcode::G_ANYEXT) &&
3241 "Illegal Extend");
3242 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3243 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3244 ? SrcVal.sext(WideTy.getSizeInBits())
3245 : SrcVal.zext(WideTy.getSizeInBits());
3246 Observer.changingInstr(MI);
3247 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3248
3249 widenScalarDst(MI, WideTy);
3250 Observer.changedInstr(MI);
3251 return Legalized;
3252 }
3253 case TargetOpcode::G_FCONSTANT: {
3254 // To avoid changing the bits of the constant due to extension to a larger
3255 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
3256 MachineOperand &SrcMO = MI.getOperand(1);
3257 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3258 MIRBuilder.setInstrAndDebugLoc(MI);
3259 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
3260 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
3261 MI.eraseFromParent();
3262 return Legalized;
3263 }
3264 case TargetOpcode::G_IMPLICIT_DEF: {
3265 Observer.changingInstr(MI);
3266 widenScalarDst(MI, WideTy);
3267 Observer.changedInstr(MI);
3268 return Legalized;
3269 }
3270 case TargetOpcode::G_BRCOND:
3271 Observer.changingInstr(MI);
3272 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
3273 Observer.changedInstr(MI);
3274 return Legalized;
3275
3276 case TargetOpcode::G_FCMP:
3277 Observer.changingInstr(MI);
3278 if (TypeIdx == 0)
3279 widenScalarDst(MI, WideTy);
3280 else {
3281 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3282 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
3283 }
3284 Observer.changedInstr(MI);
3285 return Legalized;
3286
3287 case TargetOpcode::G_ICMP:
3288 Observer.changingInstr(MI);
3289 if (TypeIdx == 0)
3290 widenScalarDst(MI, WideTy);
3291 else {
3292 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
3293 CmpInst::Predicate Pred =
3294 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3295
3296 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3297 unsigned ExtOpcode =
3298 (CmpInst::isSigned(Pred) ||
3299 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx),
3300 getApproximateEVTForLLT(WideTy, Ctx)))
3301 ? TargetOpcode::G_SEXT
3302 : TargetOpcode::G_ZEXT;
3303 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
3304 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
3305 }
3306 Observer.changedInstr(MI);
3307 return Legalized;
3308
3309 case TargetOpcode::G_PTR_ADD:
3310 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
3311 Observer.changingInstr(MI);
3312 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3313 Observer.changedInstr(MI);
3314 return Legalized;
3315
3316 case TargetOpcode::G_PHI: {
3317 assert(TypeIdx == 0 && "Expecting only Idx 0");
3318
3319 Observer.changingInstr(MI);
3320 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3321 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3322 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
3323 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
3324 }
3325
3326 MachineBasicBlock &MBB = *MI.getParent();
3327 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3328 widenScalarDst(MI, WideTy);
3329 Observer.changedInstr(MI);
3330 return Legalized;
3331 }
3332 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3333 if (TypeIdx == 0) {
3334 Register VecReg = MI.getOperand(1).getReg();
3335 LLT VecTy = MRI.getType(VecReg);
3336 Observer.changingInstr(MI);
3337
3338 widenScalarSrc(MI, LLT::vector(VecTy.getElementCount(), WideTy), 1,
3339 TargetOpcode::G_ANYEXT);
3340
3341 widenScalarDst(MI, WideTy, 0);
3342 Observer.changedInstr(MI);
3343 return Legalized;
3344 }
3345
3346 if (TypeIdx != 2)
3347 return UnableToLegalize;
3348 Observer.changingInstr(MI);
3349 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3350 Observer.changedInstr(MI);
3351 return Legalized;
3352 }
3353 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3354 if (TypeIdx == 0) {
3355 Observer.changingInstr(MI);
3356 const LLT WideEltTy = WideTy.getElementType();
3357
3358 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3359 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3360 widenScalarDst(MI, WideTy, 0);
3361 Observer.changedInstr(MI);
3362 return Legalized;
3363 }
3364
3365 if (TypeIdx == 1) {
3366 Observer.changingInstr(MI);
3367
3368 Register VecReg = MI.getOperand(1).getReg();
3369 LLT VecTy = MRI.getType(VecReg);
3370 LLT WideVecTy = VecTy.changeVectorElementType(WideTy);
3371
3372 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3373 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3374 widenScalarDst(MI, WideVecTy, 0);
3375 Observer.changedInstr(MI);
3376 return Legalized;
3377 }
3378
3379 if (TypeIdx == 2) {
3380 Observer.changingInstr(MI);
3381 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3382 Observer.changedInstr(MI);
3383 return Legalized;
3384 }
3385
3386 return UnableToLegalize;
3387 }
3388 case TargetOpcode::G_FADD:
3389 case TargetOpcode::G_FMUL:
3390 case TargetOpcode::G_FSUB:
3391 case TargetOpcode::G_FMA:
3392 case TargetOpcode::G_FMAD:
3393 case TargetOpcode::G_FNEG:
3394 case TargetOpcode::G_FABS:
3395 case TargetOpcode::G_FCANONICALIZE:
3396 case TargetOpcode::G_FMINNUM:
3397 case TargetOpcode::G_FMAXNUM:
3398 case TargetOpcode::G_FMINNUM_IEEE:
3399 case TargetOpcode::G_FMAXNUM_IEEE:
3400 case TargetOpcode::G_FMINIMUM:
3401 case TargetOpcode::G_FMAXIMUM:
3402 case TargetOpcode::G_FMINIMUMNUM:
3403 case TargetOpcode::G_FMAXIMUMNUM:
3404 case TargetOpcode::G_FDIV:
3405 case TargetOpcode::G_FREM:
3406 case TargetOpcode::G_FCEIL:
3407 case TargetOpcode::G_FFLOOR:
3408 case TargetOpcode::G_FCOS:
3409 case TargetOpcode::G_FSIN:
3410 case TargetOpcode::G_FTAN:
3411 case TargetOpcode::G_FACOS:
3412 case TargetOpcode::G_FASIN:
3413 case TargetOpcode::G_FATAN:
3414 case TargetOpcode::G_FATAN2:
3415 case TargetOpcode::G_FCOSH:
3416 case TargetOpcode::G_FSINH:
3417 case TargetOpcode::G_FTANH:
3418 case TargetOpcode::G_FLOG10:
3419 case TargetOpcode::G_FLOG:
3420 case TargetOpcode::G_FLOG2:
3421 case TargetOpcode::G_FRINT:
3422 case TargetOpcode::G_FNEARBYINT:
3423 case TargetOpcode::G_FSQRT:
3424 case TargetOpcode::G_FEXP:
3425 case TargetOpcode::G_FEXP2:
3426 case TargetOpcode::G_FEXP10:
3427 case TargetOpcode::G_FPOW:
3428 case TargetOpcode::G_INTRINSIC_TRUNC:
3429 case TargetOpcode::G_INTRINSIC_ROUND:
3430 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3431 assert(TypeIdx == 0);
3432 Observer.changingInstr(MI);
3433
3434 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3435 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3436
3437 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3438 Observer.changedInstr(MI);
3439 return Legalized;
3440 case TargetOpcode::G_FMODF: {
3441 Observer.changingInstr(MI);
3442 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3443
3444 widenScalarDst(MI, WideTy, 1, TargetOpcode::G_FPTRUNC);
3445 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3446 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3447 Observer.changedInstr(MI);
3448 return Legalized;
3449 }
3450 case TargetOpcode::G_FPOWI:
3451 case TargetOpcode::G_FLDEXP:
3452 case TargetOpcode::G_STRICT_FLDEXP: {
3453 if (TypeIdx == 0) {
3454 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3455 return UnableToLegalize;
3456
3457 Observer.changingInstr(MI);
3458 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3459 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3460 Observer.changedInstr(MI);
3461 return Legalized;
3462 }
3463
3464 if (TypeIdx == 1) {
3465 // For some reason SelectionDAG tries to promote to a libcall without
3466 // actually changing the integer type for promotion.
3467 Observer.changingInstr(MI);
3468 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3469 Observer.changedInstr(MI);
3470 return Legalized;
3471 }
3472
3473 return UnableToLegalize;
3474 }
3475 case TargetOpcode::G_FFREXP: {
3476 Observer.changingInstr(MI);
3477
3478 if (TypeIdx == 0) {
3479 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3480 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3481 } else {
3482 widenScalarDst(MI, WideTy, 1);
3483 }
3484
3485 Observer.changedInstr(MI);
3486 return Legalized;
3487 }
3488 case TargetOpcode::G_LROUND:
3489 case TargetOpcode::G_LLROUND:
3490 Observer.changingInstr(MI);
3491
3492 if (TypeIdx == 0)
3493 widenScalarDst(MI, WideTy);
3494 else
3495 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3496
3497 Observer.changedInstr(MI);
3498 return Legalized;
3499
3500 case TargetOpcode::G_INTTOPTR:
3501 if (TypeIdx != 1)
3502 return UnableToLegalize;
3503
3504 Observer.changingInstr(MI);
3505 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3506 Observer.changedInstr(MI);
3507 return Legalized;
3508 case TargetOpcode::G_PTRTOINT:
3509 if (TypeIdx != 0)
3510 return UnableToLegalize;
3511
3512 Observer.changingInstr(MI);
3513 widenScalarDst(MI, WideTy, 0);
3514 Observer.changedInstr(MI);
3515 return Legalized;
3516 case TargetOpcode::G_BUILD_VECTOR: {
3517 Observer.changingInstr(MI);
3518
3519 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3520 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3521 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3522
3523 // Avoid changing the result vector type if the source element type was
3524 // requested.
3525 if (TypeIdx == 1) {
3526 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3527 } else {
3528 widenScalarDst(MI, WideTy, 0);
3529 }
3530
3531 Observer.changedInstr(MI);
3532 return Legalized;
3533 }
3534 case TargetOpcode::G_SEXT_INREG:
3535 if (TypeIdx != 0)
3536 return UnableToLegalize;
3537
3538 Observer.changingInstr(MI);
3539 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3540 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3541 Observer.changedInstr(MI);
3542 return Legalized;
3543 case TargetOpcode::G_PTRMASK: {
3544 if (TypeIdx != 1)
3545 return UnableToLegalize;
3546 Observer.changingInstr(MI);
3547 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3548 Observer.changedInstr(MI);
3549 return Legalized;
3550 }
3551 case TargetOpcode::G_VECREDUCE_ADD: {
3552 if (TypeIdx != 1)
3553 return UnableToLegalize;
3554 Observer.changingInstr(MI);
3555 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3556 widenScalarDst(MI, WideTy.getScalarType(), 0, TargetOpcode::G_TRUNC);
3557 Observer.changedInstr(MI);
3558 return Legalized;
3559 }
3560 case TargetOpcode::G_VECREDUCE_FADD:
3561 case TargetOpcode::G_VECREDUCE_FMUL:
3562 case TargetOpcode::G_VECREDUCE_FMIN:
3563 case TargetOpcode::G_VECREDUCE_FMAX:
3564 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3565 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3566 if (TypeIdx != 0)
3567 return UnableToLegalize;
3568 Observer.changingInstr(MI);
3569 Register VecReg = MI.getOperand(1).getReg();
3570 LLT VecTy = MRI.getType(VecReg);
3571 LLT WideVecTy = VecTy.changeElementType(WideTy);
3572 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3573 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3574 Observer.changedInstr(MI);
3575 return Legalized;
3576 }
3577 case TargetOpcode::G_VSCALE: {
3578 MachineOperand &SrcMO = MI.getOperand(1);
3579 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3580 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3581 // The CImm is always a signed value
3582 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3583 Observer.changingInstr(MI);
3584 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3585 widenScalarDst(MI, WideTy);
3586 Observer.changedInstr(MI);
3587 return Legalized;
3588 }
3589 case TargetOpcode::G_SPLAT_VECTOR: {
3590 if (TypeIdx != 1)
3591 return UnableToLegalize;
3592
3593 Observer.changingInstr(MI);
3594 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3595 Observer.changedInstr(MI);
3596 return Legalized;
3597 }
3598 case TargetOpcode::G_INSERT_SUBVECTOR: {
3599 if (TypeIdx != 0)
3600 return UnableToLegalize;
3601
3603 Register BigVec = IS.getBigVec();
3604 Register SubVec = IS.getSubVec();
3605
3606 LLT SubVecTy = MRI.getType(SubVec);
3607 LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
3608
3609 // Widen the G_INSERT_SUBVECTOR
3610 auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
3611 auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3612 auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3613 IS.getIndexImm());
3614
3615 // Truncate back down
3616 auto SplatZero = MIRBuilder.buildSplatVector(
3617 WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
3618 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, IS.getReg(0), WideInsert,
3619 SplatZero);
3620
3621 MI.eraseFromParent();
3622
3623 return Legalized;
3624 }
3625 }
3626}
3627
3629 MachineIRBuilder &B, Register Src, LLT Ty) {
3630 auto Unmerge = B.buildUnmerge(Ty, Src);
3631 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3632 Pieces.push_back(Unmerge.getReg(I));
3633}
3634
/// Materialize \p ConstVal into \p DstReg by placing it in the function's
/// constant pool and emitting a G_LOAD of it.
///
/// The entry is created at the ABI type alignment of \p ConstVal, its address
/// is built as a constant-pool pointer in the data layout's default globals
/// address space, and a plain (non-extending) G_LOAD of the destination
/// register's type is emitted from that address.
 3635 static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
 3636                                      MachineIRBuilder &MIRBuilder) {
 3637   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
 3638   MachineFunction &MF = MIRBuilder.getMF();
 3639   const DataLayout &DL = MIRBuilder.getDataLayout();
   // Constant-pool entries are addressed in the default globals address space;
   // build a pointer type of the matching width for the address computation.
 3640   unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
 3641   LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
 3642   LLT DstLLT = MRI.getType(DstReg);
 3643
   // Align the pool entry to the ABI alignment of the constant's IR type.
 3644   Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
 3645
   // getConstantPoolIndex CSEs identical constants, so repeated calls for the
   // same value reuse one pool slot.
 3646   auto Addr = MIRBuilder.buildConstantPool(
 3647       AddrPtrTy,
 3648       MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
 3649
   // NOTE(review): this source rendering appears to drop the line that opens
   // the MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(...) call
   // here (original line 3651) -- verify against the checked-in file before
   // editing this call.
 3650   MachineMemOperand *MMO =
 3652       MachineMemOperand::MOLoad, DstLLT, Alignment);
 3653
 3654   MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
 3655 }
3656
3659 const MachineOperand &ConstOperand = MI.getOperand(1);
3660 const Constant *ConstantVal = ConstOperand.getCImm();
3661
3662 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3663 MI.eraseFromParent();
3664
3665 return Legalized;
3666}
3667
3670 const MachineOperand &ConstOperand = MI.getOperand(1);
3671 const Constant *ConstantVal = ConstOperand.getFPImm();
3672
3673 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3674 MI.eraseFromParent();
3675
3676 return Legalized;
3677}
3678
3681 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3682 if (SrcTy.isVector()) {
3683 LLT SrcEltTy = SrcTy.getElementType();
3685
3686 if (DstTy.isVector()) {
3687 int NumDstElt = DstTy.getNumElements();
3688 int NumSrcElt = SrcTy.getNumElements();
3689
3690 LLT DstEltTy = DstTy.getElementType();
3691 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3692 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3693
3694 // If there's an element size mismatch, insert intermediate casts to match
3695 // the result element type.
3696 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3697 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3698 //
3699 // =>
3700 //
3701 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
 3702       // %4:_(<2 x s8>) = G_BITCAST %2
 3703       // %5:_(<2 x s8>) = G_BITCAST %3
 3704       // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3705 DstCastTy = DstTy.changeVectorElementCount(
3706 ElementCount::getFixed(NumDstElt / NumSrcElt));
3707 SrcPartTy = SrcEltTy;
3708 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3709 //
3710 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3711 //
3712 // =>
3713 //
3714 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
 3715       // %4:_(s16) = G_BITCAST %2
 3716       // %5:_(s16) = G_BITCAST %3
 3717       // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3718 SrcPartTy = SrcTy.changeVectorElementCount(
3719 ElementCount::getFixed(NumSrcElt / NumDstElt));
3720 DstCastTy = DstEltTy;
3721 }
3722
3723 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3724 for (Register &SrcReg : SrcRegs)
3725 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3726 } else
3727 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3728
3729 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3730 MI.eraseFromParent();
3731 return Legalized;
3732 }
3733
3734 if (DstTy.isVector()) {
3736 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3737 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3738 MI.eraseFromParent();
3739 return Legalized;
3740 }
3741
3742 return UnableToLegalize;
3743}
3744
3745/// Figure out the bit offset into a register when coercing a vector index for
3746/// the wide element type. This is only for the case when promoting vector to
3747/// one with larger elements.
3748//
3749///
3750/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3751/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3753 Register Idx,
3754 unsigned NewEltSize,
3755 unsigned OldEltSize) {
3756 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3757 LLT IdxTy = B.getMRI()->getType(Idx);
3758
3759 // Now figure out the amount we need to shift to get the target bits.
3760 auto OffsetMask = B.buildConstant(
3761 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3762 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3763 return B.buildShl(IdxTy, OffsetIdx,
3764 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3765}
3766
/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
/// is casting to a vector with a smaller element size, perform multiple element
/// extracts and merge the results. If this is coercing to a vector with larger
/// elements, index the bitcasted vector and extract the target element with bit
/// operations. This is intended to force the indexing in the native register
/// size for architectures that can dynamically index the register file.
                                         LLT CastTy) {
  // Only the source vector operand (type index 1) may be bitcast here.
  if (TypeIdx != 1)
    return UnableToLegalize;

  auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();

  LLT SrcEltTy = SrcVecTy.getElementType();
  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = SrcVecTy.getNumElements();

  LLT NewEltTy = CastTy.getScalarType();
  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);

  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = SrcEltTy.getSizeInBits();
  if (NewNumElts > OldNumElts) {
    // Decreasing the vector element size
    //
    // e.g. i64 = extract_vector_elt x:v2i64, y:i32
    //  =>
    //  v4i32:castx = bitcast x:v2i64
    //
    // i64 = bitcast
    //   (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
    //                       (i32 (extract_vector_elt castx, (2 * y + 1)))
    //
    // Each old element must map to a whole number of new elements.
    if (NewNumElts % OldNumElts != 0)
      return UnableToLegalize;

    // Type of the intermediate result vector.
    const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
    LLT MidTy =
        CastTy.changeElementCount(ElementCount::getFixed(NewEltsPerOldElt));

    auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);

    SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
    // Index of the first narrow element of the group: Idx * NewEltsPerOldElt.
    auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);

    // Extract the run of consecutive narrow elements that together make up
    // the requested wide element.
    for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
      auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
      auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
      auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
      NewOps[I] = Elt.getReg(0);
    }

    // Reassemble the narrow pieces and reinterpret them as the original
    // (wide) element type.
    auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
    MIRBuilder.buildBitcast(Dst, NewVec);
    MI.eraseFromParent();
    return Legalized;
  }

  if (NewNumElts < OldNumElts) {
    // The wide element must be a whole multiple of the old element size.
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure out
    // the bit offset we need to shift to get the target element. A general
    // expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    // Increasing the vector element size.
    // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
    //
    //   =>
    //
    // %cast = G_BITCAST %vec
    // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
    // %wide_elt  = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
    // %offset_idx  = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
    // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
    // %elt_bits  = G_LSHR %wide_elt, %offset_bits
    // %elt = G_TRUNC %elt_bits

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    // If the cast type is a scalar, the whole cast value is the "wide
    // element"; otherwise index into the bitcast vector.
    Register WideElt = CastVec;
    if (CastTy.isVector()) {
      WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                     ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
        MIRBuilder, Idx, NewEltSize, OldEltSize);

    // Shift the wide element to get the target element.
    auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
    MIRBuilder.buildTrunc(Dst, ExtractedBits);
    MI.eraseFromParent();
    return Legalized;
  }

  // Same element count: nothing useful to do with a bitcast here.
  return UnableToLegalize;
}
3875
/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits in \p
/// TargetReg, while preserving other bits in \p TargetReg.
///
/// (TargetReg & ~(low_mask(InsertReg.size()) << OffsetBits)) |
///     (zext(InsertReg) << OffsetBits)
                                    Register TargetReg, Register InsertReg,
                                    Register OffsetBits) {
  LLT TargetTy = B.getMRI()->getType(TargetReg);
  LLT InsertTy = B.getMRI()->getType(InsertReg);
  // Zero-extend the value so its high bits are known zero, then move it to
  // the insert position.
  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);

  // Produce a bitmask of the value to insert
  auto EltMask = B.buildConstant(
      TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
                                     InsertTy.getSizeInBits()));
  // Shift it into position
  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);

  // Clear out the bits in the wide element
  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);

  // The value to insert has all zeros already, so stick it into the masked
  // wide element.
  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
}
3903
/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
/// is increasing the element size, perform the indexing in the target element
/// type, and use bit operations to insert at the element position. This is
/// intended for architectures that can dynamically index the register file and
/// want to force indexing in the native register size.
                                        LLT CastTy) {
  // Only the vector type (type index 0) may be bitcast here.
  if (TypeIdx != 0)
    return UnableToLegalize;

  auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
      MI.getFirst4RegLLTs();
  LLT VecTy = DstTy;

  LLT VecEltTy = VecTy.getElementType();
  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = VecEltTy.getSizeInBits();

  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = VecTy.getNumElements();

  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
  if (NewNumElts < OldNumElts) {
    // Increasing the element size: the wide element must be a whole multiple
    // of the old element size.
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure out
    // the bit offset we need to shift to get the target element. A general
    // expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    // Read the wide element containing the target position (if the cast type
    // is a scalar, the whole cast value is that element).
    Register ExtractedElt = CastVec;
    if (CastTy.isVector()) {
      ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                          ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
        MIRBuilder, Idx, NewEltSize, OldEltSize);

    // Splice the new value into the wide element, then (for a vector cast
    // type) write the wide element back into the vector.
    Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
                                               Val, OffsetBits);
    if (CastTy.isVector()) {
      InsertedElt = MIRBuilder.buildInsertVectorElement(
          CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
    }

    MIRBuilder.buildBitcast(Dst, InsertedElt);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
3968
// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
// those that have smaller than legal operands.
//
// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
//
//  ===>
//
//  s32 = G_BITCAST <4 x s8>
//  s32 = G_BITCAST <4 x s8>
//  s32 = G_BITCAST <4 x s8>
//  s32 = G_BITCAST <4 x s8>
//  <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
//  <16 x s8> = G_BITCAST <4 x s32>
                                     LLT CastTy) {
  // Convert it to CONCAT instruction
  auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
  if (!ConcatMI) {
    return UnableToLegalize;
  }

  // Check if bitcast is Legal
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  LLT SrcScalTy = CastTy.getScalarType();

  // Check if the build vector is Legal
  if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
    return UnableToLegalize;
  }

  // Bitcast the sources: each source vector is reinterpreted as one scalar
  // element of the cast type.
  SmallVector<Register> BitcastRegs;
  for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
    BitcastRegs.push_back(
        MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
            .getReg(0));
  }

  // Build the scalar values into a vector
  Register BuildReg =
      MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
  MIRBuilder.buildBitcast(DstReg, BuildReg);

  MI.eraseFromParent();
  return Legalized;
}
4016
// This bitcasts a shuffle vector to a different type currently of the same
// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
// will be used instead.
//
// <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
// ===>
//   <4 x s64> = G_PTRTOINT <4 x p0>
//   <4 x s64> = G_PTRTOINT <4 x p0>
//   <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
//   <16 x p0> = G_INTTOPTR <16 x s64>
                                      LLT CastTy) {
  auto ShuffleMI = cast<GShuffleVector>(&MI);
  LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
  LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));

  // We currently only handle vectors of the same size.
  if (TypeIdx != 0 ||
      CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
      CastTy.getElementCount() != DstTy.getElementCount())
    return UnableToLegalize;

  LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());

  // NOTE(review): buildCast is presumed to pick the appropriate conversion
  // (e.g. ptrtoint/inttoptr) for the type pair — confirm in MachineIRBuilder.
  auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
  auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
  auto Shuf =
      MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
  MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);

  MI.eraseFromParent();
  return Legalized;
}
4051
/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
///
///  <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
///
/// ===>
///
///  <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
///  <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i8>, N / 8
///  <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
                                         LLT CastTy) {
  auto ES = cast<GExtractSubvector>(&MI);

  if (!CastTy.isVector())
    return UnableToLegalize;

  if (TypeIdx != 0)
    return UnableToLegalize;

  Register Dst = ES->getReg(0);
  Register Src = ES->getSrcVec();
  uint64_t Idx = ES->getIndexImm();

  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  ElementCount DstTyEC = DstTy.getElementCount();
  ElementCount SrcTyEC = SrcTy.getElementCount();
  auto DstTyMinElts = DstTyEC.getKnownMinValue();
  auto SrcTyMinElts = SrcTyEC.getKnownMinValue();

  // Nothing to do if the type already matches.
  if (DstTy == CastTy)
    return Legalized;

  // A bitcast must preserve the total bit width.
  if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
    return UnableToLegalize;

  // Only handle widening the element type (e.g. i1 -> i8).
  unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
  unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
  if (CastEltSize < DstEltSize)
    return UnableToLegalize;

  // The index and both element counts must divide evenly by the size ratio so
  // the subvector boundary stays aligned after the cast.
  auto AdjustAmt = CastEltSize / DstEltSize;
  if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
      SrcTyMinElts % AdjustAmt != 0)
    return UnableToLegalize;

  Idx /= AdjustAmt;
  // NOTE(review): the new element size is AdjustAmt bits, which equals
  // CastTy's element size only when DstEltSize == 1 — presumably only i1
  // vectors reach here; confirm.
  SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
  auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
  auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
  MIRBuilder.buildBitcast(Dst, PromotedES);

  ES->eraseFromParent();
  return Legalized;
}
4110
/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
///
///  <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
///                                          <vscale x 8 x i1>,
///                                          N
///
/// ===>
///
///  <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
///  <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
///  <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
///                                         <vscale x 1 x i8>, N / 8
///  <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
                                        LLT CastTy) {
  auto ES = cast<GInsertSubvector>(&MI);

  if (!CastTy.isVector())
    return UnableToLegalize;

  if (TypeIdx != 0)
    return UnableToLegalize;

  Register Dst = ES->getReg(0);
  Register BigVec = ES->getBigVec();
  Register SubVec = ES->getSubVec();
  uint64_t Idx = ES->getIndexImm();

  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  LLT DstTy = MRI.getType(Dst);
  LLT BigVecTy = MRI.getType(BigVec);
  LLT SubVecTy = MRI.getType(SubVec);

  // Nothing to do if the type already matches.
  if (DstTy == CastTy)
    return Legalized;

  // A bitcast must preserve the total bit width.
  if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
    return UnableToLegalize;

  ElementCount DstTyEC = DstTy.getElementCount();
  ElementCount BigVecTyEC = BigVecTy.getElementCount();
  ElementCount SubVecTyEC = SubVecTy.getElementCount();
  auto DstTyMinElts = DstTyEC.getKnownMinValue();
  auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
  auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();

  // Only handle widening the element type (e.g. i1 -> i8).
  unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
  unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
  if (CastEltSize < DstEltSize)
    return UnableToLegalize;

  // The index and all element counts must divide evenly by the size ratio so
  // the subvector boundary stays aligned after the cast.
  auto AdjustAmt = CastEltSize / DstEltSize;
  if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
      BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
    return UnableToLegalize;

  Idx /= AdjustAmt;
  // NOTE(review): the new element size is AdjustAmt bits, which equals
  // CastTy's element size only when DstEltSize == 1 — presumably only i1
  // vectors reach here; confirm.
  BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
  SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
  auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
  auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
  auto PromotedIS =
      MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
  MIRBuilder.buildBitcast(Dst, PromotedIS);

  ES->eraseFromParent();
  return Legalized;
}
4181
  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
  Register DstReg = LoadMI.getDstReg();
  Register PtrReg = LoadMI.getPointerReg();
  LLT DstTy = MRI.getType(DstReg);
  MachineMemOperand &MMO = LoadMI.getMMO();
  LLT MemTy = MMO.getMemoryType();
  MachineFunction &MF = MIRBuilder.getMF();

  LLT EltTy = MemTy.getScalarType();

  unsigned MemSizeInBits = MemTy.getSizeInBits();
  unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();

  // Case 1: non-byte-sized memory type (e.g. s20) — widen to a whole number
  // of bytes and extend/truncate around the new load.
  if (MemSizeInBits != MemStoreSizeInBits) {
    if (MemTy.isVector())
      return UnableToLegalize;

    // Promote to a byte-sized load if not loading an integral number of
    // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
    LLT WideMemTy = EltTy.changeElementSize(MemStoreSizeInBits);
    MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);

    Register LoadReg = DstReg;
    LLT LoadTy = DstTy;

    // If this wasn't already an extending load, we need to widen the result
    // register to avoid creating a load with a narrower result than the source.
    if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
      LoadTy = WideMemTy;
      LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
    }

    if (isa<GSExtLoad>(LoadMI)) {
      auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
      MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
    } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
      auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
      // The extra bits are guaranteed to be zero, since we stored them that
      // way. A zext load from Wide thus automatically gives zext from MemVT.
      MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
    } else {
      MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
    }

    if (DstTy != LoadTy)
      MIRBuilder.buildTrunc(DstReg, LoadReg);

    LoadMI.eraseFromParent();
    return Legalized;
  }

  // Big endian lowering not implemented.
  if (MIRBuilder.getDataLayout().isBigEndian())
    return UnableToLegalize;

  // This load needs splitting into power of 2 sized loads.
  //
  // Our strategy here is to generate anyextending loads for the smaller
  // types up to next power-2 result type, and then combine the two larger
  // result values together, before truncating back down to the non-pow-2
  // type.
  // E.g. v1 = i24 load =>
  // v2 = i32 zextload (2 byte)
  // v3 = i32 load (1 byte)
  // v4 = i32 shl v3, 16
  // v5 = i32 or v4, v2
  // v1 = i24 trunc v5
  // By doing this we generate the correct truncate which should get
  // combined away as an artifact with a matching extend.

  uint64_t LargeSplitSize, SmallSplitSize;

  if (!isPowerOf2_32(MemSizeInBits)) {
    // This load needs splitting into power of 2 sized loads.
    LargeSplitSize = llvm::bit_floor(MemSizeInBits);
    SmallSplitSize = MemSizeInBits - LargeSplitSize;
  } else {
    // This is already a power of 2, but we still need to split this in half.
    //
    // Assume we're being asked to decompose an unaligned load.
    // TODO: If this requires multiple splits, handle them all at once.
    auto &Ctx = MF.getFunction().getContext();
    if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
      return UnableToLegalize;

    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
  }

  if (MemTy.isVector()) {
    // TODO: Handle vector extloads
    if (MemTy != DstTy)
      return UnableToLegalize;

    Align Alignment = LoadMI.getAlign();
    // Given an alignment larger than the size of the memory, we can increase
    // the size of the load without needing to scalarize it.
    if (Alignment.value() * 8 > MemSizeInBits &&
      LLT MoreTy = DstTy.changeVectorElementCount(
      MachineMemOperand *NewMMO = MF.getMachineMemOperand(&MMO, 0, MoreTy);
      auto NewLoad = MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
      MIRBuilder.buildDeleteTrailingVectorElements(LoadMI.getReg(0),
                                                   NewLoad.getReg(0));
      LoadMI.eraseFromParent();
      return Legalized;
    }

    // TODO: We can do better than scalarizing the vector and at least split it
    // in half.
    return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
  }

  // Scalar split: two loads at [0, LargeSplitSize) and
  // [LargeSplitSize, MemSizeInBits), recombined with shift + or.
  MachineMemOperand *LargeMMO =
      MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
  MachineMemOperand *SmallMMO =
      MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);

  LLT PtrTy = MRI.getType(PtrReg);
  unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());

  LLT AnyExtTy;
  LLT OffsetCstRes;
  if (EltTy.isPointer()) {
    AnyExtTy = LLT::scalar(AnyExtSize);
    OffsetCstRes = LLT::scalar(PtrTy.getSizeInBits());
  } else {
    AnyExtTy = EltTy.changeElementSize(AnyExtSize);
    OffsetCstRes = EltTy.changeElementSize(PtrTy.getSizeInBits());
  }

  // Low piece (little endian, so the piece at offset 0 holds the low bits).
  auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
                                             PtrReg, *LargeMMO);

  // High piece at PtrReg + LargeSplitSize / 8.
  auto OffsetCst = MIRBuilder.buildConstant(OffsetCstRes, LargeSplitSize / 8);
  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
  auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
  auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
                                             SmallPtr, *SmallMMO);

  // Recombine: (SmallLoad << LargeSplitSize) | LargeLoad.
  auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
  auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);

  if (AnyExtTy == DstTy)
    MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
  else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
    auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
    MIRBuilder.buildTrunc(DstReg, {Or});
  } else {
    assert(DstTy.isPointer() && "expected pointer");
    auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);

    // FIXME: We currently consider this to be illegal for non-integral address
    // spaces, but we need still need a way to reinterpret the bits.
    MIRBuilder.buildIntToPtr(DstReg, Or);
  }

  LoadMI.eraseFromParent();
  return Legalized;
}
4344
  // Lower a non-power of 2 store into multiple pow-2 stores.
  // E.g. split an i24 store into an i16 store + i8 store.
  // We do this by first extending the stored value to the next largest power
  // of 2 type, and then using truncating stores to store the components.
  // By doing this, likewise with G_LOAD, generate an extend that can be
  // artifact-combined away instead of leaving behind extracts.
  Register SrcReg = StoreMI.getValueReg();
  Register PtrReg = StoreMI.getPointerReg();
  LLT SrcTy = MRI.getType(SrcReg);
  MachineFunction &MF = MIRBuilder.getMF();
  MachineMemOperand &MMO = **StoreMI.memoperands_begin();
  LLT MemTy = MMO.getMemoryType();

  unsigned StoreWidth = MemTy.getSizeInBits();
  unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();

  // Case 1: non-byte-sized scalar store (e.g. i1) — widen to a whole byte
  // with zeroed padding bits.
  if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
    // Promote to a byte-sized store with upper bits zero if not
    // storing an integral number of bytes. For example, promote
    // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
    LLT WideTy = LLT::integer(StoreSizeInBits);

    if (StoreSizeInBits > SrcTy.getSizeInBits()) {
      // Avoid creating a store with a narrower source than result.
      SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
      SrcTy = WideTy;
    }

    // Zero the bits above StoreWidth so the in-memory padding is defined.
    auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);

    MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
    MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
    StoreMI.eraseFromParent();
    return Legalized;
  }

  if (MemTy.isVector()) {
    if (MemTy != SrcTy)
      return scalarizeVectorBooleanStore(StoreMI);

    // TODO: We can do better than scalarizing the vector and at least split it
    // in half.
    return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
  }

  unsigned MemSizeInBits = MemTy.getSizeInBits();
  uint64_t LargeSplitSize, SmallSplitSize;

  if (!isPowerOf2_32(MemSizeInBits)) {
    // Non-pow-2 width: split off the largest power-of-2 piece.
    LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
    SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
  } else {
    auto &Ctx = MF.getFunction().getContext();
    if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
      return UnableToLegalize; // Don't know what we're being asked to do.

    // Assume an unaligned pow-2 store: split it evenly in half.
    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
  }

  // Extend to the next pow-2. If this store was itself the result of lowering,
  // e.g. an s56 store being broken into s32 + s24, we might have a stored type
  // that's wider than the stored size.
  unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
  const LLT NewSrcTy = LLT::integer(AnyExtSize);

  if (SrcTy.isPointer()) {
    const LLT IntPtrTy = LLT::integer(SrcTy.getSizeInBits());
    SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
  }

  auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);

  // Obtain the smaller value by shifting away the larger value.
  auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
  auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);

  // Generate the PtrAdd and truncating stores.
  LLT PtrTy = MRI.getType(PtrReg);
  auto OffsetCst = MIRBuilder.buildConstant(LLT::integer(PtrTy.getSizeInBits()),
                                            LargeSplitSize / 8);
  auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);

  MachineMemOperand *LargeMMO =
      MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
  MachineMemOperand *SmallMMO =
      MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
  MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
  MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
  StoreMI.eraseFromParent();
  return Legalized;
}
4438
  // Handle a vector store whose element type is not byte-sized by packing the
  // whole vector's bit pattern into a single integer and storing that.
  Register SrcReg = StoreMI.getValueReg();
  Register PtrReg = StoreMI.getPointerReg();
  LLT SrcTy = MRI.getType(SrcReg);
  MachineMemOperand &MMO = **StoreMI.memoperands_begin();
  LLT MemTy = MMO.getMemoryType();
  LLT MemScalarTy = MemTy.getElementType();
  MachineFunction &MF = MIRBuilder.getMF();

  assert(SrcTy.isVector() && "Expect a vector store type");

  if (!MemScalarTy.isByteSized()) {
    // We need to build an integer scalar of the vector bit pattern.
    // It's not legal for us to add padding when storing a vector.
    unsigned NumBits = MemTy.getSizeInBits();
    LLT IntTy = LLT::integer(NumBits);
    auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
    LLT IdxTy = TLI.getVectorIdxLLT(MF.getDataLayout());

    // OR each truncated, zero-extended element into place. The shift index is
    // reversed on big-endian targets so the in-memory bit order matches.
    for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
      auto Elt = MIRBuilder.buildExtractVectorElement(
          SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
      auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
      auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
      unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
                                  ? (MemTy.getNumElements() - 1) - I
                                  : I;
      auto ShiftAmt = MIRBuilder.buildConstant(
          IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
      auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
      CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
    }
    auto PtrInfo = MMO.getPointerInfo();
    auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
    MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
    StoreMI.eraseFromParent();
    return Legalized;
  }

  // TODO: implement simple scalarization.
  return UnableToLegalize;
}
4482
LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
  // Dispatch the "bitcast" legalization action for the given opcode: rewrite
  // the instruction to operate on CastTy instead of the type at TypeIdx.
  switch (MI.getOpcode()) {
  case TargetOpcode::G_LOAD: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    MachineMemOperand &MMO = **MI.memoperands_begin();

    // Not sure how to interpret a bitcast of an extending load.
    if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    bitcastDst(MI, CastTy, 0);
    MMO.setType(CastTy);
    // The range metadata is no longer valid when reinterpreted as a different
    // type.
    MMO.clearRanges();
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    MachineMemOperand &MMO = **MI.memoperands_begin();

    // Not sure how to interpret a bitcast of a truncating store.
    if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 0);
    MMO.setType(CastTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_SELECT: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
      LLVM_DEBUG(
          dbgs() << "bitcast action not implemented for vector select\n");
      return UnableToLegalize;
    }

    // Cast both select inputs and the result; the condition is untouched.
    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 2);
    bitcastSrc(MI, CastTy, 3);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Bitwise logic is bit-pattern preserving, so all operands and the result
    // can be reinterpreted in the cast type directly.
    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 1);
    bitcastSrc(MI, CastTy, 2);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
  case TargetOpcode::G_CONCAT_VECTORS:
    return bitcastConcatVector(MI, TypeIdx, CastTy);
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return bitcastShuffleVector(MI, TypeIdx, CastTy);
  case TargetOpcode::G_EXTRACT_SUBVECTOR:
    return bitcastExtractSubvector(MI, TypeIdx, CastTy);
  case TargetOpcode::G_INSERT_SUBVECTOR:
    return bitcastInsertSubvector(MI, TypeIdx, CastTy);
  default:
    return UnableToLegalize;
  }
}
4563
// Legalize an instruction by changing the opcode in place. Only the
// instruction descriptor is swapped; the operand list is left untouched, so
// the new opcode must be compatible with the existing operands.
void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
  MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
}
4570
4572LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4573 using namespace TargetOpcode;
4574
4575 switch(MI.getOpcode()) {
4576 default:
4577 return UnableToLegalize;
4578 case TargetOpcode::G_FCONSTANT:
4579 return lowerFConstant(MI);
4580 case TargetOpcode::G_BITCAST:
4581 return lowerBitcast(MI);
4582 case TargetOpcode::G_SREM:
4583 case TargetOpcode::G_UREM: {
4584 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4585 auto Quot =
4586 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4587 {MI.getOperand(1), MI.getOperand(2)});
4588
4589 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
4590 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
4591 MI.eraseFromParent();
4592 return Legalized;
4593 }
4594 case TargetOpcode::G_SADDO:
4595 case TargetOpcode::G_SSUBO:
4596 return lowerSADDO_SSUBO(MI);
4597 case TargetOpcode::G_SADDE:
4598 return lowerSADDE(MI);
4599 case TargetOpcode::G_SSUBE:
4600 return lowerSSUBE(MI);
4601 case TargetOpcode::G_UMULH:
4602 case TargetOpcode::G_SMULH:
4603 return lowerSMULH_UMULH(MI);
4604 case TargetOpcode::G_SMULO:
4605 case TargetOpcode::G_UMULO: {
4606 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4607 // result.
4608 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
4609 LLT Ty = MRI.getType(Res);
4610
4611 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4612 ? TargetOpcode::G_SMULH
4613 : TargetOpcode::G_UMULH;
4614
4615 Observer.changingInstr(MI);
4616 const auto &TII = MIRBuilder.getTII();
4617 MI.setDesc(TII.get(TargetOpcode::G_MUL));
4618 MI.removeOperand(1);
4619 Observer.changedInstr(MI);
4620
4621 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4622 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4623
4624 // Move insert point forward so we can use the Res register if needed.
4625 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
4626
4627 // For *signed* multiply, overflow is detected by checking:
4628 // (hi != (lo >> bitwidth-1))
4629 if (Opcode == TargetOpcode::G_SMULH) {
4630 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4631 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4632 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
4633 } else {
4634 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
4635 }
4636 return Legalized;
4637 }
4638 case TargetOpcode::G_FNEG: {
4639 auto [Res, ResTy, SubByReg, SubByRegTy] = MI.getFirst2RegLLTs();
4640 LLT TyInt =
4641 ResTy.changeElementType(LLT::integer(ResTy.getScalarSizeInBits()));
4642 Register CastedSubByReg = SubByReg;
4643
4644 if (!SubByRegTy.getScalarType().isAnyScalar() &&
4645 !SubByRegTy.getScalarType().isInteger()) {
4646 auto BitcastDst = SubByRegTy.changeElementType(
4647 LLT::integer(SubByRegTy.getScalarSizeInBits()));
4648 CastedSubByReg = MIRBuilder.buildBitcast(BitcastDst, SubByReg).getReg(0);
4649 }
4650
4651 auto SignMask = MIRBuilder.buildConstant(
4652 TyInt, APInt::getSignMask(TyInt.getScalarSizeInBits()));
4653
4654 if (ResTy != TyInt) {
4655 Register NewDst =
4656 MIRBuilder.buildXor(TyInt, CastedSubByReg, SignMask).getReg(0);
4657 MIRBuilder.buildBitcast(Res, NewDst);
4658 } else
4659 MIRBuilder.buildXor(Res, CastedSubByReg, SignMask).getReg(0);
4660
4661 MI.eraseFromParent();
4662 return Legalized;
4663 }
4664 case TargetOpcode::G_FSUB:
4665 case TargetOpcode::G_STRICT_FSUB: {
4666 auto [Res, LHS, RHS] = MI.getFirst3Regs();
4667 LLT Ty = MRI.getType(Res);
4668
4669 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4670 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4671
4672 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4673 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4674 else
4675 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4676
4677 MI.eraseFromParent();
4678 return Legalized;
4679 }
4680 case TargetOpcode::G_FMAD:
4681 return lowerFMad(MI);
4682 case TargetOpcode::G_FFLOOR:
4683 return lowerFFloor(MI);
4684 case TargetOpcode::G_LROUND:
4685 case TargetOpcode::G_LLROUND: {
4686 Register DstReg = MI.getOperand(0).getReg();
4687 Register SrcReg = MI.getOperand(1).getReg();
4688 LLT SrcTy = MRI.getType(SrcReg);
4689 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4690 {SrcReg});
4691 MIRBuilder.buildFPTOSI(DstReg, Round);
4692 MI.eraseFromParent();
4693 return Legalized;
4694 }
4695 case TargetOpcode::G_INTRINSIC_ROUND:
4696 return lowerIntrinsicRound(MI);
4697 case TargetOpcode::G_FRINT: {
4698 // Since round even is the assumed rounding mode for unconstrained FP
4699 // operations, rint and roundeven are the same operation.
4700 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4701 return Legalized;
4702 }
4703 case TargetOpcode::G_INTRINSIC_LRINT:
4704 case TargetOpcode::G_INTRINSIC_LLRINT: {
4705 Register DstReg = MI.getOperand(0).getReg();
4706 Register SrcReg = MI.getOperand(1).getReg();
4707 LLT SrcTy = MRI.getType(SrcReg);
4708 auto Round =
4709 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4710 MIRBuilder.buildFPTOSI(DstReg, Round);
4711 MI.eraseFromParent();
4712 return Legalized;
4713 }
4714 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4715 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
4716 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4717 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4718 **MI.memoperands_begin());
4719 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4720 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4721 MI.eraseFromParent();
4722 return Legalized;
4723 }
4724 case TargetOpcode::G_LOAD:
4725 case TargetOpcode::G_SEXTLOAD:
4726 case TargetOpcode::G_ZEXTLOAD:
4727 return lowerLoad(cast<GAnyLoad>(MI));
4728 case TargetOpcode::G_STORE:
4729 return lowerStore(cast<GStore>(MI));
4730 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4731 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4732 case TargetOpcode::G_CTLZ:
4733 case TargetOpcode::G_CTTZ:
4734 case TargetOpcode::G_CTPOP:
4735 case TargetOpcode::G_CTLS:
4736 return lowerBitCount(MI);
4737 case G_UADDO: {
4738 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
4739
4740 Register NewRes = MRI.cloneVirtualRegister(Res);
4741
4742 MIRBuilder.buildAdd(NewRes, LHS, RHS);
4743 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4744
4745 MIRBuilder.buildCopy(Res, NewRes);
4746
4747 MI.eraseFromParent();
4748 return Legalized;
4749 }
4750 case G_UADDE: {
4751 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
4752 const LLT CondTy = MRI.getType(CarryOut);
4753 const LLT Ty = MRI.getType(Res);
4754
4755 Register NewRes = MRI.cloneVirtualRegister(Res);
4756
4757 // Initial add of the two operands.
4758 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
4759
4760 // Initial check for carry.
4761 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4762
4763 // Add the sum and the carry.
4764 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
4765 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4766
4767 // Second check for carry. We can only carry if the initial sum is all 1s
4768 // and the carry is set, resulting in a new sum of 0.
4769 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4770 auto ResEqZero =
4771 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
4772 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4773 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
4774
4775 MIRBuilder.buildCopy(Res, NewRes);
4776
4777 MI.eraseFromParent();
4778 return Legalized;
4779 }
4780 case G_USUBO: {
4781 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
4782
4783 MIRBuilder.buildSub(Res, LHS, RHS);
4784 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
4785
4786 MI.eraseFromParent();
4787 return Legalized;
4788 }
4789 case G_USUBE: {
4790 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
4791 const LLT CondTy = MRI.getType(BorrowOut);
4792 const LLT Ty = MRI.getType(Res);
4793
4794 // Initial subtract of the two operands.
4795 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
4796
4797 // Initial check for borrow.
4798 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4799
4800 // Subtract the borrow from the first subtract.
4801 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
4802 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4803
4804 // Second check for borrow. We can only borrow if the initial difference is
4805 // 0 and the borrow is set, resulting in a new difference of all 1s.
4806 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4807 auto TmpResEqZero =
4808 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4809 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4810 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4811
4812 MI.eraseFromParent();
4813 return Legalized;
4814 }
4815 case G_UITOFP:
4816 return lowerUITOFP(MI);
4817 case G_SITOFP:
4818 return lowerSITOFP(MI);
4819 case G_FPTOUI:
4820 return lowerFPTOUI(MI);
4821 case G_FPTOSI:
4822 return lowerFPTOSI(MI);
4823 case G_FPTOUI_SAT:
4824 case G_FPTOSI_SAT:
4825 return lowerFPTOINT_SAT(MI);
4826 case G_FPEXT:
4827 return lowerFPExtAndTruncMem(MI);
4828 case G_FPTRUNC:
4829 return lowerFPTRUNC(MI);
4830 case G_FPOWI:
4831 return lowerFPOWI(MI);
4832 case G_FMODF:
4833 return lowerFMODF(MI);
4834 case G_SMIN:
4835 case G_SMAX:
4836 case G_UMIN:
4837 case G_UMAX:
4838 return lowerMinMax(MI);
4839 case G_SCMP:
4840 case G_UCMP:
4841 return lowerThreewayCompare(MI);
4842 case G_FCOPYSIGN:
4843 return lowerFCopySign(MI);
4844 case G_FMINNUM:
4845 case G_FMAXNUM:
4846 case G_FMINIMUMNUM:
4847 case G_FMAXIMUMNUM:
4848 return lowerFMinNumMaxNum(MI);
4849 case G_FMINIMUM:
4850 case G_FMAXIMUM:
4851 return lowerFMinimumMaximum(MI);
4852 case G_MERGE_VALUES:
4853 return lowerMergeValues(MI);
4854 case G_UNMERGE_VALUES:
4855 return lowerUnmergeValues(MI);
4856 case TargetOpcode::G_SEXT_INREG: {
4857 assert(MI.getOperand(2).isImm() && "Expected immediate");
4858 int64_t SizeInBits = MI.getOperand(2).getImm();
4859
4860 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4861 LLT DstTy = MRI.getType(DstReg);
4862 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4863
4864 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4865 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4866 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4867 MI.eraseFromParent();
4868 return Legalized;
4869 }
4870 case G_EXTRACT_VECTOR_ELT:
4871 case G_INSERT_VECTOR_ELT:
4873 case G_SHUFFLE_VECTOR:
4874 return lowerShuffleVector(MI);
4875 case G_VECTOR_COMPRESS:
4876 return lowerVECTOR_COMPRESS(MI);
4877 case G_DYN_STACKALLOC:
4878 return lowerDynStackAlloc(MI);
4879 case G_STACKSAVE:
4880 return lowerStackSave(MI);
4881 case G_STACKRESTORE:
4882 return lowerStackRestore(MI);
4883 case G_EXTRACT:
4884 return lowerExtract(MI);
4885 case G_INSERT:
4886 return lowerInsert(MI);
4887 case G_BSWAP:
4888 return lowerBswap(MI);
4889 case G_BITREVERSE:
4890 return lowerBitreverse(MI);
4891 case G_READ_REGISTER:
4892 case G_WRITE_REGISTER:
4893 return lowerReadWriteRegister(MI);
4894 case G_UADDSAT:
4895 case G_USUBSAT: {
4896 // Try to make a reasonable guess about which lowering strategy to use. The
4897 // target can override this with custom lowering and calling the
4898 // implementation functions.
4899 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4900 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4901 return lowerAddSubSatToMinMax(MI);
4903 }
4904 case G_SADDSAT:
4905 case G_SSUBSAT: {
4906 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4907
4908 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4909 // since it's a shorter expansion. However, we would need to figure out the
4910 // preferred boolean type for the carry out for the query.
4911 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4912 return lowerAddSubSatToMinMax(MI);
4914 }
4915 case G_SSHLSAT:
4916 case G_USHLSAT:
4917 return lowerShlSat(MI);
4918 case G_ABS:
4919 return lowerAbsToAddXor(MI);
4920 case G_ABDS:
4921 case G_ABDU: {
4922 bool IsSigned = MI.getOpcode() == G_ABDS;
4923 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4924 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4925 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4926 return lowerAbsDiffToMinMax(MI);
4927 }
4928 return lowerAbsDiffToSelect(MI);
4929 }
4930 case G_FABS:
4931 return lowerFAbs(MI);
4932 case G_SELECT:
4933 return lowerSelect(MI);
4934 case G_IS_FPCLASS:
4935 return lowerISFPCLASS(MI);
4936 case G_SDIVREM:
4937 case G_UDIVREM:
4938 return lowerDIVREM(MI);
4939 case G_FSHL:
4940 case G_FSHR:
4941 return lowerFunnelShift(MI);
4942 case G_ROTL:
4943 case G_ROTR:
4944 return lowerRotate(MI);
4945 case G_MEMSET:
4946 case G_MEMCPY:
4947 case G_MEMMOVE:
4948 return lowerMemCpyFamily(MI);
4949 case G_MEMCPY_INLINE:
4950 return lowerMemcpyInline(MI);
4951 case G_ZEXT:
4952 case G_SEXT:
4953 case G_ANYEXT:
4954 return lowerEXT(MI);
4955 case G_TRUNC:
4956 return lowerTRUNC(MI);
4958 return lowerVectorReduction(MI);
4959 case G_VAARG:
4960 return lowerVAArg(MI);
4961 case G_ATOMICRMW_SUB: {
4962 auto [Ret, Mem, Val] = MI.getFirst3Regs();
4963 const LLT ValTy = MRI.getType(Val);
4964 MachineMemOperand *MMO = *MI.memoperands_begin();
4965
4966 auto VNeg = MIRBuilder.buildNeg(ValTy, Val);
4967 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4968 MI.eraseFromParent();
4969 return Legalized;
4970 }
4971 }
4972}
4973
4975 Align MinAlign) const {
4976 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4977 // datalayout for the preferred alignment. Also there should be a target hook
4978 // for this to allow targets to reduce the alignment and ignore the
4979 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4980 // the type.
4981 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4982}
4983
4986 MachinePointerInfo &PtrInfo) {
4987 MachineFunction &MF = MIRBuilder.getMF();
4988 const DataLayout &DL = MIRBuilder.getDataLayout();
4989 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4990
4991 unsigned AddrSpace = DL.getAllocaAddrSpace();
4992 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4993
4994 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4995 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4996}
4997
4999 const SrcOp &Val) {
5000 LLT SrcTy = Val.getLLTTy(MRI);
5001 Align StackTypeAlign =
5002 std::max(getStackTemporaryAlignment(SrcTy),
5004 MachinePointerInfo PtrInfo;
5005 auto StackTemp =
5006 createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo);
5007
5008 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
5009 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
5010}
5011
5013 LLT VecTy) {
5014 LLT IdxTy = B.getMRI()->getType(IdxReg);
5015 unsigned NElts = VecTy.getNumElements();
5016
5017 int64_t IdxVal;
5018 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
5019 if (IdxVal < VecTy.getNumElements())
5020 return IdxReg;
5021 // If a constant index would be out of bounds, clamp it as well.
5022 }
5023
5024 if (isPowerOf2_32(NElts)) {
5025 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
5026 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
5027 }
5028
5029 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
5030 .getReg(0);
5031}
5032
5034 Register Index) {
5035 LLT EltTy = VecTy.getElementType();
5036
5037 // Calculate the element offset and add it to the pointer.
5038 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
5039 assert(EltSize * 8 == EltTy.getSizeInBits() &&
5040 "Converting bits to bytes lost precision");
5041
5042 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
5043
5044 // Convert index to the correct size for the address space.
5045 const DataLayout &DL = MIRBuilder.getDataLayout();
5046 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
5047 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
5048 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
5049 if (IdxTy != MRI.getType(Index))
5050 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
5051
5052 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
5053 MIRBuilder.buildConstant(IdxTy, EltSize));
5054
5055 LLT PtrTy = MRI.getType(VecPtr);
5056 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
5057}
5058
5059#ifndef NDEBUG
5060/// Check that all vector operands have same number of elements. Other operands
5061/// should be listed in NonVecOp.
5064 std::initializer_list<unsigned> NonVecOpIndices) {
5065 if (MI.getNumMemOperands() != 0)
5066 return false;
5067
5068 LLT VecTy = MRI.getType(MI.getReg(0));
5069 if (!VecTy.isVector())
5070 return false;
5071 unsigned NumElts = VecTy.getNumElements();
5072
5073 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
5074 MachineOperand &Op = MI.getOperand(OpIdx);
5075 if (!Op.isReg()) {
5076 if (!is_contained(NonVecOpIndices, OpIdx))
5077 return false;
5078 continue;
5079 }
5080
5081 LLT Ty = MRI.getType(Op.getReg());
5082 if (!Ty.isVector()) {
5083 if (!is_contained(NonVecOpIndices, OpIdx))
5084 return false;
5085 continue;
5086 }
5087
5088 if (Ty.getNumElements() != NumElts)
5089 return false;
5090 }
5091
5092 return true;
5093}
5094#endif
5095
5096/// Fill \p DstOps with DstOps that have same number of elements combined as
5097/// the Ty. These DstOps have either scalar type when \p NumElts = 1 or are
5098/// vectors with \p NumElts elements. When Ty.getNumElements() is not multiple
5099/// of \p NumElts last DstOp (leftover) has fewer then \p NumElts elements.
5100static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
5101 unsigned NumElts) {
5102 LLT LeftoverTy;
5103 assert(Ty.isVector() && "Expected vector type");
5104 LLT NarrowTy = Ty.changeElementCount(ElementCount::getFixed(NumElts));
5105 int NumParts, NumLeftover;
5106 std::tie(NumParts, NumLeftover) =
5107 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
5108
5109 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
5110 for (int i = 0; i < NumParts; ++i) {
5111 DstOps.push_back(NarrowTy);
5112 }
5113
5114 if (LeftoverTy.isValid()) {
5115 assert(NumLeftover == 1 && "expected exactly one leftover");
5116 DstOps.push_back(LeftoverTy);
5117 }
5118}
5119
5120/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
5121/// made from \p Op depending on operand type.
5123 MachineOperand &Op) {
5124 for (unsigned i = 0; i < N; ++i) {
5125 if (Op.isReg())
5126 Ops.push_back(Op.getReg());
5127 else if (Op.isImm())
5128 Ops.push_back(Op.getImm());
5129 else if (Op.isPredicate())
5130 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
5131 else
5132 llvm_unreachable("Unsupported type");
5133 }
5134}
5135
5136// Handle splitting vector operations which need to have the same number of
5137// elements in each type index, but each type index may have a different element
5138// type.
5139//
5140// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
5141// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5142// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5143//
5144// Also handles some irregular breakdown cases, e.g.
5145// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
5146// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5147// s64 = G_SHL s64, s32
5150 GenericMachineInstr &MI, unsigned NumElts,
5151 std::initializer_list<unsigned> NonVecOpIndices) {
5152 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
5153 "Non-compatible opcode or not specified non-vector operands");
5154 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5155
5156 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5157 unsigned NumDefs = MI.getNumDefs();
5158
5159 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
5160 // Build instructions with DstOps to use instruction found by CSE directly.
5161 // CSE copies found instruction into given vreg when building with vreg dest.
5162 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
5163 // Output registers will be taken from created instructions.
5164 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
5165 for (unsigned i = 0; i < NumDefs; ++i) {
5166 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
5167 }
5168
5169 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
5170 // Operands listed in NonVecOpIndices will be used as is without splitting;
5171 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
5172 // scalar condition (op 1), immediate in sext_inreg (op 2).
5173 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
5174 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5175 ++UseIdx, ++UseNo) {
5176 if (is_contained(NonVecOpIndices, UseIdx)) {
5177 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
5178 MI.getOperand(UseIdx));
5179 } else {
5180 SmallVector<Register, 8> SplitPieces;
5181 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
5182 MRI);
5183 llvm::append_range(InputOpsPieces[UseNo], SplitPieces);
5184 }
5185 }
5186
5187 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5188
5189 // Take i-th piece of each input operand split and build sub-vector/scalar
5190 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
5191 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5193 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5194 Defs.push_back(OutputOpsPieces[DstNo][i]);
5195
5197 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5198 Uses.push_back(InputOpsPieces[InputNo][i]);
5199
5200 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
5201 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5202 OutputRegs[DstNo].push_back(I.getReg(DstNo));
5203 }
5204
5205 // Merge small outputs into MI's output for each def operand.
5206 if (NumLeftovers) {
5207 for (unsigned i = 0; i < NumDefs; ++i)
5208 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
5209 } else {
5210 for (unsigned i = 0; i < NumDefs; ++i)
5211 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
5212 }
5213
5214 MI.eraseFromParent();
5215 return Legalized;
5216}
5217
5220 unsigned NumElts) {
5221 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5222
5223 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5224 unsigned NumDefs = MI.getNumDefs();
5225
5226 SmallVector<DstOp, 8> OutputOpsPieces;
5227 SmallVector<Register, 8> OutputRegs;
5228 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
5229
5230 // Instructions that perform register split will be inserted in basic block
5231 // where register is defined (basic block is in the next operand).
5232 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
5233 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5234 UseIdx += 2, ++UseNo) {
5235 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
5236 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5237 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
5238 MIRBuilder, MRI);
5239 }
5240
5241 // Build PHIs with fewer elements.
5242 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5243 MIRBuilder.setInsertPt(*MI.getParent(), MI);
5244 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5245 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5246 Phi.addDef(
5247 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5248 OutputRegs.push_back(Phi.getReg(0));
5249
5250 for (unsigned j = 0; j < NumInputs / 2; ++j) {
5251 Phi.addUse(InputOpsPieces[j][i]);
5252 Phi.add(MI.getOperand(1 + j * 2 + 1));
5253 }
5254 }
5255
5256 // Set the insert point after the existing PHIs
5257 MachineBasicBlock &MBB = *MI.getParent();
5258 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
5259
5260 // Merge small outputs into MI's def.
5261 if (NumLeftovers) {
5262 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
5263 } else {
5264 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
5265 }
5266
5267 MI.eraseFromParent();
5268 return Legalized;
5269}
5270
5273 unsigned TypeIdx,
5274 LLT NarrowTy) {
5275 const int NumDst = MI.getNumOperands() - 1;
5276 const Register SrcReg = MI.getOperand(NumDst).getReg();
5277 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5278 LLT SrcTy = MRI.getType(SrcReg);
5279
5280 if (TypeIdx != 1 || NarrowTy == DstTy)
5281 return UnableToLegalize;
5282
5283 // Requires compatible types. Otherwise SrcReg should have been defined by
5284 // merge-like instruction that would get artifact combined. Most likely
5285 // instruction that defines SrcReg has to perform more/fewer elements
5286 // legalization compatible with NarrowTy.
5287 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5288 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5289
5290 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5291 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
5292 return UnableToLegalize;
5293
  // This is most likely DstTy (smaller than the register size) packed in SrcTy
  // (larger than the register size) and since the unmerge was not combined it
  // will be lowered to bit sequence extracts from a register. Unpack SrcTy to
  // NarrowTy (register size) pieces first. Then unpack each of the NarrowTy
  // pieces to DstTy.
5298
5299 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
5300 //
5301 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5302 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5303 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5304 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5305 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5306 const int PartsPerUnmerge = NumDst / NumUnmerge;
5307
5308 for (int I = 0; I != NumUnmerge; ++I) {
5309 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5310
5311 for (int J = 0; J != PartsPerUnmerge; ++J)
5312 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
5313 MIB.addUse(Unmerge.getReg(I));
5314 }
5315
5316 MI.eraseFromParent();
5317 return Legalized;
5318}
5319
5322 LLT NarrowTy) {
5323 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5324 // Requires compatible types. Otherwise user of DstReg did not perform unmerge
5325 // that should have been artifact combined. Most likely instruction that uses
5326 // DstReg has to do more/fewer elements legalization compatible with NarrowTy.
5327 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5328 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5329 if (NarrowTy == SrcTy)
5330 return UnableToLegalize;
5331
  // This attempts to lower part of an LCMTy merge/unmerge sequence. Intended
  // use is for old MIR tests. Since the switch to more/fewer-elements
  // legalization it should no longer be possible to generate MIR like this
  // when starting from LLVM IR, because the LCMTy approach was replaced with
  // merge/unmerge to vector elements.
5336 if (TypeIdx == 1) {
5337 assert(SrcTy.isVector() && "Expected vector types");
5338 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5339 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5340 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
5341 return UnableToLegalize;
5342 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
5343 //
5344 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
5345 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
5346 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
5347 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
5348 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
5349 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
5350
5352 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
5353 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
5354 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
5355 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5356 Elts.push_back(Unmerge.getReg(j));
5357 }
5358
5359 SmallVector<Register, 8> NarrowTyElts;
5360 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5361 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5362 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
5363 ++i, Offset += NumNarrowTyElts) {
5364 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
5365 NarrowTyElts.push_back(
5366 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5367 }
5368
5369 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5370 MI.eraseFromParent();
5371 return Legalized;
5372 }
5373
5374 assert(TypeIdx == 0 && "Bad type index");
5375 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5376 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5377 return UnableToLegalize;
5378
  // This is most likely SrcTy (smaller than the register size) packed in DstTy
  // (larger than the register size) and since the merge was not combined it
  // will be lowered to bit sequence packing into a register. Merge SrcTy to
  // NarrowTy (register size) pieces first. Then merge each of the NarrowTy
  // pieces to DstTy.
5383
5384 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
5385 //
5386 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5387 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5388 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
5389 SmallVector<Register, 8> NarrowTyElts;
5390 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5391 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5392 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
5393 for (unsigned i = 0; i < NumParts; ++i) {
5395 for (unsigned j = 0; j < NumElts; ++j)
5396 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
5397 NarrowTyElts.push_back(
5398 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5399 }
5400
5401 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5402 MI.eraseFromParent();
5403 return Legalized;
5404}
5405
5408 unsigned TypeIdx,
5409 LLT NarrowVecTy) {
5410 auto [DstReg, SrcVec] = MI.getFirst2Regs();
5411 Register InsertVal;
5412 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5413
5414 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
5415 if (IsInsert)
5416 InsertVal = MI.getOperand(2).getReg();
5417
5418 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
5419 LLT VecTy = MRI.getType(SrcVec);
5420
5421 // If the index is a constant, we can really break this down as you would
5422 // expect, and index into the target size pieces.
5423 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
5424 if (MaybeCst) {
5425 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5426 // Avoid out of bounds indexing the pieces.
5427 if (IdxVal >= VecTy.getNumElements()) {
5428 MIRBuilder.buildUndef(DstReg);
5429 MI.eraseFromParent();
5430 return Legalized;
5431 }
5432
5433 if (!NarrowVecTy.isVector()) {
5434 SmallVector<Register, 8> SplitPieces;
5435 extractParts(MI.getOperand(1).getReg(), NarrowVecTy,
5436 VecTy.getNumElements(), SplitPieces, MIRBuilder, MRI);
5437 if (IsInsert) {
5438 SplitPieces[IdxVal] = InsertVal;
5439 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), SplitPieces);
5440 } else {
5441 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5442 }
5443 } else {
5444 SmallVector<Register, 8> VecParts;
5445 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5446
5447 // Build a sequence of NarrowTy pieces in VecParts for this operand.
5448 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5449 TargetOpcode::G_ANYEXT);
5450
5451 unsigned NewNumElts = NarrowVecTy.getNumElements();
5452
5453 LLT IdxTy = MRI.getType(Idx);
5454 int64_t PartIdx = IdxVal / NewNumElts;
5455 auto NewIdx =
5456 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5457
5458 if (IsInsert) {
5459 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5460
5461 // Use the adjusted index to insert into one of the subvectors.
5462 auto InsertPart = MIRBuilder.buildInsertVectorElement(
5463 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5464 VecParts[PartIdx] = InsertPart.getReg(0);
5465
5466 // Recombine the inserted subvector with the others to reform the result
5467 // vector.
5468 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5469 } else {
5470 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5471 }
5472 }
5473
5474 MI.eraseFromParent();
5475 return Legalized;
5476 }
5477
5478 // With a variable index, we can't perform the operation in a smaller type, so
5479 // we're forced to expand this.
5480 //
5481 // TODO: We could emit a chain of compare/select to figure out which piece to
5482 // index.
5484}
5485
// (review) Legalizes a G_LOAD/G_STORE by splitting it into NarrowTy-sized
// memory accesses, plus one leftover access when the value size is not an
// even multiple of NarrowTy. Gives up (UnableToLegalize) on secondary type
// indices, non-byte-sized narrow types, atomics, and extending-load /
// truncating-store forms.
// NOTE(review): the opening signature line(s) of this function are missing
// from this extraction; only the trailing parameter is visible below.
5488 LLT NarrowTy) {
5489 // FIXME: Don't know how to handle secondary types yet.
5490 if (TypeIdx != 0)
5491 return UnableToLegalize;
5492
5493 if (!NarrowTy.isByteSized()) {
5494 LLVM_DEBUG(dbgs() << "Can't narrow load/store to non-byte-sized type\n");
5495 return UnableToLegalize;
5496 }
5497
5498 // This implementation doesn't work for atomics. Give up instead of doing
5499 // something invalid.
5500 if (LdStMI.isAtomic())
5501 return UnableToLegalize;
5502
5503 bool IsLoad = isa<GLoad>(LdStMI);
5504 Register ValReg = LdStMI.getReg(0);
5505 Register AddrReg = LdStMI.getPointerReg();
5506 LLT ValTy = MRI.getType(ValReg);
5507
5508 // FIXME: Do we need a distinct NarrowMemory legalize action?
// Reject extload/truncstore: the memory size must match the value size.
5509 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
5510 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
5511 return UnableToLegalize;
5512 }
5513
// For a load, only compute the part/leftover breakdown from the types; for
// a store, actually split the stored value into registers up front.
5514 int NumParts = -1;
5515 int NumLeftover = -1;
5516 LLT LeftoverTy;
5517 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
5518 if (IsLoad) {
5519 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
5520 } else {
5521 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5522 NarrowLeftoverRegs, MIRBuilder, MRI)) {
5523 NumParts = NarrowRegs.size();
5524 NumLeftover = NarrowLeftoverRegs.size();
5525 }
5526 }
5527
// NumParts stays -1 when the breakdown is unsatisfiable.
5528 if (NumParts == -1)
5529 return UnableToLegalize;
5530
5531 LLT PtrTy = MRI.getType(AddrReg);
5532 const LLT OffsetTy = LLT::integer(PtrTy.getSizeInBits());
5533
5534 unsigned TotalSize = ValTy.getSizeInBits();
5535
5536 // Split the load/store into PartTy sized pieces starting at Offset. If this
5537 // is a load, return the new registers in ValRegs. For a store, each elements
5538 // of ValRegs should be PartTy. Returns the next offset that needs to be
5539 // handled.
5540 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
5541 auto MMO = LdStMI.getMMO();
5542 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
5543 unsigned NumParts, unsigned Offset) -> unsigned {
5544 MachineFunction &MF = MIRBuilder.getMF();
5545 unsigned PartSize = PartTy.getSizeInBits();
// Offset walks downward on big-endian targets (see the update at the
// bottom of the loop), so also guard against running past TotalSize.
5546 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
5547 ++Idx) {
5548 unsigned ByteOffset = Offset / 8;
5549 Register NewAddrReg;
5550
5551 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5552 ByteOffset);
5553
// Derive a narrowed memory operand at the sub-offset of the original MMO.
5554 MachineMemOperand *NewMMO =
5555 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
5556
5557 if (IsLoad) {
5558 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5559 ValRegs.push_back(Dst);
5560 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5561 } else {
5562 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5563 }
5564 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
5565 }
5566
5567 return Offset;
5568 };
5569
// Big-endian starts from the high end so the (smaller) leftover piece ends
// up at the correct final offset.
5570 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
5571 unsigned HandledOffset =
5572 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
5573
5574 // Handle the rest of the register if this isn't an even type breakdown.
5575 if (LeftoverTy.isValid())
5576 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5577
// For loads, recombine the loaded pieces into the original wide value.
5578 if (IsLoad) {
5579 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5580 LeftoverTy, NarrowLeftoverRegs);
5581 }
5582
5583 LdStMI.eraseFromParent();
5584 return Legalized;
5585}
5586
// (review) Dispatch for the fewerElementsVector legalization action: reduce
// the element count of a vector operation by forwarding to a per-opcode
// helper. Most element-wise ops funnel into
// fewerElementsVectorMultiEltType, optionally listing operand indices that
// must NOT be split (predicates, immediates, scalar conditions, ...).
// NOTE(review): the opening signature line(s) and the declaration of GMI
// (used throughout the switch) are missing from this extraction.
5589 LLT NarrowTy) {
5590 using namespace TargetOpcode;
// A scalar NarrowTy means full scalarization: one element per piece.
5592 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5593
5594 switch (MI.getOpcode()) {
5595 case G_IMPLICIT_DEF:
5596 case G_TRUNC:
5597 case G_AND:
5598 case G_OR:
5599 case G_XOR:
5600 case G_ADD:
5601 case G_SUB:
5602 case G_MUL:
5603 case G_PTR_ADD:
5604 case G_SMULH:
5605 case G_UMULH:
5606 case G_FADD:
5607 case G_FMUL:
5608 case G_FSUB:
5609 case G_FNEG:
5610 case G_FABS:
5611 case G_FCANONICALIZE:
5612 case G_FDIV:
5613 case G_FREM:
5614 case G_FMA:
5615 case G_FMAD:
5616 case G_FPOW:
5617 case G_FEXP:
5618 case G_FEXP2:
5619 case G_FEXP10:
5620 case G_FLOG:
5621 case G_FLOG2:
5622 case G_FLOG10:
5623 case G_FLDEXP:
5624 case G_FNEARBYINT:
5625 case G_FCEIL:
5626 case G_FFLOOR:
5627 case G_FRINT:
5628 case G_INTRINSIC_LRINT:
5629 case G_INTRINSIC_LLRINT:
5630 case G_INTRINSIC_ROUND:
5631 case G_INTRINSIC_ROUNDEVEN:
5632 case G_LROUND:
5633 case G_LLROUND:
5634 case G_INTRINSIC_TRUNC:
5635 case G_FMODF:
5636 case G_FCOS:
5637 case G_FSIN:
5638 case G_FTAN:
5639 case G_FACOS:
5640 case G_FASIN:
5641 case G_FATAN:
5642 case G_FATAN2:
5643 case G_FCOSH:
5644 case G_FSINH:
5645 case G_FTANH:
5646 case G_FSQRT:
5647 case G_BSWAP:
5648 case G_BITREVERSE:
5649 case G_SDIV:
5650 case G_UDIV:
5651 case G_SREM:
5652 case G_UREM:
5653 case G_SDIVREM:
5654 case G_UDIVREM:
5655 case G_SMIN:
5656 case G_SMAX:
5657 case G_UMIN:
5658 case G_UMAX:
5659 case G_ABS:
5660 case G_FMINNUM:
5661 case G_FMAXNUM:
5662 case G_FMINNUM_IEEE:
5663 case G_FMAXNUM_IEEE:
5664 case G_FMINIMUM:
5665 case G_FMAXIMUM:
5666 case G_FMINIMUMNUM:
5667 case G_FMAXIMUMNUM:
5668 case G_FSHL:
5669 case G_FSHR:
5670 case G_ROTL:
5671 case G_ROTR:
5672 case G_FREEZE:
5673 case G_SADDSAT:
5674 case G_SSUBSAT:
5675 case G_UADDSAT:
5676 case G_USUBSAT:
5677 case G_UMULO:
5678 case G_SMULO:
5679 case G_SHL:
5680 case G_LSHR:
5681 case G_ASHR:
5682 case G_SSHLSAT:
5683 case G_USHLSAT:
5684 case G_CTLZ:
5685 case G_CTLZ_ZERO_UNDEF:
5686 case G_CTTZ:
5687 case G_CTTZ_ZERO_UNDEF:
5688 case G_CTPOP:
5689 case G_CTLS:
5690 case G_FCOPYSIGN:
5691 case G_ZEXT:
5692 case G_SEXT:
5693 case G_ANYEXT:
5694 case G_FPEXT:
5695 case G_FPTRUNC:
5696 case G_SITOFP:
5697 case G_UITOFP:
5698 case G_FPTOSI:
5699 case G_FPTOUI:
5700 case G_FPTOSI_SAT:
5701 case G_FPTOUI_SAT:
5702 case G_INTTOPTR:
5703 case G_PTRTOINT:
5704 case G_ADDRSPACE_CAST:
5705 case G_UADDO:
5706 case G_USUBO:
5707 case G_UADDE:
5708 case G_USUBE:
5709 case G_SADDO:
5710 case G_SSUBO:
5711 case G_SADDE:
5712 case G_SSUBE:
5713 case G_STRICT_FADD:
5714 case G_STRICT_FSUB:
5715 case G_STRICT_FMUL:
5716 case G_STRICT_FMA:
5717 case G_STRICT_FLDEXP:
5718 case G_FFREXP:
5719 case G_TRUNC_SSAT_S:
5720 case G_TRUNC_SSAT_U:
5721 case G_TRUNC_USAT_U:
// Purely element-wise operations: no operands need special treatment.
5722 return fewerElementsVectorMultiEltType(GMI, NumElts);
5723 case G_ICMP:
5724 case G_FCMP:
5725 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cpm predicate*/});
5726 case G_IS_FPCLASS:
5727 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
5728 case G_SELECT:
// A vector condition splits element-wise; a scalar condition is shared
// across all of the split pieces.
5729 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
5730 return fewerElementsVectorMultiEltType(GMI, NumElts);
5731 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
5732 case G_PHI:
5733 return fewerElementsVectorPhi(GMI, NumElts);
5734 case G_UNMERGE_VALUES:
5735 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
5736 case G_BUILD_VECTOR:
5737 assert(TypeIdx == 0 && "not a vector type index");
5738 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5739 case G_CONCAT_VECTORS:
5740 if (TypeIdx != 1) // TODO: This probably does work as expected already.
5741 return UnableToLegalize;
5742 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5743 case G_EXTRACT_VECTOR_ELT:
5744 case G_INSERT_VECTOR_ELT:
5745 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
5746 case G_LOAD:
5747 case G_STORE:
5748 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
5749 case G_SEXT_INREG:
5750 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
// NOTE(review): the case label(s) preceding this return (original line
// 5751, presumably the non-sequential G_VECREDUCE_* opcodes) are missing
// from this extraction.
5752 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
5753 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5754 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5755 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
5756 case G_SHUFFLE_VECTOR:
5757 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
5758 case G_FPOWI:
5759 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
5760 case G_BITCAST:
5761 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
5762 case G_INTRINSIC_FPTRUNC_ROUND:
5763 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
5764 default:
5765 return UnableToLegalize;
5766 }
5767}
5768
// (review) Splits a G_BITCAST into several NarrowTy-sized bitcasts of
// pieces of the source, then merges the results into the destination.
// Only TypeIdx == 0 is handled, and leftover (non-evenly-dividing) types
// are not supported yet (see the comment below).
// NOTE(review): the opening signature line(s) are missing from this
// extraction.
5771 LLT NarrowTy) {
5772 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5773 "Not a bitcast operation");
5774
5775 if (TypeIdx != 0)
5776 return UnableToLegalize;
5777
5778 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5779
// How many source scalar elements fit into one NarrowTy piece.
5780 unsigned NewElemCount =
5781 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5782 SmallVector<Register> SrcVRegs, BitcastVRegs;
5783 if (NewElemCount == 1) {
// One source element per piece: a plain unmerge suffices.
5784 LLT SrcNarrowTy = SrcTy.getElementType();
5785
5786 auto Unmerge = MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5787 getUnmergeResults(SrcVRegs, *Unmerge);
5788 } else {
// NOTE(review): the initializer of SrcNarrowTy (original line 5790) is
// missing from this extraction.
5789 LLT SrcNarrowTy =
5791
5792 // Split the Src and Dst Reg into smaller registers
5793 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5794 return UnableToLegalize;
5795 }
5796
5797 // Build new smaller bitcast instructions
5798 // Not supporting Leftover types for now but will have to
5799 for (Register Reg : SrcVRegs)
5800 BitcastVRegs.push_back(MIRBuilder.buildBitcast(NarrowTy, Reg).getReg(0));
5801
5802 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5803 MI.eraseFromParent();
5804 return Legalized;
5805}
5806
// (review) Splits a G_SHUFFLE_VECTOR into two half-width shuffles (Lo/Hi).
// For each half, it tries to express the result as a shuffle of at most two
// of the four half-width inputs; if more inputs are needed, it falls back
// to per-element extracts combined with a G_BUILD_VECTOR. This mirrors the
// SelectionDAG shuffle-splitting approach.
// NOTE(review): the opening signature line (original 5807) is missing from
// this extraction.
5808 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5809 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5810 if (TypeIdx != 0)
5811 return UnableToLegalize;
5812
5813 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5814 MI.getFirst3RegLLTs();
5815 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5816 // The shuffle should be canonicalized by now.
5817 if (DstTy != Src1Ty)
5818 return UnableToLegalize;
5819 if (DstTy != Src2Ty)
5820 return UnableToLegalize;
5821
5822 if (!isPowerOf2_32(DstTy.getNumElements()))
5823 return UnableToLegalize;
5824
5825 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5826 // Further legalization attempts will be needed to do split further.
5827 NarrowTy =
5828 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
5829 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5830
5831 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
5832 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
5833 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
// The four half-width inputs; mask indices are interpreted against this
// flattened array.
5834 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5835 SplitSrc2Regs[1]};
5836
5837 Register Hi, Lo;
5838
5839 // If Lo or Hi uses elements from at most two of the four input vectors, then
5840 // express it as a vector shuffle of those two inputs. Otherwise extract the
5841 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
// NOTE(review): the declaration of Ops (original line 5842, the scratch
// mask vector reused for both halves) is missing from this extraction.
5843 for (unsigned High = 0; High < 2; ++High) {
5844 Register &Output = High ? Hi : Lo;
5845
5846 // Build a shuffle mask for the output, discovering on the fly which
5847 // input vectors to use as shuffle operands (recorded in InputUsed).
5848 // If building a suitable shuffle vector proves too hard, then bail
5849 // out with useBuildVector set.
5850 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
5851 unsigned FirstMaskIdx = High * NewElts;
5852 bool UseBuildVector = false;
5853 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5854 // The mask element. This indexes into the input.
5855 int Idx = Mask[FirstMaskIdx + MaskOffset];
5856
5857 // The input vector this mask element indexes into.
5858 unsigned Input = (unsigned)Idx / NewElts;
5859
5860 if (Input >= std::size(Inputs)) {
5861 // The mask element does not index into any input vector.
5862 Ops.push_back(-1);
5863 continue;
5864 }
5865
5866 // Turn the index into an offset from the start of the input vector.
5867 Idx -= Input * NewElts;
5868
5869 // Find or create a shuffle vector operand to hold this input.
5870 unsigned OpNo;
5871 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5872 if (InputUsed[OpNo] == Input) {
5873 // This input vector is already an operand.
5874 break;
5875 } else if (InputUsed[OpNo] == -1U) {
5876 // Create a new operand for this input vector.
5877 InputUsed[OpNo] = Input;
5878 break;
5879 }
5880 }
5881
5882 if (OpNo >= std::size(InputUsed)) {
5883 // More than two input vectors used! Give up on trying to create a
5884 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5885 UseBuildVector = true;
5886 break;
5887 }
5888
5889 // Add the mask index for the new shuffle vector.
5890 Ops.push_back(Idx + OpNo * NewElts);
5891 }
5892
5893 if (UseBuildVector) {
5894 LLT EltTy = NarrowTy.getElementType();
// NOTE(review): the declaration of SVOps (original line 5895, the
// per-element register list) is missing from this extraction.
5896
5897 // Extract the input elements by hand.
5898 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5899 // The mask element. This indexes into the input.
5900 int Idx = Mask[FirstMaskIdx + MaskOffset];
5901
5902 // The input vector this mask element indexes into.
5903 unsigned Input = (unsigned)Idx / NewElts;
5904
5905 if (Input >= std::size(Inputs)) {
5906 // The mask element is "undef" or indexes off the end of the input.
5907 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5908 continue;
5909 }
5910
5911 // Turn the index into an offset from the start of the input vector.
5912 Idx -= Input * NewElts;
5913
5914 // Extract the vector element by hand.
5915 SVOps.push_back(MIRBuilder
5916 .buildExtractVectorElement(
5917 EltTy, Inputs[Input],
5918 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5919 .getReg(0));
5920 }
5921
5922 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5923 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5924 } else if (InputUsed[0] == -1U) {
5925 // No input vectors were used! The result is undefined.
5926 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5927 } else if (NewElts == 1) {
// Scalar pieces: a shuffle of one element is just a copy.
5928 Output = MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);
5929 } else {
5930 Register Op0 = Inputs[InputUsed[0]];
5931 // If only one input was used, use an undefined vector for the other.
5932 Register Op1 = InputUsed[1] == -1U
5933 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5934 : Inputs[InputUsed[1]];
5935 // At least one input vector was used. Create a new shuffle vector.
5936 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5937 }
5938
// Reset the scratch mask for the next (Hi) half.
5939 Ops.clear();
5940 }
5941
5942 MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
5943 MI.eraseFromParent();
5944 return Legalized;
5945}
5946
// (review) Narrows a (non-sequential) G_VECREDUCE_* by splitting the source
// into NarrowTy pieces. For scalar NarrowTy this fully scalarizes and
// combines the scalars with the reduction's scalar opcode (as a balanced
// tree when the part count is a power of two, otherwise sequentially). For
// vector NarrowTy it emits partial reductions per piece and then combines
// the partial results.
// NOTE(review): the opening signature line (original 5947) is missing from
// this extraction.
5948 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5949 auto &RdxMI = cast<GVecReduce>(MI);
5950
5951 if (TypeIdx != 1)
5952 return UnableToLegalize;
5953
5954 // The semantics of the normal non-sequential reductions allow us to freely
5955 // re-associate the operation.
5956 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5957
5958 if (NarrowTy.isVector() &&
5959 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5960 return UnableToLegalize;
5961
5962 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5963 SmallVector<Register> SplitSrcs;
5964 // If NarrowTy is a scalar then we're being asked to scalarize.
5965 const unsigned NumParts =
5966 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5967 : SrcTy.getNumElements();
5968
5969 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5970 if (NarrowTy.isScalar()) {
5971 if (DstTy != NarrowTy)
5972 return UnableToLegalize; // FIXME: handle implicit extensions.
5973
5974 if (isPowerOf2_32(NumParts)) {
5975 // Generate a tree of scalar operations to reduce the critical path.
5976 SmallVector<Register> PartialResults;
5977 unsigned NumPartsLeft = NumParts;
// Each round halves the number of live values by pairwise combining.
5978 while (NumPartsLeft > 1) {
5979 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5980 PartialResults.emplace_back(
5982 .buildInstr(ScalarOpc, {NarrowTy},
5983 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5984 .getReg(0));
5985 }
5986 SplitSrcs = PartialResults;
5987 PartialResults.clear();
5988 NumPartsLeft = SplitSrcs.size();
5989 }
5990 assert(SplitSrcs.size() == 1);
5991 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5992 MI.eraseFromParent();
5993 return Legalized;
5994 }
5995 // If we can't generate a tree, then just do sequential operations.
5996 Register Acc = SplitSrcs[0];
5997 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5998 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5999 .getReg(0);
6000 MIRBuilder.buildCopy(DstReg, Acc);
6001 MI.eraseFromParent();
6002 return Legalized;
6003 }
// Vector NarrowTy: reduce each piece independently first.
6004 SmallVector<Register> PartialReductions;
6005 for (unsigned Part = 0; Part < NumParts; ++Part) {
6006 PartialReductions.push_back(
6007 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
6008 .getReg(0));
6009 }
6010
6011 // If the types involved are powers of 2, we can generate intermediate vector
6012 // ops, before generating a final reduction operation.
6013 if (isPowerOf2_32(SrcTy.getNumElements()) &&
6014 isPowerOf2_32(NarrowTy.getNumElements())) {
6015 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
6016 }
6017
// Otherwise fold the partial reductions together sequentially; the final
// combine writes directly into DstReg.
6018 Register Acc = PartialReductions[0];
6019 for (unsigned Part = 1; Part < NumParts; ++Part) {
6020 if (Part == NumParts - 1) {
6021 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
6022 {Acc, PartialReductions[Part]});
6023 } else {
6024 Acc = MIRBuilder
6025 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
6026 .getReg(0);
6027 }
6028 }
6029 MI.eraseFromParent();
6030 return Legalized;
6031}
6032
// (review) Narrows G_VECREDUCE_SEQ_FADD/FMUL by fully scalarizing: the
// sequential (ordered) semantics forbid re-association, so the elements are
// folded strictly in order into the start-value accumulator.
// NOTE(review): the opening signature line(s) (original 6033-6034) are
// missing from this extraction.
6035 unsigned int TypeIdx,
6036 LLT NarrowTy) {
6037 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
6038 MI.getFirst3RegLLTs();
// Only scalarization of the source (TypeIdx 2) with matching scalar types
// is supported.
6039 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
6040 DstTy != NarrowTy)
6041 return UnableToLegalize;
6042
6043 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
6044 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
6045 "Unexpected vecreduce opcode");
6046 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
6047 ? TargetOpcode::G_FADD
6048 : TargetOpcode::G_FMUL;
6049
6050 SmallVector<Register> SplitSrcs;
6051 unsigned NumParts = SrcTy.getNumElements();
6052 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
// In-order left fold, seeded with the explicit start value.
6053 Register Acc = ScalarReg;
6054 for (unsigned i = 0; i < NumParts; i++)
6055 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
6056 .getReg(0);
6057
6058 MIRBuilder.buildCopy(DstReg, Acc);
6059 MI.eraseFromParent();
6060 return Legalized;
6061}
6062
// (review) Power-of-two reduction narrowing: split the source into NarrowTy
// pieces, tree-combine them pairwise with vector ops until one NarrowTy
// value remains, then rewrite the original reduction to operate on that
// single narrowed value in place.
// NOTE(review): the return-type line of this signature (original 6063) is
// missing from this extraction.
6064 LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
6065 LLT SrcTy, LLT NarrowTy,
6066 unsigned ScalarOpc) {
6067 SmallVector<Register> SplitSrcs;
6068 // Split the sources into NarrowTy size pieces.
6069 extractParts(SrcReg, NarrowTy,
6070 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
6071 MIRBuilder, MRI);
6072 // We're going to do a tree reduction using vector operations until we have
6073 // one NarrowTy size value left.
6074 while (SplitSrcs.size() > 1) {
6075 SmallVector<Register> PartialRdxs;
6076 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
6077 Register LHS = SplitSrcs[Idx];
6078 Register RHS = SplitSrcs[Idx + 1];
6079 // Create the intermediate vector op.
6080 Register Res =
6081 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
6082 PartialRdxs.push_back(Res);
6083 }
6084 SplitSrcs = std::move(PartialRdxs);
6085 }
6086 // Finally generate the requested NarrowTy based reduction.
// Mutate MI's source operand in place (under observer notification) rather
// than rebuilding the reduction instruction.
6087 Observer.changingInstr(MI);
6088 MI.getOperand(1).setReg(SplitSrcs[0]);
6089 Observer.changedInstr(MI);
6090 return Legalized;
6091}
6092
// (review) Expands a wide G_SHL/G_LSHR/G_ASHR with a *constant* shift
// amount into operations on the two HalfTy halves (InL/InH). For each
// opcode there are four cases: amount > full width, amount > half width,
// amount == half width, and the general in-half case that combines a shift
// with carry bits from the other half.
// NOTE(review): the opening signature line(s) (original 6093-6094) are
// missing from this extraction.
6095 const LLT HalfTy, const LLT AmtTy) {
6096
6097 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6098 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6099 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6100
// Shift by zero: just reassemble the input.
6101 if (Amt.isZero()) {
6102 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
6103 MI.eraseFromParent();
6104 return Legalized;
6105 }
6106
6107 LLT NVT = HalfTy;
6108 unsigned NVTBits = HalfTy.getSizeInBits();
6109 unsigned VTBits = 2 * NVTBits;
6110
6111 SrcOp Lo(Register(0)), Hi(Register(0));
6112 if (MI.getOpcode() == TargetOpcode::G_SHL) {
6113 if (Amt.ugt(VTBits)) {
6114 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6115 } else if (Amt.ugt(NVTBits)) {
6116 Lo = MIRBuilder.buildConstant(NVT, 0);
6117 Hi = MIRBuilder.buildShl(NVT, InL,
6118 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6119 } else if (Amt == NVTBits) {
6120 Lo = MIRBuilder.buildConstant(NVT, 0);
6121 Hi = InL;
6122 } else {
6123 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
// Hi = (InH << Amt) | (InL >> (NVTBits - Amt)): carry high bits of InL
// into the low bits of the shifted high half.
6124 auto OrLHS =
6125 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
6126 auto OrRHS = MIRBuilder.buildLShr(
6127 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6128 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6129 }
6130 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6131 if (Amt.ugt(VTBits)) {
6132 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6133 } else if (Amt.ugt(NVTBits)) {
6134 Lo = MIRBuilder.buildLShr(NVT, InH,
6135 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6136 Hi = MIRBuilder.buildConstant(NVT, 0);
6137 } else if (Amt == NVTBits) {
6138 Lo = InH;
6139 Hi = MIRBuilder.buildConstant(NVT, 0);
6140 } else {
6141 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6142
// Lo = (InL >> Amt) | (InH << (NVTBits - Amt)).
6143 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6144 auto OrRHS = MIRBuilder.buildShl(
6145 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6146
6147 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6148 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
6149 }
6150 } else {
// G_ASHR: shifted-out positions are filled with the sign of the high half.
6151 if (Amt.ugt(VTBits)) {
6152 Hi = Lo = MIRBuilder.buildAShr(
6153 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6154 } else if (Amt.ugt(NVTBits)) {
6155 Lo = MIRBuilder.buildAShr(NVT, InH,
6156 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6157 Hi = MIRBuilder.buildAShr(NVT, InH,
6158 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6159 } else if (Amt == NVTBits) {
6160 Lo = InH;
6161 Hi = MIRBuilder.buildAShr(NVT, InH,
6162 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6163 } else {
6164 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6165
6166 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6167 auto OrRHS = MIRBuilder.buildShl(
6168 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6169
6170 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6171 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
6172 }
6173 }
6174
6175 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
6176 MI.eraseFromParent();
6177
6178 return Legalized;
6179}
6180
// (review) Narrows a wide scalar shift. TypeIdx == 1 just narrows the
// shift-amount operand. Otherwise, for large ratios (>= 8 parts) it uses a
// direct multi-way decomposition; for a constant amount it defers to
// narrowScalarShiftByConstant; and in the general case it performs the full
// two-half expansion with runtime selects on the amount.
// NOTE(review): the opening signature line(s) (original 6181-6182) are
// missing from this extraction.
6183 LLT RequestedTy) {
6184 if (TypeIdx == 1) {
6185 Observer.changingInstr(MI);
6186 narrowScalarSrc(MI, RequestedTy, 2);
6187 Observer.changedInstr(MI);
6188 return Legalized;
6189 }
6190
6191 Register DstReg = MI.getOperand(0).getReg();
6192 LLT DstTy = MRI.getType(DstReg);
6193 if (DstTy.isVector())
6194 return UnableToLegalize;
6195
6196 Register Amt = MI.getOperand(2).getReg();
6197 LLT ShiftAmtTy = MRI.getType(Amt);
6198 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
6199 if (DstEltSize % 2 != 0)
6200 return UnableToLegalize;
6201
6202 // Check if we should use multi-way splitting instead of recursive binary
6203 // splitting.
6204 //
6205 // Multi-way splitting directly decomposes wide shifts (e.g., 128-bit ->
6206 // 4×32-bit) in a single legalization step, avoiding the recursive overhead
6207 // and dependency chains created by usual binary splitting approach
6208 // (128->64->32).
6209 //
6210 // The >= 8 parts threshold ensures we only use this optimization when binary
6211 // splitting would require multiple recursive passes, avoiding overhead for
6212 // simple 2-way splits where binary approach is sufficient.
6213 if (RequestedTy.isValid() && RequestedTy.isScalar() &&
6214 DstEltSize % RequestedTy.getSizeInBits() == 0) {
6215 const unsigned NumParts = DstEltSize / RequestedTy.getSizeInBits();
6216 // Use multiway if we have 8 or more parts (i.e., would need 3+ recursive
6217 // steps).
6218 if (NumParts >= 8)
6219 return narrowScalarShiftMultiway(MI, RequestedTy);
6220 }
6221
6222 // Fall back to binary splitting:
6223 // Ignore the input type. We can only go to exactly half the size of the
6224 // input. If that isn't small enough, the resulting pieces will be further
6225 // legalized.
6226 const unsigned NewBitSize = DstEltSize / 2;
6227 const LLT HalfTy = DstTy.getScalarType().changeElementSize(NewBitSize);
6228 const LLT CondTy = LLT::integer(1);
6229
6230 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
6231 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
6232 ShiftAmtTy);
6233 }
6234
6235 // TODO: Expand with known bits.
6236
6237 // Handle the fully general expansion by an unknown amount.
6238 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6239
6240 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6241 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6242 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6243
// AmtExcess = Amt - NewBitSize (used when Amt >= half width);
// AmtLack = NewBitSize - Amt (carry shift when Amt < half width).
6244 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6245 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6246
6247 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6248 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
6249 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
6250
6251 Register ResultRegs[2];
6252 switch (MI.getOpcode()) {
6253 case TargetOpcode::G_SHL: {
6254 // Short: ShAmt < NewBitSize
6255 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
6256
6257 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6258 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
6259 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6260
6261 // Long: ShAmt >= NewBitSize
6262 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
6263 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
6264
// The extra IsZero select guards the Amt == 0 case, where AmtLack equals
// the half width and the carry shift would otherwise be out of range.
6265 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6266 auto Hi = MIRBuilder.buildSelect(
6267 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6268
6269 ResultRegs[0] = Lo.getReg(0);
6270 ResultRegs[1] = Hi.getReg(0);
6271 break;
6272 }
6273 case TargetOpcode::G_LSHR:
6274 case TargetOpcode::G_ASHR: {
6275 // Short: ShAmt < NewBitSize
6276 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
6277
6278 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
6279 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6280 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6281
6282 // Long: ShAmt >= NewBitSize
// NOTE(review): the declaration of HiL (original line 6283) is missing
// from this extraction.
6284 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6285 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
6286 } else {
6287 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6288 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
6289 }
6290 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
6291 {InH, AmtExcess}); // Lo from Hi part.
6292
6293 auto Lo = MIRBuilder.buildSelect(
6294 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6295
6296 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6297
6298 ResultRegs[0] = Lo.getReg(0);
6299 ResultRegs[1] = Hi.getReg(0);
6300 break;
6301 }
6302 default:
6303 llvm_unreachable("not a shift");
6304 }
6305
6306 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6307 MI.eraseFromParent();
6308 return Legalized;
6309}
6310
// (review) Computes one output part of a multi-way narrowed shift when the
// shift amount is a known constant (Params.WordShift/BitShift hold constant
// registers). The amount decomposes into a whole-word move (ShiftWords)
// plus an intra-word bit shift (ShiftBits); each output part is either a
// fill value (zero / sign), a plain moved source part, or an OR of the
// shifted source part with carry bits from its neighbor.
// NOTE(review): the opening signature line (original 6311, with the return
// type, method name, and Opcode parameter) is missing from this extraction.
6312 unsigned PartIdx,
6313 unsigned NumParts,
6314 ArrayRef<Register> SrcParts,
6315 const ShiftParams &Params,
6316 LLT TargetTy, LLT ShiftAmtTy) {
6317 auto WordShiftConst = getIConstantVRegVal(Params.WordShift, MRI);
6318 auto BitShiftConst = getIConstantVRegVal(Params.BitShift, MRI);
6319 assert(WordShiftConst && BitShiftConst && "Expected constants");
6320
6321 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6322 const unsigned ShiftBits = BitShiftConst->getZExtValue();
// Word-aligned shifts (ShiftBits == 0) are pure part moves with no carry.
6323 const bool NeedsInterWordShift = ShiftBits != 0;
6324
6325 switch (Opcode) {
6326 case TargetOpcode::G_SHL: {
6327 // Data moves from lower indices to higher indices
6328 // If this part would come from a source beyond our range, it's zero
6329 if (PartIdx < ShiftWords)
6330 return Params.Zero;
6331
6332 unsigned SrcIdx = PartIdx - ShiftWords;
6333 if (!NeedsInterWordShift)
6334 return SrcParts[SrcIdx];
6335
6336 // Combine shifted main part with carry from previous part
6337 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6338 if (SrcIdx > 0) {
6339 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6340 Params.InvBitShift);
6341 return MIRBuilder.buildOr(TargetTy, Hi, Lo).getReg(0);
6342 }
6343 return Hi.getReg(0);
6344 }
6345
6346 case TargetOpcode::G_LSHR: {
6347 unsigned SrcIdx = PartIdx + ShiftWords;
6348 if (SrcIdx >= NumParts)
6349 return Params.Zero;
6350 if (!NeedsInterWordShift)
6351 return SrcParts[SrcIdx];
6352
6353 // Combine shifted main part with carry from next part
6354 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6355 if (SrcIdx + 1 < NumParts) {
6356 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6357 Params.InvBitShift);
6358 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6359 }
6360 return Lo.getReg(0);
6361 }
6362
6363 case TargetOpcode::G_ASHR: {
6364 // Like LSHR but preserves sign bit
6365 unsigned SrcIdx = PartIdx + ShiftWords;
6366 if (SrcIdx >= NumParts)
6367 return Params.SignBit;
6368 if (!NeedsInterWordShift)
6369 return SrcParts[SrcIdx];
6370
6371 // Only the original MSB part uses arithmetic shift to preserve sign. All
6372 // other parts use logical shift since they're just moving data bits.
6373 auto Lo =
6374 (SrcIdx == NumParts - 1)
6375 ? MIRBuilder.buildAShr(TargetTy, SrcParts[SrcIdx], Params.BitShift)
6376 : MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
// Past the top part, carry bits come from the sign-fill value.
6377 Register HiSrc =
6378 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.SignBit;
6379 auto Hi = MIRBuilder.buildShl(TargetTy, HiSrc, Params.InvBitShift);
6380 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6381 }
6382
6383 default:
6384 llvm_unreachable("not a shift");
6385 }
6386}
6387
// (review) Builds one output part of a multi-way narrowed shift with a
// *runtime* shift amount: shifts MainOperand by ShiftAmt and, when a
// CarryOperand is supplied, ORs in the bits that cross the part boundary
// from the adjacent part, with a select guarding the ShiftAmt == 0 case.
// NOTE(review): the opening signature line (original 6388, with the return
// type, method name, and Opcode parameter) is missing from this extraction.
6389 Register MainOperand,
6390 Register ShiftAmt,
6391 LLT TargetTy,
6392 Register CarryOperand) {
6393 // This helper generates a single output part for variable shifts by combining
6394 // the main operand (shifted by BitShift) with carry bits from an adjacent
6395 // part.
6396
6397 // For G_ASHR, individual parts don't have their own sign bit, only the
6398 // complete value does. So we use LSHR for the main operand shift in ASHR
6399 // context.
6400 unsigned MainOpcode = (Opcode == TargetOpcode::G_ASHR)
6401 ? static_cast<unsigned>(TargetOpcode::G_LSHR)
6402 : Opcode;
6403
6404 // Perform the primary shift on the main operand
6405 Register MainShifted =
6406 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6407 .getReg(0);
6408
6409 // No carry operand available
6410 if (!CarryOperand.isValid())
6411 return MainShifted;
6412
6413 // If BitShift is 0 (word-aligned shift), no inter-word bit movement occurs,
6414 // so carry bits aren't needed.
6415 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6416 auto ZeroConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6417 LLT BoolTy = LLT::scalar(1);
6418 auto IsZeroBitShift =
6419 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, ShiftAmt, ZeroConst);
6420
6421 // Extract bits from the adjacent part that will "carry over" into this part.
6422 // The carry direction is opposite to the main shift direction, so we can
6423 // align the two shifted values before combining them with OR.
6424
6425 // Determine the carry shift opcode (opposite direction)
6426 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6427 : TargetOpcode::G_SHL;
6428
6429 // Calculate inverse shift amount: BitWidth - ShiftAmt
6430 auto TargetBitsConst =
6431 MIRBuilder.buildConstant(ShiftAmtTy, TargetTy.getScalarSizeInBits());
6432 auto InvShiftAmt = MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6433
6434 // Shift the carry operand
// NOTE(review): the line starting this expression (original 6436,
// presumably "MIRBuilder") is missing from this extraction.
6435 Register CarryBits =
6437 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6438 .getReg(0);
6439
6440 // If BitShift is 0, don't include carry bits (InvShiftAmt would equal
6441 // TargetBits which would be poison for the individual carry shift operation).
6442 auto ZeroReg = MIRBuilder.buildConstant(TargetTy, 0);
6443 Register SafeCarryBits =
6444 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6445 .getReg(0);
6446
6447 // Combine the main shifted part with the carry bits
6448 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6449}
6450
6453 const APInt &Amt,
6454 LLT TargetTy,
6455 LLT ShiftAmtTy) {
6456 // Any wide shift can be decomposed into WordShift + BitShift components.
6457 // When shift amount is known constant, directly compute the decomposition
6458 // values and generate constant registers.
6459 Register DstReg = MI.getOperand(0).getReg();
6460 Register SrcReg = MI.getOperand(1).getReg();
6461 LLT DstTy = MRI.getType(DstReg);
6462
6463 const unsigned DstBits = DstTy.getScalarSizeInBits();
6464 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6465 const unsigned NumParts = DstBits / TargetBits;
6466
6467 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6468
6469 // When the shift amount is known at compile time, we just calculate which
6470 // source parts contribute to each output part.
6471
6472 SmallVector<Register, 8> SrcParts;
6473 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6474
6475 if (Amt.isZero()) {
6476 // No shift needed, just copy
6477 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6478 MI.eraseFromParent();
6479 return Legalized;
6480 }
6481
6482 ShiftParams Params;
6483 const unsigned ShiftWords = Amt.getZExtValue() / TargetBits;
6484 const unsigned ShiftBits = Amt.getZExtValue() % TargetBits;
6485
6486 // Generate constants and values needed by all shift types
6487 Params.WordShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftWords).getReg(0);
6488 Params.BitShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftBits).getReg(0);
6489 Params.InvBitShift =
6490 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6491 Params.Zero = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6492
6493 // For ASHR, we need the sign-extended value to fill shifted-out positions
6494 if (MI.getOpcode() == TargetOpcode::G_ASHR)
6495 Params.SignBit =
6497 .buildAShr(TargetTy, SrcParts[SrcParts.size() - 1],
6498 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6499 .getReg(0);
6500
6501 SmallVector<Register, 8> DstParts(NumParts);
6502 for (unsigned I = 0; I < NumParts; ++I)
6503 DstParts[I] = buildConstantShiftPart(MI.getOpcode(), I, NumParts, SrcParts,
6504 Params, TargetTy, ShiftAmtTy);
6505
6506 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6507 MI.eraseFromParent();
6508 return Legalized;
6509}
6510
6513 Register DstReg = MI.getOperand(0).getReg();
6514 Register SrcReg = MI.getOperand(1).getReg();
6515 Register AmtReg = MI.getOperand(2).getReg();
6516 LLT DstTy = MRI.getType(DstReg);
6517 LLT ShiftAmtTy = MRI.getType(AmtReg);
6518
6519 const unsigned DstBits = DstTy.getScalarSizeInBits();
6520 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6521 const unsigned NumParts = DstBits / TargetBits;
6522
6523 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6524 assert(isPowerOf2_32(TargetBits) && "Target bit width must be power of 2");
6525
6526 // If the shift amount is known at compile time, we can use direct indexing
6527 // instead of generating select chains in the general case.
6528 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI))
6529 return narrowScalarShiftByConstantMultiway(MI, VRegAndVal->Value, TargetTy,
6530 ShiftAmtTy);
6531
6532 // For runtime-variable shift amounts, we must generate a more complex
6533 // sequence that handles all possible shift values using select chains.
6534
6535 // Split the input into target-sized pieces
6536 SmallVector<Register, 8> SrcParts;
6537 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6538
6539 // Shifting by zero should be a no-op.
6540 auto ZeroAmtConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6541 LLT BoolTy = LLT::scalar(1);
6542 auto IsZeroShift =
6543 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, AmtReg, ZeroAmtConst);
6544
6545 // Any wide shift can be decomposed into two components:
6546 // 1. WordShift: number of complete target-sized words to shift
6547 // 2. BitShift: number of bits to shift within each word
6548 //
6549 // Example: 128-bit >> 50 with 32-bit target:
6550 // WordShift = 50 / 32 = 1 (shift right by 1 complete word)
6551 // BitShift = 50 % 32 = 18 (shift each word right by 18 bits)
6552 unsigned TargetBitsLog2 = Log2_32(TargetBits);
6553 auto TargetBitsLog2Const =
6554 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6555 auto TargetBitsMask = MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6556
6557 Register WordShift =
6558 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6559 Register BitShift =
6560 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6561
6562 // Fill values:
6563 // - SHL/LSHR: fill with zeros
6564 // - ASHR: fill with sign-extended MSB
6565 Register ZeroReg = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6566
6567 Register FillValue;
6568 if (MI.getOpcode() == TargetOpcode::G_ASHR) {
6569 auto TargetBitsMinusOneConst =
6570 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6571 FillValue = MIRBuilder
6572 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6573 TargetBitsMinusOneConst)
6574 .getReg(0);
6575 } else {
6576 FillValue = ZeroReg;
6577 }
6578
6579 SmallVector<Register, 8> DstParts(NumParts);
6580
6581 // For each output part, generate a select chain that chooses the correct
6582 // result based on the runtime WordShift value. This handles all possible
6583 // word shift amounts by pre-calculating what each would produce.
6584 for (unsigned I = 0; I < NumParts; ++I) {
6585 // Initialize with appropriate default value for this shift type
6586 Register InBoundsResult = FillValue;
6587
6588 // clang-format off
6589 // Build a branchless select chain by pre-computing results for all possible
6590 // WordShift values (0 to NumParts-1). Each iteration nests a new select:
6591 //
6592 // K=0: select(WordShift==0, result0, FillValue)
6593 // K=1: select(WordShift==1, result1, select(WordShift==0, result0, FillValue))
6594 // K=2: select(WordShift==2, result2, select(WordShift==1, result1, select(...)))
6595 // clang-format on
6596 for (unsigned K = 0; K < NumParts; ++K) {
6597 auto WordShiftKConst = MIRBuilder.buildConstant(ShiftAmtTy, K);
6598 auto IsWordShiftK = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy,
6599 WordShift, WordShiftKConst);
6600
6601 // Calculate source indices for this word shift
6602 //
6603 // For 4-part 128-bit value with K=1 word shift:
6604 // SHL: [3][2][1][0] << K => [2][1][0][Z]
6605 // -> (MainIdx = I-K, CarryIdx = I-K-1)
6606 // LSHR: [3][2][1][0] >> K => [Z][3][2][1]
6607 // -> (MainIdx = I+K, CarryIdx = I+K+1)
6608 int MainSrcIdx;
6609 int CarrySrcIdx; // Index for the word that provides the carried-in bits.
6610
6611 switch (MI.getOpcode()) {
6612 case TargetOpcode::G_SHL:
6613 MainSrcIdx = (int)I - (int)K;
6614 CarrySrcIdx = MainSrcIdx - 1;
6615 break;
6616 case TargetOpcode::G_LSHR:
6617 case TargetOpcode::G_ASHR:
6618 MainSrcIdx = (int)I + (int)K;
6619 CarrySrcIdx = MainSrcIdx + 1;
6620 break;
6621 default:
6622 llvm_unreachable("Not a shift");
6623 }
6624
6625 // Check bounds and build the result for this word shift
6626 Register ResultForK;
6627 if (MainSrcIdx >= 0 && MainSrcIdx < (int)NumParts) {
6628 Register MainOp = SrcParts[MainSrcIdx];
6629 Register CarryOp;
6630
6631 // Determine carry operand with bounds checking
6632 if (CarrySrcIdx >= 0 && CarrySrcIdx < (int)NumParts)
6633 CarryOp = SrcParts[CarrySrcIdx];
6634 else if (MI.getOpcode() == TargetOpcode::G_ASHR &&
6635 CarrySrcIdx >= (int)NumParts)
6636 CarryOp = FillValue; // Use sign extension
6637
6638 ResultForK = buildVariableShiftPart(MI.getOpcode(), MainOp, BitShift,
6639 TargetTy, CarryOp);
6640 } else {
6641 // Out of bounds - use fill value for this k
6642 ResultForK = FillValue;
6643 }
6644
6645 // Select this result if WordShift equals k
6646 InBoundsResult =
6648 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6649 .getReg(0);
6650 }
6651
6652 // Handle zero-shift special case: if shift is 0, use original input
6653 DstParts[I] =
6655 .buildSelect(TargetTy, IsZeroShift, SrcParts[I], InBoundsResult)
6656 .getReg(0);
6657 }
6658
6659 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6660 MI.eraseFromParent();
6661 return Legalized;
6662}
6663
6666 LLT MoreTy) {
6667 assert(TypeIdx == 0 && "Expecting only Idx 0");
6668
6669 Observer.changingInstr(MI);
6670 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
6671 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
6672 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
6673 moreElementsVectorSrc(MI, MoreTy, I);
6674 }
6675
6676 MachineBasicBlock &MBB = *MI.getParent();
6677 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
6678 moreElementsVectorDst(MI, MoreTy, 0);
6679 Observer.changedInstr(MI);
6680 return Legalized;
6681}
6682
6683MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
6684 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
6685 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
6686
6687 switch (Opcode) {
6688 default:
6690 "getNeutralElementForVecReduce called with invalid opcode!");
6691 case TargetOpcode::G_VECREDUCE_ADD:
6692 case TargetOpcode::G_VECREDUCE_OR:
6693 case TargetOpcode::G_VECREDUCE_XOR:
6694 case TargetOpcode::G_VECREDUCE_UMAX:
6695 return MIRBuilder.buildConstant(Ty, 0);
6696 case TargetOpcode::G_VECREDUCE_MUL:
6697 return MIRBuilder.buildConstant(Ty, 1);
6698 case TargetOpcode::G_VECREDUCE_AND:
6699 case TargetOpcode::G_VECREDUCE_UMIN:
6701 Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
6702 case TargetOpcode::G_VECREDUCE_SMAX:
6704 Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
6705 case TargetOpcode::G_VECREDUCE_SMIN:
6707 Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
6708 case TargetOpcode::G_VECREDUCE_FADD:
6709 return MIRBuilder.buildFConstant(Ty, -0.0);
6710 case TargetOpcode::G_VECREDUCE_FMUL:
6711 return MIRBuilder.buildFConstant(Ty, 1.0);
6712 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6713 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6714 assert(false && "getNeutralElementForVecReduce unimplemented for "
6715 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6716 }
6717 llvm_unreachable("switch expected to return!");
6718}
6719
6722 LLT MoreTy) {
6723 unsigned Opc = MI.getOpcode();
6724 switch (Opc) {
6725 case TargetOpcode::G_IMPLICIT_DEF:
6726 case TargetOpcode::G_LOAD: {
6727 if (TypeIdx != 0)
6728 return UnableToLegalize;
6729 Observer.changingInstr(MI);
6730 moreElementsVectorDst(MI, MoreTy, 0);
6731 Observer.changedInstr(MI);
6732 return Legalized;
6733 }
6734 case TargetOpcode::G_STORE:
6735 if (TypeIdx != 0)
6736 return UnableToLegalize;
6737 Observer.changingInstr(MI);
6738 moreElementsVectorSrc(MI, MoreTy, 0);
6739 Observer.changedInstr(MI);
6740 return Legalized;
6741 case TargetOpcode::G_AND:
6742 case TargetOpcode::G_OR:
6743 case TargetOpcode::G_XOR:
6744 case TargetOpcode::G_ADD:
6745 case TargetOpcode::G_SUB:
6746 case TargetOpcode::G_MUL:
6747 case TargetOpcode::G_FADD:
6748 case TargetOpcode::G_FSUB:
6749 case TargetOpcode::G_FMUL:
6750 case TargetOpcode::G_FDIV:
6751 case TargetOpcode::G_FCOPYSIGN:
6752 case TargetOpcode::G_UADDSAT:
6753 case TargetOpcode::G_USUBSAT:
6754 case TargetOpcode::G_SADDSAT:
6755 case TargetOpcode::G_SSUBSAT:
6756 case TargetOpcode::G_SMIN:
6757 case TargetOpcode::G_SMAX:
6758 case TargetOpcode::G_UMIN:
6759 case TargetOpcode::G_UMAX:
6760 case TargetOpcode::G_FMINNUM:
6761 case TargetOpcode::G_FMAXNUM:
6762 case TargetOpcode::G_FMINNUM_IEEE:
6763 case TargetOpcode::G_FMAXNUM_IEEE:
6764 case TargetOpcode::G_FMINIMUM:
6765 case TargetOpcode::G_FMAXIMUM:
6766 case TargetOpcode::G_FMINIMUMNUM:
6767 case TargetOpcode::G_FMAXIMUMNUM:
6768 case TargetOpcode::G_STRICT_FADD:
6769 case TargetOpcode::G_STRICT_FSUB:
6770 case TargetOpcode::G_STRICT_FMUL: {
6771 Observer.changingInstr(MI);
6772 moreElementsVectorSrc(MI, MoreTy, 1);
6773 moreElementsVectorSrc(MI, MoreTy, 2);
6774 moreElementsVectorDst(MI, MoreTy, 0);
6775 Observer.changedInstr(MI);
6776 return Legalized;
6777 }
6778 case TargetOpcode::G_SHL:
6779 case TargetOpcode::G_ASHR:
6780 case TargetOpcode::G_LSHR: {
6781 Observer.changingInstr(MI);
6782 moreElementsVectorSrc(MI, MoreTy, 1);
6783 // The shift operand may have a different scalar type from the source and
6784 // destination operands.
6785 LLT ShiftMoreTy = MoreTy.changeElementType(
6786 MRI.getType(MI.getOperand(2).getReg()).getElementType());
6787 moreElementsVectorSrc(MI, ShiftMoreTy, 2);
6788 moreElementsVectorDst(MI, MoreTy, 0);
6789 Observer.changedInstr(MI);
6790 return Legalized;
6791 }
6792 case TargetOpcode::G_FMA:
6793 case TargetOpcode::G_STRICT_FMA:
6794 case TargetOpcode::G_FSHR:
6795 case TargetOpcode::G_FSHL: {
6796 Observer.changingInstr(MI);
6797 moreElementsVectorSrc(MI, MoreTy, 1);
6798 moreElementsVectorSrc(MI, MoreTy, 2);
6799 moreElementsVectorSrc(MI, MoreTy, 3);
6800 moreElementsVectorDst(MI, MoreTy, 0);
6801 Observer.changedInstr(MI);
6802 return Legalized;
6803 }
6804 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6805 case TargetOpcode::G_EXTRACT:
6806 if (TypeIdx != 1)
6807 return UnableToLegalize;
6808 Observer.changingInstr(MI);
6809 moreElementsVectorSrc(MI, MoreTy, 1);
6810 Observer.changedInstr(MI);
6811 return Legalized;
6812 case TargetOpcode::G_INSERT:
6813 case TargetOpcode::G_INSERT_VECTOR_ELT:
6814 case TargetOpcode::G_FREEZE:
6815 case TargetOpcode::G_FNEG:
6816 case TargetOpcode::G_FABS:
6817 case TargetOpcode::G_FSQRT:
6818 case TargetOpcode::G_FCEIL:
6819 case TargetOpcode::G_FFLOOR:
6820 case TargetOpcode::G_FNEARBYINT:
6821 case TargetOpcode::G_FRINT:
6822 case TargetOpcode::G_INTRINSIC_ROUND:
6823 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6824 case TargetOpcode::G_INTRINSIC_TRUNC:
6825 case TargetOpcode::G_BITREVERSE:
6826 case TargetOpcode::G_BSWAP:
6827 case TargetOpcode::G_FCANONICALIZE:
6828 case TargetOpcode::G_SEXT_INREG:
6829 case TargetOpcode::G_ABS:
6830 case TargetOpcode::G_CTLZ:
6831 case TargetOpcode::G_CTPOP:
6832 if (TypeIdx != 0)
6833 return UnableToLegalize;
6834 Observer.changingInstr(MI);
6835 moreElementsVectorSrc(MI, MoreTy, 1);
6836 moreElementsVectorDst(MI, MoreTy, 0);
6837 Observer.changedInstr(MI);
6838 return Legalized;
6839 case TargetOpcode::G_SELECT: {
6840 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
6841 if (TypeIdx == 1) {
6842 if (!CondTy.isScalar() ||
6843 DstTy.getElementCount() != MoreTy.getElementCount())
6844 return UnableToLegalize;
6845
6846 // This is turning a scalar select of vectors into a vector
6847 // select. Broadcast the select condition.
6848 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6849 Observer.changingInstr(MI);
6850 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6851 Observer.changedInstr(MI);
6852 return Legalized;
6853 }
6854
6855 if (CondTy.isVector())
6856 return UnableToLegalize;
6857
6858 Observer.changingInstr(MI);
6859 moreElementsVectorSrc(MI, MoreTy, 2);
6860 moreElementsVectorSrc(MI, MoreTy, 3);
6861 moreElementsVectorDst(MI, MoreTy, 0);
6862 Observer.changedInstr(MI);
6863 return Legalized;
6864 }
6865 case TargetOpcode::G_UNMERGE_VALUES:
6866 return UnableToLegalize;
6867 case TargetOpcode::G_PHI:
6868 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
6869 case TargetOpcode::G_SHUFFLE_VECTOR:
6870 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
6871 case TargetOpcode::G_BUILD_VECTOR: {
6873 for (auto Op : MI.uses()) {
6874 Elts.push_back(Op.getReg());
6875 }
6876
6877 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
6878 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
6879 }
6880
6881 MIRBuilder.buildDeleteTrailingVectorElements(
6882 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
6883 MI.eraseFromParent();
6884 return Legalized;
6885 }
6886 case TargetOpcode::G_SEXT:
6887 case TargetOpcode::G_ZEXT:
6888 case TargetOpcode::G_ANYEXT:
6889 case TargetOpcode::G_TRUNC:
6890 case TargetOpcode::G_FPTRUNC:
6891 case TargetOpcode::G_FPEXT:
6892 case TargetOpcode::G_FPTOSI:
6893 case TargetOpcode::G_FPTOUI:
6894 case TargetOpcode::G_FPTOSI_SAT:
6895 case TargetOpcode::G_FPTOUI_SAT:
6896 case TargetOpcode::G_SITOFP:
6897 case TargetOpcode::G_UITOFP: {
6898 Observer.changingInstr(MI);
6899 LLT SrcExtTy;
6900 LLT DstExtTy;
6901 if (TypeIdx == 0) {
6902 DstExtTy = MoreTy;
6903 SrcExtTy = MoreTy.changeElementType(
6904 MRI.getType(MI.getOperand(1).getReg()).getElementType());
6905 } else {
6906 DstExtTy = MoreTy.changeElementType(
6907 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6908 SrcExtTy = MoreTy;
6909 }
6910 moreElementsVectorSrc(MI, SrcExtTy, 1);
6911 moreElementsVectorDst(MI, DstExtTy, 0);
6912 Observer.changedInstr(MI);
6913 return Legalized;
6914 }
6915 case TargetOpcode::G_ICMP:
6916 case TargetOpcode::G_FCMP: {
6917 if (TypeIdx != 1)
6918 return UnableToLegalize;
6919
6920 Observer.changingInstr(MI);
6921 moreElementsVectorSrc(MI, MoreTy, 2);
6922 moreElementsVectorSrc(MI, MoreTy, 3);
6923 LLT CondTy = MoreTy.changeVectorElementType(
6924 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6925 moreElementsVectorDst(MI, CondTy, 0);
6926 Observer.changedInstr(MI);
6927 return Legalized;
6928 }
6929 case TargetOpcode::G_BITCAST: {
6930 if (TypeIdx != 0)
6931 return UnableToLegalize;
6932
6933 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
6934 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6935
6936 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
6937 if (coefficient % DstTy.getNumElements() != 0)
6938 return UnableToLegalize;
6939
6940 coefficient = coefficient / DstTy.getNumElements();
6941
6942 LLT NewTy = SrcTy.changeElementCount(
6943 ElementCount::get(coefficient, MoreTy.isScalable()));
6944 Observer.changingInstr(MI);
6945 moreElementsVectorSrc(MI, NewTy, 1);
6946 moreElementsVectorDst(MI, MoreTy, 0);
6947 Observer.changedInstr(MI);
6948 return Legalized;
6949 }
6950 case TargetOpcode::G_VECREDUCE_FADD:
6951 case TargetOpcode::G_VECREDUCE_FMUL:
6952 case TargetOpcode::G_VECREDUCE_ADD:
6953 case TargetOpcode::G_VECREDUCE_MUL:
6954 case TargetOpcode::G_VECREDUCE_AND:
6955 case TargetOpcode::G_VECREDUCE_OR:
6956 case TargetOpcode::G_VECREDUCE_XOR:
6957 case TargetOpcode::G_VECREDUCE_SMAX:
6958 case TargetOpcode::G_VECREDUCE_SMIN:
6959 case TargetOpcode::G_VECREDUCE_UMAX:
6960 case TargetOpcode::G_VECREDUCE_UMIN: {
6961 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
6962 MachineOperand &MO = MI.getOperand(1);
6963 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6964 auto NeutralElement = getNeutralElementForVecReduce(
6965 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
6966
6967 LLT IdxTy(TLI.getVectorIdxLLT(MIRBuilder.getDataLayout()));
6968 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
6969 i != e; i++) {
6970 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
6971 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6972 NeutralElement, Idx);
6973 }
6974
6975 Observer.changingInstr(MI);
6976 MO.setReg(NewVec.getReg(0));
6977 Observer.changedInstr(MI);
6978 return Legalized;
6979 }
6980
6981 default:
6982 return UnableToLegalize;
6983 }
6984}
6985
6988 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6989 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6990 unsigned MaskNumElts = Mask.size();
6991 unsigned SrcNumElts = SrcTy.getNumElements();
6992 LLT DestEltTy = DstTy.getElementType();
6993
6994 if (MaskNumElts == SrcNumElts)
6995 return Legalized;
6996
6997 if (MaskNumElts < SrcNumElts) {
6998 // Extend mask to match new destination vector size with
6999 // undef values.
7000 SmallVector<int, 16> NewMask(SrcNumElts, -1);
7001 llvm::copy(Mask, NewMask.begin());
7002
7003 moreElementsVectorDst(MI, SrcTy, 0);
7004 MIRBuilder.setInstrAndDebugLoc(MI);
7005 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
7006 MI.getOperand(1).getReg(),
7007 MI.getOperand(2).getReg(), NewMask);
7008 MI.eraseFromParent();
7009
7010 return Legalized;
7011 }
7012
7013 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
7014 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
7015 LLT PaddedTy =
7016 DstTy.changeVectorElementCount(ElementCount::getFixed(PaddedMaskNumElts));
7017
7018 // Create new source vectors by concatenating the initial
7019 // source vectors with undefined vectors of the same size.
7020 auto Undef = MIRBuilder.buildUndef(SrcTy);
7021 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
7022 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
7023 MOps1[0] = MI.getOperand(1).getReg();
7024 MOps2[0] = MI.getOperand(2).getReg();
7025
7026 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
7027 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
7028
7029 // Readjust mask for new input vector length.
7030 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
7031 for (unsigned I = 0; I != MaskNumElts; ++I) {
7032 int Idx = Mask[I];
7033 if (Idx >= static_cast<int>(SrcNumElts))
7034 Idx += PaddedMaskNumElts - SrcNumElts;
7035 MappedOps[I] = Idx;
7036 }
7037
7038 // If we got more elements than required, extract subvector.
7039 if (MaskNumElts != PaddedMaskNumElts) {
7040 auto Shuffle =
7041 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
7042
7043 SmallVector<Register, 16> Elts(MaskNumElts);
7044 for (unsigned I = 0; I < MaskNumElts; ++I) {
7045 Elts[I] =
7046 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
7047 .getReg(0);
7048 }
7049 MIRBuilder.buildBuildVector(DstReg, Elts);
7050 } else {
7051 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
7052 }
7053
7054 MI.eraseFromParent();
7056}
7057
7060 unsigned int TypeIdx, LLT MoreTy) {
7061 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
7062 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
7063 unsigned NumElts = DstTy.getNumElements();
7064 unsigned WidenNumElts = MoreTy.getNumElements();
7065
7066 if (DstTy.isVector() && Src1Ty.isVector() &&
7067 DstTy.getNumElements() != Src1Ty.getNumElements()) {
7069 }
7070
7071 if (TypeIdx != 0)
7072 return UnableToLegalize;
7073
7074 // Expect a canonicalized shuffle.
7075 if (DstTy != Src1Ty || DstTy != Src2Ty)
7076 return UnableToLegalize;
7077
7078 moreElementsVectorSrc(MI, MoreTy, 1);
7079 moreElementsVectorSrc(MI, MoreTy, 2);
7080
7081 // Adjust mask based on new input vector length.
7082 SmallVector<int, 16> NewMask(WidenNumElts, -1);
7083 for (unsigned I = 0; I != NumElts; ++I) {
7084 int Idx = Mask[I];
7085 if (Idx < static_cast<int>(NumElts))
7086 NewMask[I] = Idx;
7087 else
7088 NewMask[I] = Idx - NumElts + WidenNumElts;
7089 }
7090 moreElementsVectorDst(MI, MoreTy, 0);
7091 MIRBuilder.setInstrAndDebugLoc(MI);
7092 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
7093 MI.getOperand(1).getReg(),
7094 MI.getOperand(2).getReg(), NewMask);
7095 MI.eraseFromParent();
7096 return Legalized;
7097}
7098
7099void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
7100 ArrayRef<Register> Src1Regs,
7101 ArrayRef<Register> Src2Regs,
7102 LLT NarrowTy) {
7104 unsigned SrcParts = Src1Regs.size();
7105 unsigned DstParts = DstRegs.size();
7106
7107 unsigned DstIdx = 0; // Low bits of the result.
7108 Register FactorSum =
7109 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
7110 DstRegs[DstIdx] = FactorSum;
7111
7112 Register CarrySumPrevDstIdx;
7114
7115 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
7116 // Collect low parts of muls for DstIdx.
7117 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
7118 i <= std::min(DstIdx, SrcParts - 1); ++i) {
7120 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
7121 Factors.push_back(Mul.getReg(0));
7122 }
7123 // Collect high parts of muls from previous DstIdx.
7124 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
7125 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
7126 MachineInstrBuilder Umulh =
7127 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
7128 Factors.push_back(Umulh.getReg(0));
7129 }
7130 // Add CarrySum from additions calculated for previous DstIdx.
7131 if (DstIdx != 1) {
7132 Factors.push_back(CarrySumPrevDstIdx);
7133 }
7134
7135 Register CarrySum;
7136 // Add all factors and accumulate all carries into CarrySum.
7137 if (DstIdx != DstParts - 1) {
7138 MachineInstrBuilder Uaddo =
7139 B.buildUAddo(NarrowTy, LLT::integer(1), Factors[0], Factors[1]);
7140 FactorSum = Uaddo.getReg(0);
7141 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
7142 for (unsigned i = 2; i < Factors.size(); ++i) {
7143 MachineInstrBuilder Uaddo =
7144 B.buildUAddo(NarrowTy, LLT::integer(1), FactorSum, Factors[i]);
7145 FactorSum = Uaddo.getReg(0);
7146 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
7147 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
7148 }
7149 } else {
7150 // Since value for the next index is not calculated, neither is CarrySum.
7151 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
7152 for (unsigned i = 2; i < Factors.size(); ++i)
7153 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
7154 }
7155
7156 CarrySumPrevDstIdx = CarrySum;
7157 DstRegs[DstIdx] = FactorSum;
7158 Factors.clear();
7159 }
7160}
7161
7164 LLT NarrowTy) {
7165 if (TypeIdx != 0)
7166 return UnableToLegalize;
7167
7168 Register DstReg = MI.getOperand(0).getReg();
7169 LLT DstType = MRI.getType(DstReg);
7170 // FIXME: add support for vector types
7171 if (DstType.isVector())
7172 return UnableToLegalize;
7173
7174 unsigned Opcode = MI.getOpcode();
7175 unsigned OpO, OpE, OpF;
7176 switch (Opcode) {
7177 case TargetOpcode::G_SADDO:
7178 case TargetOpcode::G_SADDE:
7179 case TargetOpcode::G_UADDO:
7180 case TargetOpcode::G_UADDE:
7181 case TargetOpcode::G_ADD:
7182 OpO = TargetOpcode::G_UADDO;
7183 OpE = TargetOpcode::G_UADDE;
7184 OpF = TargetOpcode::G_UADDE;
7185 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7186 OpF = TargetOpcode::G_SADDE;
7187 break;
7188 case TargetOpcode::G_SSUBO:
7189 case TargetOpcode::G_SSUBE:
7190 case TargetOpcode::G_USUBO:
7191 case TargetOpcode::G_USUBE:
7192 case TargetOpcode::G_SUB:
7193 OpO = TargetOpcode::G_USUBO;
7194 OpE = TargetOpcode::G_USUBE;
7195 OpF = TargetOpcode::G_USUBE;
7196 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7197 OpF = TargetOpcode::G_SSUBE;
7198 break;
7199 default:
7200 llvm_unreachable("Unexpected add/sub opcode!");
7201 }
7202
7203 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
7204 unsigned NumDefs = MI.getNumExplicitDefs();
7205 Register Src1 = MI.getOperand(NumDefs).getReg();
7206 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
7207 Register CarryDst, CarryIn;
7208 if (NumDefs == 2)
7209 CarryDst = MI.getOperand(1).getReg();
7210 if (MI.getNumOperands() == NumDefs + 3)
7211 CarryIn = MI.getOperand(NumDefs + 2).getReg();
7212
7213 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7214 LLT LeftoverTy, DummyTy;
7215 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
7216 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7217 MIRBuilder, MRI);
7218 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
7219 MRI);
7220
7221 int NarrowParts = Src1Regs.size();
7222 Src1Regs.append(Src1Left);
7223 Src2Regs.append(Src2Left);
7224 DstRegs.reserve(Src1Regs.size());
7225
7226 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
7227 Register DstReg =
7228 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7229 Register CarryOut;
7230 // Forward the final carry-out to the destination register
7231 if (i == e - 1 && CarryDst)
7232 CarryOut = CarryDst;
7233 else
7234 CarryOut = MRI.createGenericVirtualRegister(LLT::integer(1));
7235
7236 if (!CarryIn) {
7237 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7238 {Src1Regs[i], Src2Regs[i]});
7239 } else if (i == e - 1) {
7240 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7241 {Src1Regs[i], Src2Regs[i], CarryIn});
7242 } else {
7243 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7244 {Src1Regs[i], Src2Regs[i], CarryIn});
7245 }
7246
7247 DstRegs.push_back(DstReg);
7248 CarryIn = CarryOut;
7249 }
7250 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
7251 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7252 ArrayRef(DstRegs).drop_front(NarrowParts));
7253
7254 MI.eraseFromParent();
7255 return Legalized;
7256}
7257
7260 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
7261
7262 LLT Ty = MRI.getType(DstReg);
7263 if (Ty.isVector())
7264 return UnableToLegalize;
7265
7266 unsigned Size = Ty.getSizeInBits();
7267 unsigned NarrowSize = NarrowTy.getSizeInBits();
7268 if (Size % NarrowSize != 0)
7269 return UnableToLegalize;
7270
7271 unsigned NumParts = Size / NarrowSize;
7272 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
7273 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7274
7275 SmallVector<Register, 2> Src1Parts, Src2Parts;
7276 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
7277 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
7278 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
7279 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7280
7281 // Take only high half of registers if this is high mul.
7282 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
7283 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7284 MI.eraseFromParent();
7285 return Legalized;
7286}
7287
7290 LLT NarrowTy) {
7291 if (TypeIdx != 0)
7292 return UnableToLegalize;
7293
7294 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
7295
7296 Register Src = MI.getOperand(1).getReg();
7297 LLT SrcTy = MRI.getType(Src);
7298
7299 // If all finite floats fit into the narrowed integer type, we can just swap
7300 // out the result type. This is practically only useful for conversions from
7301 // half to at least 16-bits, so just handle the one case.
7302 if (SrcTy.getScalarType() != LLT::scalar(16) ||
7303 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
7304 return UnableToLegalize;
7305
7306 Observer.changingInstr(MI);
7307 narrowScalarDst(MI, NarrowTy, 0,
7308 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7309 Observer.changedInstr(MI);
7310 return Legalized;
7311}
7312
// Narrow-scalar legalization of G_EXTRACT when the *source* (TypeIdx 1) is
// the type being narrowed: split the source into NarrowTy-sized parts, keep
// only the parts that overlap the extracted bit range, and recombine them
// into the destination. Bails out unless the source size is an exact
// multiple of NarrowTy. (Signature lines are not visible in this excerpt.)
7315 LLT NarrowTy) {
7316 if (TypeIdx != 1)
7317 return UnableToLegalize;
7318
7319 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7320
7321 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7322 // FIXME: add support for when SizeOp1 isn't an exact multiple of
7323 // NarrowSize.
7324 if (SizeOp1 % NarrowSize != 0)
7325 return UnableToLegalize;
7326 int NumParts = SizeOp1 / NarrowSize;
7327
// Split operand 1 (the wide source) into NumParts NarrowTy registers.
7328 SmallVector<Register, 2> SrcRegs, DstRegs;
7329 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7330 MIRBuilder, MRI);
7331
// Operand 0 is the G_EXTRACT destination; operand 2 is the bit offset into
// the source. OpSize is the number of bits produced by the extract.
7332 Register OpReg = MI.getOperand(0).getReg();
7333 uint64_t OpStart = MI.getOperand(2).getImm();
7334 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7335 for (int i = 0; i < NumParts; ++i) {
7336 unsigned SrcStart = i * NarrowSize;
7337
7338 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7339 // No part of the extract uses this subregister, ignore it.
7340 continue;
7341 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7342 // The entire subregister is extracted, forward the value.
7343 DstRegs.push_back(SrcRegs[i]);
7344 continue;
7345 }
7346
7347 // OpSegStart is where this destination segment would start in OpReg if it
7348 // extended infinitely in both directions.
7349 int64_t ExtractOffset;
7350 uint64_t SegSize;
7351 if (OpStart < SrcStart) {
7352 ExtractOffset = 0;
7353 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7354 } else {
7355 ExtractOffset = OpStart - SrcStart;
7356 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7357 }
7358
7359 Register SegReg = SrcRegs[i];
7360 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7361 // A genuine extract is needed.
7362 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7363 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7364 }
7365
7366 DstRegs.push_back(SegReg);
7367 }
7368
// Recombine the surviving pieces into the destination register. A single
// piece degenerates to a plain copy.
7369 Register DstReg = MI.getOperand(0).getReg();
7370 if (MRI.getType(DstReg).isVector())
7371 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7372 else if (DstRegs.size() > 1)
7373 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7374 else
7375 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
7376 MI.eraseFromParent();
7377 return Legalized;
7378}
7379
// Narrow-scalar legalization of G_INSERT (TypeIdx 0): split the wide
// container (operand 1) into NarrowTy parts plus an optional leftover, then
// re-insert the overlapping slices of the inserted value (operand 2) into
// each affected part and merge the parts back into the destination.
// (Signature lines are not visible in this excerpt.)
7382 LLT NarrowTy) {
7383 // FIXME: Don't know how to handle secondary types yet.
7384 if (TypeIdx != 0)
7385 return UnableToLegalize;
7386
7387 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
7388 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7389 LLT LeftoverTy;
7390 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7391 LeftoverRegs, MIRBuilder, MRI);
7392
// Process the leftover piece with the same loop as the full-width pieces.
7393 SrcRegs.append(LeftoverRegs);
7394
// OpReg is the value being inserted; operand 3 is its bit offset within the
// container.
7395 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7396 Register OpReg = MI.getOperand(2).getReg();
7397 uint64_t OpStart = MI.getOperand(3).getImm();
7398 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7399 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
7400 unsigned DstStart = I * NarrowSize;
7401
7402 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7403 // The entire subregister is defined by this insert, forward the new
7404 // value.
7405 DstRegs.push_back(OpReg);
7406 continue;
7407 }
7408
7409 Register SrcReg = SrcRegs[I];
7410 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
7411 // The leftover reg is smaller than NarrowTy, so we need to extend it.
7412 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7413 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
7414 }
7415
7416 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7417 // No part of the insert affects this subregister, forward the original.
7418 DstRegs.push_back(SrcReg);
7419 continue;
7420 }
7421
7422 // OpSegStart is where this destination segment would start in OpReg if it
7423 // extended infinitely in both directions.
7424 int64_t ExtractOffset, InsertOffset;
7425 uint64_t SegSize;
7426 if (OpStart < DstStart) {
7427 InsertOffset = 0;
7428 ExtractOffset = DstStart - OpStart;
7429 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7430 } else {
7431 InsertOffset = OpStart - DstStart;
7432 ExtractOffset = 0;
7433 SegSize =
7434 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7435 }
7436
7437 Register SegReg = OpReg;
7438 if (ExtractOffset != 0 || SegSize != OpSize) {
7439 // A genuine extract is needed.
7440 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7441 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7442 }
7443
7444 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7445 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7446 DstRegs.push_back(DstReg);
7447 }
7448
// If the any-extended leftover piece made the merged value wider than the
// original register, merge into a wide scalar and truncate back down.
7449 uint64_t WideSize = DstRegs.size() * NarrowSize;
7450 Register DstReg = MI.getOperand(0).getReg();
7451 if (WideSize > RegTy.getSizeInBits()) {
7452 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
7453 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7454 MIRBuilder.buildTrunc(DstReg, MergeReg);
7455 } else
7456 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7457
7458 MI.eraseFromParent();
7459 return Legalized;
7460}
7461
// Narrow a simple two-source, one-destination operation (TypeIdx 0) by
// splitting both sources into NarrowTy parts plus leftovers, emitting the
// same opcode piecewise, and reassembling the destination with insertParts.
// (Signature lines are not visible in this excerpt.)
7464 LLT NarrowTy) {
7465 Register DstReg = MI.getOperand(0).getReg();
7466 LLT DstTy = MRI.getType(DstReg);
7467
7468 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
7469
7470 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7471 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
7472 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7473 LLT LeftoverTy;
7474 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7475 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
7476 return UnableToLegalize;
7477
// Both sources have DstTy, so the second split must succeed with the same
// breakdown as the first.
7478 LLT Unused;
7479 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7480 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7481 llvm_unreachable("inconsistent extractParts result");
7482
// Re-emit the original opcode on each full-width part pair...
7483 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7484 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
7485 {Src0Regs[I], Src1Regs[I]});
7486 DstRegs.push_back(Inst.getReg(0));
7487 }
7488
// ...and on each leftover pair.
7489 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7490 auto Inst = MIRBuilder.buildInstr(
7491 MI.getOpcode(),
7492 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7493 DstLeftoverRegs.push_back(Inst.getReg(0));
7494 }
7495
7496 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7497 LeftoverTy, DstLeftoverRegs);
7498
7499 MI.eraseFromParent();
7500 return Legalized;
7501}
7502
// Narrow the result (TypeIdx 0) of an extension-like instruction: break the
// source into GCD-typed pieces, rebuild LCM-sized pieces using the original
// extension opcode for padding, then remerge into the scalar destination.
// Vector destinations are not handled. (Signature lines are not visible in
// this excerpt; the declaration of Parts also falls on an elided line.)
7505 LLT NarrowTy) {
7506 if (TypeIdx != 0)
7507 return UnableToLegalize;
7508
7509 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7510
7511 LLT DstTy = MRI.getType(DstReg);
7512 if (DstTy.isVector())
7513 return UnableToLegalize;
7514
7516 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7517 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
7518 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7519
7520 MI.eraseFromParent();
7521 return Legalized;
7522}
7523
// Narrow a G_SELECT with a scalar condition (TypeIdx 0): split both value
// operands into NarrowTy parts plus leftovers, emit one select per part
// reusing the single condition, and reassemble the destination.
// (Signature lines are not visible in this excerpt.)
7526 LLT NarrowTy) {
7527 if (TypeIdx != 0)
7528 return UnableToLegalize;
7529
7530 Register CondReg = MI.getOperand(1).getReg();
7531 LLT CondTy = MRI.getType(CondReg);
7532 if (CondTy.isVector()) // TODO: Handle vselect
7533 return UnableToLegalize;
7534
7535 Register DstReg = MI.getOperand(0).getReg();
7536 LLT DstTy = MRI.getType(DstReg);
7537
7538 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7539 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7540 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
7541 LLT LeftoverTy;
7542 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7543 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7544 return UnableToLegalize;
7545
// The false operand has the same type, so its split must match the first.
7546 LLT Unused;
7547 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7548 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
7549 llvm_unreachable("inconsistent extractParts result");
7550
7551 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7552 auto Select = MIRBuilder.buildSelect(NarrowTy,
7553 CondReg, Src1Regs[I], Src2Regs[I]);
7554 DstRegs.push_back(Select.getReg(0));
7555 }
7556
7557 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7558 auto Select = MIRBuilder.buildSelect(
7559 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
7560 DstLeftoverRegs.push_back(Select.getReg(0));
7561 }
7562
7563 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7564 LeftoverTy, DstLeftoverRegs);
7565
7566 MI.eraseFromParent();
7567 return Legalized;
7568}
7569
// Narrow the source (TypeIdx 1) of G_CTLZ / G_CTLZ_ZERO_UNDEF when the
// source is exactly twice NarrowTy: unmerge into Lo/Hi halves and select
// between ctlz(Hi) and NarrowSize + ctlz(Lo) depending on whether Hi is
// zero. Only this one 2x split is handled. (Signature lines and the local
// builder alias `B` declaration are not visible in this excerpt.)
7572 LLT NarrowTy) {
7573 if (TypeIdx != 1)
7574 return UnableToLegalize;
7575
7576 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7577 unsigned NarrowSize = NarrowTy.getSizeInBits();
7578
7579 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7580 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
7581
7583 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7584 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
7585 auto C_0 = B.buildConstant(NarrowTy, 0);
// NOTE(review): the i1 compare type is spelled LLT::integer(1) here but
// LLT::scalar(1) in the CTTZ counterpart below — presumably equivalent;
// confirm the intended spelling.
7586 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::integer(1),
7587 UnmergeSrc.getReg(1), C_0);
// When Hi is zero, ctlz of the Lo half is defined only for the
// ZERO_UNDEF variant, so the plain opcode is used otherwise.
7588 auto LoCTLZ = IsUndef ?
7589 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
7590 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7591 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7592 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
// Hi != 0 on this path, so its ctlz is never the undefined zero case.
7593 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
7594 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7595
7596 MI.eraseFromParent();
7597 return Legalized;
7598 }
7599
7600 return UnableToLegalize;
7601}
7602
// Narrow the source (TypeIdx 1) of G_CTTZ / G_CTTZ_ZERO_UNDEF when the
// source is exactly twice NarrowTy: unmerge into Lo/Hi halves and select
// between cttz(Lo) and cttz(Hi) + NarrowSize depending on whether Lo is
// zero. Mirror image of narrowScalarCTLZ above. (Signature lines and the
// local builder alias `B` declaration are not visible in this excerpt.)
7605 LLT NarrowTy) {
7606 if (TypeIdx != 1)
7607 return UnableToLegalize;
7608
7609 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7610 unsigned NarrowSize = NarrowTy.getSizeInBits();
7611
7612 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7613 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
7614
7616 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7617 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
7618 auto C_0 = B.buildConstant(NarrowTy, 0);
7619 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7620 UnmergeSrc.getReg(0), C_0);
// When Lo is zero, cttz of the Hi half is defined only for the
// ZERO_UNDEF variant, so the plain opcode is used otherwise.
7621 auto HiCTTZ = IsUndef ?
7622 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
7623 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7624 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7625 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
// Lo != 0 on this path, so its cttz is never the undefined zero case.
7626 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
7627 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7628
7629 MI.eraseFromParent();
7630 return Legalized;
7631 }
7632
7633 return UnableToLegalize;
7634}
7635
// Narrow the source (TypeIdx 1) of G_CTLS (count leading sign bits) when the
// source is exactly twice NarrowTy. The Hi half's own ctls is used unless Hi
// consists entirely of sign bits, in which case the count continues into Lo:
// xor Lo with the broadcast sign and count its leading zeros, then add the
// NarrowSize-1 sign bits contributed by Hi. (Signature lines and the local
// builder alias `B` declaration are not visible in this excerpt.)
7638 LLT NarrowTy) {
7639 if (TypeIdx != 1)
7640 return UnableToLegalize;
7641
7642 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7643 unsigned NarrowSize = NarrowTy.getSizeInBits();
7644
7645 if (!SrcTy.isScalar() || SrcTy.getSizeInBits() != 2 * NarrowSize)
7646 return UnableToLegalize;
7647
7649
7650 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7651 Register Lo = UnmergeSrc.getReg(0);
7652 Register Hi = UnmergeSrc.getReg(1);
7653
// Sign is Hi's sign bit broadcast across NarrowTy (arithmetic shift by
// NarrowSize-1), i.e. all-zeros or all-ones.
7654 auto ShAmt = B.buildConstant(NarrowTy, NarrowSize - 1);
7655 auto Sign = B.buildAShr(NarrowTy, Hi, ShAmt);
7656
// Hi == Sign exactly when every bit of Hi is a copy of its sign bit.
7657 auto HiIsSign = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), Hi, Sign);
7658
7659 // Invert Lo if Hi is negative. Then count the leading zeros. If there are no
7660 // leading zeros, then the MSB of Lo is different than the MSB of Hi.
7661 // Otherwise the leading zeros represent additional sign bits of the original
7662 // value.
// NOTE(review): Lo and Sign have NarrowTy but the xor/ctlz are built with
// DstTy — this assumes DstTy matches NarrowTy here; confirm.
7663 auto LoInv = B.buildXor(DstTy, Lo, Sign);
7664 auto LoCTLZ = B.buildCTLZ(DstTy, LoInv);
7665
7666 // Add NarrowSize-1 to LoCTLZ. This is the full CTLS if Hi is all sign bits.
7667 auto C_NarrowSizeM1 = B.buildConstant(DstTy, NarrowSize - 1);
7668 auto HiIsSignCTLS = B.buildAdd(DstTy, LoCTLZ, C_NarrowSizeM1);
7669
7670 auto HiCTLS = B.buildCTLS(DstTy, Hi);
7671
7672 B.buildSelect(DstReg, HiIsSign, HiIsSignCTLS, HiCTLS);
7673
7674 MI.eraseFromParent();
7675 return Legalized;
7676}
7677
// Narrow the source (TypeIdx 1) of G_CTPOP when the source is exactly twice
// NarrowTy: popcount each half independently and add the results — the
// population count of a concatenation is the sum of the halves' counts.
// (Signature lines are not visible in this excerpt.)
7680 LLT NarrowTy) {
7681 if (TypeIdx != 1)
7682 return UnableToLegalize;
7683
7684 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7685 unsigned NarrowSize = NarrowTy.getSizeInBits();
7686
7687 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7688 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
7689
7690 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7691 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7692 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7693
7694 MI.eraseFromParent();
7695 return Legalized;
7696 }
7697
7698 return UnableToLegalize;
7699}
7700
// Narrow the exponent operand (TypeIdx 1) of G_FLDEXP: clamp the wide
// exponent into the signed range representable by NarrowTy, then truncate it
// in place on the original instruction. Clamping first makes the truncation
// value-preserving for any exponent that matters (out-of-range exponents
// saturate). (Signature lines and the local builder alias `B` declaration
// are not visible in this excerpt.)
7703 LLT NarrowTy) {
7704 if (TypeIdx != 1)
7705 return UnableToLegalize;
7706
7708 Register ExpReg = MI.getOperand(2).getReg();
7709 LLT ExpTy = MRI.getType(ExpReg);
7710
7711 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
7712
7713 // Clamp the exponent to the range of the target type.
7714 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
7715 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
7716 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
7717 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
7718
// Mutate the existing G_FLDEXP rather than rebuilding it; only the exponent
// operand changes.
7719 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
7720 Observer.changingInstr(MI);
7721 MI.getOperand(2).setReg(Trunc.getReg(0));
7722 Observer.changedInstr(MI);
7723 return Legalized;
7724}
7725
// Lower bit-counting operations (G_CTLZ[_ZERO_UNDEF], G_CTTZ[_ZERO_UNDEF],
// G_CTPOP, G_CTLS) into sequences of simpler operations. The ZERO_UNDEF
// forms trivially become their defined counterparts; the rest prefer a
// cheaper supported opcode when the target has one and otherwise fall back
// to the classic bit-trick expansions from Hacker's Delight.
// (Signature lines and a builder alias declaration in the G_CTPOP case are
// not visible in this excerpt.)
7728 unsigned Opc = MI.getOpcode();
7729 const auto &TII = MIRBuilder.getTII();
// "Supported" includes Libcall/Custom: anything the target will eventually
// handle, not just what is Legal right now.
7730 auto isSupported = [this](const LegalityQuery &Q) {
7731 auto QAction = LI.getAction(Q).Action;
7732 return QAction == Legal || QAction == Libcall || QAction == Custom;
7733 };
7734 switch (Opc) {
7735 default:
7736 return UnableToLegalize;
7737 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7738 // This trivially expands to CTLZ.
7739 Observer.changingInstr(MI);
7740 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
7741 Observer.changedInstr(MI);
7742 return Legalized;
7743 }
7744 case TargetOpcode::G_CTLZ: {
7745 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7746 unsigned Len = SrcTy.getScalarSizeInBits();
7747
7748 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7749 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
7750 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7751 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
7752 auto ICmp = MIRBuilder.buildICmp(
7753 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
7754 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7755 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7756 MI.eraseFromParent();
7757 return Legalized;
7758 }
7759 // for now, we do this:
7760 // NewLen = NextPowerOf2(Len);
7761 // x = x | (x >> 1);
7762 // x = x | (x >> 2);
7763 // ...
7764 // x = x | (x >>16);
7765 // x = x | (x >>32); // for 64-bit input
7766 // Upto NewLen/2
7767 // return Len - popcount(x);
7768 //
7769 // Ref: "Hacker's Delight" by Henry Warren
7770 Register Op = SrcReg;
7771 unsigned NewLen = PowerOf2Ceil(Len);
// Smear the highest set bit down into every lower position, then the
// number of leading zeros is Len minus the popcount of the smear.
7772 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7773 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7774 auto MIBOp = MIRBuilder.buildOr(
7775 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
7776 Op = MIBOp.getReg(0);
7777 }
7778 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
7779 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
7780 MIBPop);
7781 MI.eraseFromParent();
7782 return Legalized;
7783 }
7784 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7785 // This trivially expands to CTTZ.
7786 Observer.changingInstr(MI);
7787 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
7788 Observer.changedInstr(MI);
7789 return Legalized;
7790 }
7791 case TargetOpcode::G_CTTZ: {
7792 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7793
7794 unsigned Len = SrcTy.getScalarSizeInBits();
7795 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7796 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
7797 // zero.
7798 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7799 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
7800 auto ICmp = MIRBuilder.buildICmp(
7801 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
7802 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7803 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7804 MI.eraseFromParent();
7805 return Legalized;
7806 }
7807 // for now, we use: { return popcount(~x & (x - 1)); }
7808 // unless the target has ctlz but not ctpop, in which case we use:
7809 // { return 32 - nlz(~x & (x-1)); }
7810 // Ref: "Hacker's Delight" by Henry Warren
7811 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
7812 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
// ~x & (x - 1) sets exactly the bits below the lowest set bit of x.
7813 auto MIBTmp = MIRBuilder.buildAnd(
7814 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7815 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7816 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7817 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
7818 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
7819 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
7820 MI.eraseFromParent();
7821 return Legalized;
7822 }
// Otherwise reuse this instruction as a G_CTPOP of the masked value.
7823 Observer.changingInstr(MI);
7824 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
7825 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7826 Observer.changedInstr(MI);
7827 return Legalized;
7828 }
7829 case TargetOpcode::G_CTPOP: {
7830 Register SrcReg = MI.getOperand(1).getReg();
7831 LLT Ty = MRI.getType(SrcReg);
7832 unsigned Size = Ty.getScalarSizeInBits();
7834
7835 // Bail out on irregular type lengths.
7836 if (Size > 128 || Size % 8 != 0)
7837 return UnableToLegalize;
7838
7839 // Count set bits in blocks of 2 bits. Default approach would be
7840 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
7841 // We use following formula instead:
7842 // B2Count = val - { (val >> 1) & 0x55555555 }
7843 // since it gives same result in blocks of 2 with one instruction less.
7844 auto C_1 = B.buildConstant(Ty, 1);
7845 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
7846 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
7847 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
7848 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7849 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
7850
7851 // In order to get count in blocks of 4 add values from adjacent block of 2.
7852 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
7853 auto C_2 = B.buildConstant(Ty, 2);
7854 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
7855 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
7856 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
7857 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7858 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7859 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7860
7861 // For count in blocks of 8 bits we don't have to mask high 4 bits before
7862 // addition since count value sits in range {0,...,8} and 4 bits are enough
7863 // to hold such binary values. After addition high 4 bits still hold count
7864 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
7865 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
7866 auto C_4 = B.buildConstant(Ty, 4);
7867 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
7868 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
7869 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
7870 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
7871 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7872
7873 assert(Size <= 128 && "Scalar size is too large for CTPOP lower algorithm");
7874
7875 // Avoid the multiply when shift-add is cheaper.
7876 if (Size == 16 && !Ty.isVector()) {
7877 // v = (v + (v >> 8)) & 0xFF;
7878 auto C_8 = B.buildConstant(Ty, 8);
7879 auto HighSum = B.buildLShr(Ty, B8Count, C_8);
7880 auto Res = B.buildAdd(Ty, B8Count, HighSum);
7881 B.buildAnd(MI.getOperand(0).getReg(), Res, B.buildConstant(Ty, 0xFF));
7882 MI.eraseFromParent();
7883 return Legalized;
7884 }
7885
7886 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
7887 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
7888 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
7889
7890 // Shift count result from 8 high bits to low bits.
7891 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
7892
7893 auto IsMulSupported = [this](const LLT Ty) {
7894 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7895 return Action == Legal || Action == WidenScalar || Action == Custom;
7896 };
7897 if (IsMulSupported(Ty)) {
7898 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
7899 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7900 } else {
// No usable multiply: emulate the 0x01...01 multiply with a
// shift-and-add ladder that accumulates all byte counts into the top byte.
7901 auto ResTmp = B8Count;
7902 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7903 auto ShiftC = B.buildConstant(Ty, Shift);
7904 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
7905 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
7906 }
7907 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7908 }
7909 MI.eraseFromParent();
7910 return Legalized;
7911 }
7912 case TargetOpcode::G_CTLS: {
7913 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7914
7915 // ctls(x) -> ctlz(x ^ (x >> (N - 1))) - 1
7916 auto SignIdxC =
7917 MIRBuilder.buildConstant(SrcTy, SrcTy.getScalarSizeInBits() - 1);
7918 auto OneC = MIRBuilder.buildConstant(DstTy, 1);
7919
// x >> (N-1) broadcasts the sign bit; xor-ing with it clears all
// redundant sign copies, so ctlz counts sign bits plus the sign bit itself.
7920 auto Shr = MIRBuilder.buildAShr(SrcTy, SrcReg, SignIdxC);
7921
7922 auto Xor = MIRBuilder.buildXor(SrcTy, SrcReg, Shr);
7923 auto Ctlz = MIRBuilder.buildCTLZ(DstTy, Xor);
7924
7925 MIRBuilder.buildSub(DstReg, Ctlz, OneC);
7926 MI.eraseFromParent();
7927 return Legalized;
7928 }
7929 }
7930}
7931
7932// Check that (every element of) Reg is undef or not an exact multiple of BW.
// Used by the funnel-shift/rotate lowerings below to prove the shift amount
// is never congruent to 0 mod the bit width, which lets them use the simpler
// expansion. Non-constant elements fail the predicate (matchUnaryPredicate
// requires constants; AllowUndefs accepts undef elements).
// (The dyn_cast of C to ConstantInt falls on a line elided from this
// excerpt; CI below is presumably that cast result.)
7934 Register Reg, unsigned BW) {
7935 return matchUnaryPredicate(
7936 MRI, Reg,
7937 [=](const Constant *C) {
7938 // Null constant here means an undef.
7940 return !CI || CI->getValue().urem(BW) != 0;
7941 },
7942 /*AllowUndefs*/ true);
7943}
7944
// Lower G_FSHL/G_FSHR by converting it into the *opposite* funnel shift,
// which the caller (lowerFunnelShift) has checked is supported. Requires a
// power-of-2 bit width. When the shift amount may be 0 mod BW, the operands
// are pre-shifted by one so the inverted amount ~Z (i.e. BW-1-Z) produces
// the correct result. (Signature lines are not visible in this excerpt.)
7947 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7948 LLT Ty = MRI.getType(Dst);
7949 LLT ShTy = MRI.getType(Z);
7950
7951 unsigned BW = Ty.getScalarSizeInBits();
7952
7953 if (!isPowerOf2_32(BW))
7954 return UnableToLegalize;
7955
7956 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7957 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7958
7959 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7960 // fshl X, Y, Z -> fshr X, Y, -Z
7961 // fshr X, Y, Z -> fshl X, Y, -Z
// NOTE(review): Zero and Z have ShTy but the sub is built with Ty —
// presumably callers guarantee Ty == ShTy here; confirm.
7962 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
7963 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7964 } else {
7965 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7966 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7967 auto One = MIRBuilder.buildConstant(ShTy, 1);
7968 if (IsFSHL) {
7969 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7970 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
7971 } else {
7972 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7973 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
7974 }
7975
// ~Z == BW - 1 - Z for a power-of-2 BW, matching the one-bit pre-shift.
7976 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
7977 }
7978
7979 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
7980 MI.eraseFromParent();
7981 return Legalized;
7982}
7983
// Lower G_FSHL/G_FSHR into plain shl/lshr/or. The two shifted halves never
// overlap, so the final or is emitted with the Disjoint flag. Care is taken
// to avoid an undefined full-width shift when Z may be 0 mod BW: the
// complementary shift is split into a fixed shift by one plus a shift by
// BW-1-(Z%BW). (Signature lines are not visible in this excerpt.)
7986 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7987 LLT Ty = MRI.getType(Dst);
7988 LLT ShTy = MRI.getType(Z);
7989
7990 const unsigned BW = Ty.getScalarSizeInBits();
7991 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7992
7993 Register ShX, ShY;
7994 Register ShAmt, InvShAmt;
7995
7996 // FIXME: Emit optimized urem by constant instead of letting it expand later.
7997 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7998 // fshl: X << C | Y >> (BW - C)
7999 // fshr: X << (BW - C) | Y >> C
8000 // where C = Z % BW is not zero
8001 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
8002 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
8003 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
8004 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
8005 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
8006 } else {
8007 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8008 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8009 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
8010 if (isPowerOf2_32(BW)) {
8011 // Z % BW -> Z & (BW - 1)
8012 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
8013 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8014 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
8015 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
8016 } else {
8017 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
8018 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
8019 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
8020 }
8021
// The fixed one-bit shift keeps ShAmt + 1 + InvShAmt == BW, so neither
// emitted shift amount can reach the (undefined) full bit width.
8022 auto One = MIRBuilder.buildConstant(ShTy, 1);
8023 if (IsFSHL) {
8024 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
8025 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
8026 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
8027 } else {
8028 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
8029 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
8030 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
8031 }
8032 }
8033
8034 MIRBuilder.buildOr(Dst, ShX, ShY, MachineInstr::Disjoint);
8035 MI.eraseFromParent();
8036 return Legalized;
8037}
8038
// Dispatch for lowering G_FSHL/G_FSHR: prefer converting to the opposite
// funnel shift (lowerFunnelShiftWithInverse) unless that opposite opcode is
// itself marked Lower, or the inverse strategy fails (non-power-of-2 width),
// in which case fall back to the plain shift expansion.
// (Signature lines are not visible in this excerpt.)
8041 // These operations approximately do the following (while avoiding undefined
8042 // shifts by BW):
8043 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
8044 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
8045 Register Dst = MI.getOperand(0).getReg();
8046 LLT Ty = MRI.getType(Dst);
8047 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
8048
8049 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
8050 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
8051
8052 // TODO: Use smarter heuristic that accounts for vector legalization.
8053 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
8054 return lowerFunnelShiftAsShifts(MI);
8055
8056 // This only works for powers of 2, fallback to shifts if it fails.
8057 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
8058 if (Result == UnableToLegalize)
8059 return lowerFunnelShiftAsShifts(MI);
8060 return Result;
8061}
8062
// Lower an extension whose per-element step is more than a doubling by
// inserting an intermediate extend of twice the source element size, then
// unmerging, extending the halves, and remerging. Requires power-of-2 sizes;
// anything else is rejected. (Signature lines are not visible in this
// excerpt.)
8064 auto [Dst, Src] = MI.getFirst2Regs();
8065 LLT DstTy = MRI.getType(Dst);
8066 LLT SrcTy = MRI.getType(Src);
8067
8068 uint32_t DstTySize = DstTy.getSizeInBits();
8069 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
8070 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
8071
8072 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
8073 !isPowerOf2_32(SrcTyScalarSize))
8074 return UnableToLegalize;
8075
8076 // The step between extend is too large, split it by creating an intermediate
8077 // extend instruction
8078 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
8079 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
8080 // If the destination type is illegal, split it into multiple statements
8081 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
// The same opcode as the original (sext/zext/anyext) is reused at every
// step, so the overall extension kind is preserved.
8082 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
8083 // Unmerge the vector
8084 LLT EltTy = MidTy.changeElementCount(
8086 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
8087
8088 // ZExt the vectors
8089 LLT ZExtResTy = DstTy.changeElementCount(
8091 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
8092 {UnmergeSrc.getReg(0)});
8093 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
8094 {UnmergeSrc.getReg(1)});
8095
8096 // Merge the ending vectors
8097 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
8098
8099 MI.eraseFromParent();
8100 return Legalized;
8101 }
8102 return UnableToLegalize;
8103}
8104
// Lower a vector G_TRUNC whose element-size step is too large by splitting
// the source in two, truncating each half to an intermediate element size
// (at most a halving per step), concatenating, and truncating again if
// needed. Only power-of-2 element counts/sizes are handled.
// (Signature lines are not visible in this excerpt.)
8106 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
8107 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
8108 // Similar to how operand splitting is done in SelectiondDAG, we can handle
8109 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
8110 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
8111 // %lo16(<4 x s16>) = G_TRUNC %inlo
8112 // %hi16(<4 x s16>) = G_TRUNC %inhi
8113 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
8114 // %res(<8 x s8>) = G_TRUNC %in16
8115
8116 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
8117
8118 Register DstReg = MI.getOperand(0).getReg();
8119 Register SrcReg = MI.getOperand(1).getReg();
8120 LLT DstTy = MRI.getType(DstReg);
8121 LLT SrcTy = MRI.getType(SrcReg);
8122
8123 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
8125 isPowerOf2_32(SrcTy.getNumElements()) &&
8126 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
8127 // Split input type.
8128 LLT SplitSrcTy = SrcTy.changeElementCount(
8129 SrcTy.getElementCount().divideCoefficientBy(2));
8130
8131 // First, split the source into two smaller vectors.
8132 SmallVector<Register, 2> SplitSrcs;
8133 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
8134
8135 // Truncate the splits into intermediate narrower elements.
// If the overall truncation is more than a halving, stop the first step
// at twice the destination element size so another G_TRUNC finishes it.
8136 LLT InterTy;
8137 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
8138 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
8139 else
8140 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
8141 for (Register &Src : SplitSrcs)
8142 Src = MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
8143
8144 // Combine the new truncates into one vector
8145 auto Merge = MIRBuilder.buildMergeLikeInstr(
8146 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
8147
8148 // Truncate the new vector to the final result type
8149 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
8150 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
8151 else
8152 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
8153
8154 MI.eraseFromParent();
8155
8156 return Legalized;
8157 }
8158 return UnableToLegalize;
8159}
8160
// Lower G_ROTL/G_ROTR by emitting the opposite rotate with a negated
// amount: rot(x, c) == revrot(x, -c). The caller (lowerRotate) only takes
// this path when the reverse rotate is legal/custom and the element size is
// a power of two. (Signature lines are not visible in this excerpt.)
8163 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
8164 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
8165 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
8166 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8167 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
8168 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
8169 MI.eraseFromParent();
8170 return Legalized;
8171}
8172
// Lower G_ROTL/G_ROTR. Strategy, in order of preference: (1) the opposite
// rotate with a negated amount, (2) a funnel shift with both data operands
// equal to Src (same-direction, or reverse with negated amount), (3) a pair
// of ordinary shifts combined with a disjoint or, masking the amount for
// power-of-2 widths or using urem plus a fixed one-bit shift otherwise to
// avoid an undefined full-width shift. (Signature lines are not visible in
// this excerpt.)
8174 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
8175
8176 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
8177 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
8178
8179 MIRBuilder.setInstrAndDebugLoc(MI);
8180
8181 // If a rotate in the other direction is supported, use it.
8182 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8183 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
8184 isPowerOf2_32(EltSizeInBits))
8185 return lowerRotateWithReverseRotate(MI);
8186
8187 // If a funnel shift is supported, use it.
8188 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8189 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8190 bool IsFShLegal = false;
8191 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
8192 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
// A rotate is a funnel shift with both inputs equal: fsh(x, x, amt).
8193 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
8194 Register R3) {
8195 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
8196 MI.eraseFromParent();
8197 return Legalized;
8198 };
8199 // If a funnel shift in the other direction is supported, use it.
8200 if (IsFShLegal) {
8201 return buildFunnelShift(FShOpc, Dst, Src, Amt);
8202 } else if (isPowerOf2_32(EltSizeInBits)) {
8203 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
8204 return buildFunnelShift(RevFsh, Dst, Src, Amt);
8205 }
8206 }
8207
// Fallback: expand into two ordinary shifts plus an or.
8208 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
8209 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
8210 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
8211 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
8212 Register ShVal;
8213 Register RevShiftVal;
8214 if (isPowerOf2_32(EltSizeInBits)) {
8215 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8216 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8217 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
8218 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
8219 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8220 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
8221 RevShiftVal =
8222 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
8223 } else {
8224 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8225 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
// The fixed one-bit inner shift keeps the variable shift amount in
// [0, w-1], so no emitted shift is ever by the full (undefined) width.
8226 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
8227 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
8228 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8229 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
8230 auto One = MIRBuilder.buildConstant(AmtTy, 1);
8231 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
8232 RevShiftVal =
8233 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
8234 }
8235 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal, MachineInstr::Disjoint);
8236 MI.eraseFromParent();
8237 return Legalized;
8238}
8239
8240// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
8241// representation.
  auto [Dst, Src] = MI.getFirst2Regs();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);

  // Reference pseudocode for the expansion below:
  // unsigned cul2f(ulong u) {
  //   uint lz = clz(u);
  //   uint e = (u != 0) ? 127U + 63U - lz : 0;
  //   u = (u << lz) & 0x7fffffffffffffffUL;
  //   ulong t = u & 0xffffffffffUL;
  //   uint v = (e << 23) | (uint)(u >> 40);
  //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
  //   return as_float(v + r);
  // }

  auto Zero32 = MIRBuilder.buildConstant(S32, 0);
  auto Zero64 = MIRBuilder.buildConstant(S64, 0);

  // lz = clz(u). The zero-undef variant is fine here: the Src == 0 case is
  // fixed up by the select on NotZero below.
  auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);

  // Biased exponent e = 127 + 63 - lz: position of the leading one plus the
  // IEEE single-precision bias; forced to 0 when the input is zero.
  auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
  auto Sub = MIRBuilder.buildSub(S32, K, LZ);

  auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
  auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);

  // Normalize: shift the leading one up to bit 63, then mask it off since it
  // is implicit in the float encoding.
  auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
  auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);

  auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);

  // t = the low 40 bits of u, i.e. everything that does not fit into the
  // 23-bit mantissa.
  auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
  auto T = MIRBuilder.buildAnd(S64, U, Mask1);

  // v = (e << 23) | top 23 mantissa bits.
  auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
  auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
  auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));

  // Round to nearest, ties to even: r = 1 if the discarded bits are above the
  // halfway point (0x80_0000_0000), v & 1 (round to even) when exactly
  // halfway, otherwise 0.
  auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
  auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
  auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
  auto One = MIRBuilder.buildConstant(S32, 1);

  auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
  auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
  auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
  MIRBuilder.buildAdd(Dst, V, R);

  MI.eraseFromParent();
  return Legalized;
}
8297
8298// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
8299// operations and G_SITOFP
  auto [Dst, Src] = MI.getFirst2Regs();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);

  // For i64 < INT_MAX we simply reuse SITOFP.
  // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
  // saved before division, convert to float by SITOFP, multiply the result
  // by 2.
  auto One = MIRBuilder.buildConstant(S64, 1);
  auto Zero = MIRBuilder.buildConstant(S64, 0);
  // Result if Src < INT_MAX
  auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
  // Result if Src >= INT_MAX
  auto Halved = MIRBuilder.buildLShr(S64, Src, One);
  // ORing the shifted-out bit back in acts as a sticky bit, so rounding the
  // halved value matches rounding the original value.
  auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
  auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
  // After the shift the value fits in 63 bits, so SITOFP is safe.
  auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
  // Doubling via x + x is exact in binary FP (only the exponent changes).
  auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
  // Check if the original value is larger than INT_MAX by comparing with
  // zero to pick one of the two conversions.
  auto IsLarge =
      MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
  MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);

  MI.eraseFromParent();
  return Legalized;
}
8332
8333// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
8334// IEEE double representation.
  auto [Dst, Src] = MI.getFirst2Regs();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);

  // We create double value from 32 bit parts with 32 exponent difference.
  // Note that + and - are float operations that adjust the implicit leading
  // one, the bases 2^52 and 2^84 are for illustrative purposes.
  //
  // X = 2^52 * 1.0...LowBits
  // Y = 2^84 * 1.0...HighBits
  // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
  //         = - 2^52 * 1.0...HighBits
  // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
  //
  // 0x4330... is the bit pattern of 2^52 and 0x4530... of 2^84; ORing the
  // integer halves into their mantissas produces the X/Y values above.
  auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
  auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
  // 2^84 + 2^52 as one double constant; subtracting it cancels both biases.
  auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
  auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
  auto HalfWidth = MIRBuilder.buildConstant(S64, 32);

  auto LowBits = MIRBuilder.buildTrunc(S32, Src);
  LowBits = MIRBuilder.buildZExt(S64, LowBits);
  auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
  auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
  auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
  auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
  MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);

  MI.eraseFromParent();
  return Legalized;
}
8369
8370/// i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16. We cannot
8371/// convert fpround f64->f16 without double-rounding, so we manually perform the
8372/// lowering here where we know it is valid.
8375 LLT SrcTy, MachineIRBuilder &MIRBuilder) {
8376 auto DstFpTy =
8377 SrcTy.changeElementType(LLT::floatIEEE(SrcTy.getScalarSizeInBits()));
8378 auto M1 = MI.getOpcode() == TargetOpcode::G_UITOFP
8379 ? MIRBuilder.buildUITOFP(DstFpTy, Src)
8380 : MIRBuilder.buildSITOFP(DstFpTy, Src);
8381 LLT F32Ty = DstFpTy.changeElementSize(32);
8382 auto M2 = MIRBuilder.buildFPTrunc(F32Ty, M1);
8383 MIRBuilder.buildFPTrunc(Dst, M2);
8384 MI.eraseFromParent();
8386}
8387
8389 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8390
8391 if (SrcTy == LLT::scalar(1)) {
8392 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
8393 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8394 MIRBuilder.buildSelect(Dst, Src, True, False);
8395 MI.eraseFromParent();
8396 return Legalized;
8397 }
8398
8399 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8400 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8401
8402 if (SrcTy != LLT::scalar(64))
8403 return UnableToLegalize;
8404
8405 if (DstTy == LLT::scalar(32))
8406 // TODO: SelectionDAG has several alternative expansions to port which may
8407 // be more reasonable depending on the available instructions. We also need
8408 // a more advanced mechanism to choose an optimal version depending on
8409 // target features such as sitofp or CTLZ availability.
8411
8412 if (DstTy == LLT::scalar(64))
8414
8415 return UnableToLegalize;
8416}
8417
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();

  const LLT I64 = LLT::integer(64);
  const LLT I32 = LLT::integer(32);
  const LLT I1 = LLT::integer(1);

  // A 1-bit signed source is -1 or 0, so it maps to -1.0 / 0.0.
  if (SrcTy == I1) {
    auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
    auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
    MIRBuilder.buildSelect(Dst, Src, True, False);
    MI.eraseFromParent();
    return Legalized;
  }

  // i64 -> f16 needs a dedicated multi-step path to avoid double rounding.
  if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
    return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);

  if (SrcTy != I64)
    return UnableToLegalize;

  if (DstTy.getScalarSizeInBits() == 32) {
    // Reduce to the unsigned conversion on |l|, then restore the sign:
    // signed cl2f(long l) {
    //   long s = l >> 63;
    //   float r = cul2f((l + s) ^ s);
    //   return s ? -r : r;
    // }
    Register L = Src;
    auto SignBit = MIRBuilder.buildConstant(I64, 63);
    // S is 0 for non-negative L, and -1 (all ones) for negative L.
    auto S = MIRBuilder.buildAShr(I64, L, SignBit);

    // (L + S) ^ S is |L|: identity when S == 0, two's-complement negation
    // when S == -1.
    auto LPlusS = MIRBuilder.buildAdd(I64, L, S);
    auto Xor = MIRBuilder.buildXor(I64, LPlusS, S);
    auto R = MIRBuilder.buildUITOFP(I32, Xor);

    auto RNeg = MIRBuilder.buildFNeg(I32, R);
    auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, I1, S,
                                            MIRBuilder.buildConstant(I64, 0));
    MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
8463
8465 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8466 const LLT S64 = LLT::scalar(64);
8467 const LLT S32 = LLT::scalar(32);
8468
8469 if (SrcTy != S64 && SrcTy != S32)
8470 return UnableToLegalize;
8471 if (DstTy != S32 && DstTy != S64)
8472 return UnableToLegalize;
8473
8474 // FPTOSI gives same result as FPTOUI for positive signed integers.
8475 // FPTOUI needs to deal with fp values that convert to unsigned integers
8476 // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
8477
8478 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
8479 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
8481 APInt::getZero(SrcTy.getSizeInBits()));
8482 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
8483
8484 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
8485
8486 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
8487 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
8488 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
8489 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
8490 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
8491 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
8492 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
8493
8494 const LLT S1 = LLT::scalar(1);
8495
8496 MachineInstrBuilder FCMP =
8497 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
8498 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8499
8500 MI.eraseFromParent();
8501 return Legalized;
8502}
8503
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
    return UnableToLegalize;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c

  unsigned SrcEltBits = SrcTy.getScalarSizeInBits();

  // Extract the biased exponent field (bits 23..30 of an f32).
  auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
  auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);

  auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
  auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);

  // Sign = 0 for positive inputs, -1 (all ones) for negative ones: the sign
  // bit arithmetically shifted across the whole width, then sign-extended.
  auto SignMask = MIRBuilder.buildConstant(SrcTy,
                                           APInt::getSignMask(SrcEltBits));
  auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
  auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
  auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
  Sign = MIRBuilder.buildSExt(DstTy, Sign);

  // R = mantissa with the implicit leading one (bit 23) made explicit.
  auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
  auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
  auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);

  auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
  R = MIRBuilder.buildZExt(DstTy, R);

  // Unbiased exponent; depending on whether it exceeds the mantissa width
  // (23) the mantissa must be shifted left or right.
  auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
  auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
  auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
  auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);

  auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
  auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);

  const LLT S1 = LLT::scalar(1);
  auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
                                    S1, Exponent, ExponentLoBit);

  R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign == -1 and is a
  // no-op when Sign == 0.
  auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
  auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);

  auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);

  // A negative unbiased exponent means |Src| < 1, which truncates to 0.
  auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
                                          S1, Exponent, ZeroSrcTy);

  auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
  MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);

  MI.eraseFromParent();
  return Legalized;
}
8567
8570 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8571
8572 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8573 unsigned SatWidth = DstTy.getScalarSizeInBits();
8574
8575 // Determine minimum and maximum integer values and their corresponding
8576 // floating-point values.
8577 APInt MinInt, MaxInt;
8578 if (IsSigned) {
8579 MinInt = APInt::getSignedMinValue(SatWidth);
8580 MaxInt = APInt::getSignedMaxValue(SatWidth);
8581 } else {
8582 MinInt = APInt::getMinValue(SatWidth);
8583 MaxInt = APInt::getMaxValue(SatWidth);
8584 }
8585
8586 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8587 APFloat MinFloat(Semantics);
8588 APFloat MaxFloat(Semantics);
8589
8590 APFloat::opStatus MinStatus =
8591 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
8592 APFloat::opStatus MaxStatus =
8593 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
8594 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
8595 !(MaxStatus & APFloat::opStatus::opInexact);
8596
8597 // If the integer bounds are exactly representable as floats, emit a
8598 // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
8599 // and selects.
8600 if (AreExactFloatBounds) {
8601 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
8602 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
8603 auto MaxP =
8604 MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, LLT::integer(1), Src, MaxC);
8605 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8606 // Clamp by MaxFloat from above. NaN cannot occur.
8607 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8608 auto MinP = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, LLT::integer(1), Max,
8610 auto Min =
8611 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
8612 // Convert clamped value to integer. In the unsigned case we're done,
8613 // because we mapped NaN to MinFloat, which will cast to zero.
8614 if (!IsSigned) {
8615 MIRBuilder.buildFPTOUI(Dst, Min);
8616 MI.eraseFromParent();
8617 return Legalized;
8618 }
8619
8620 // Otherwise, select 0 if Src is NaN.
8621 auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
8622 auto IsZero =
8623 MIRBuilder.buildFCmp(CmpInst::FCMP_UNO, LLT::integer(1), Src, Src);
8624 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
8625 FpToInt);
8626 MI.eraseFromParent();
8627 return Legalized;
8628 }
8629
8630 // Result of direct conversion. The assumption here is that the operation is
8631 // non-trapping and it's fine to apply it to an out-of-range value if we
8632 // select it away later.
8633 auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
8634 : MIRBuilder.buildFPTOUI(DstTy, Src);
8635
8636 // If Src ULT MinFloat, select MinInt. In particular, this also selects
8637 // MinInt if Src is NaN.
8638 auto ULT = MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, LLT::integer(1), Src,
8639 MIRBuilder.buildFConstant(SrcTy, MinFloat));
8640 auto Max = MIRBuilder.buildSelect(
8641 DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8642 // If Src OGT MaxFloat, select MaxInt.
8643 auto OGT = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, LLT::integer(1), Src,
8644 MIRBuilder.buildFConstant(SrcTy, MaxFloat));
8645
8646 // In the unsigned case we are done, because we mapped NaN to MinInt, which
8647 // is already zero.
8648 if (!IsSigned) {
8649 MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
8650 Max);
8651 MI.eraseFromParent();
8652 return Legalized;
8653 }
8654
8655 // Otherwise, select 0 if Src is NaN.
8656 auto Min = MIRBuilder.buildSelect(
8657 DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8658 auto IsZero =
8659 MIRBuilder.buildFCmp(CmpInst::FCMP_UNO, LLT::integer(1), Src, Src);
8660 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
8661 MI.eraseFromParent();
8662 return Legalized;
8663}
8664
8665// Floating-point conversions using truncating and extending loads and stores.
  assert((MI.getOpcode() == TargetOpcode::G_FPEXT ||
          MI.getOpcode() == TargetOpcode::G_FPTRUNC) &&
         "Only G_FPEXT and G_FPTRUNC are expected");

  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  MachinePointerInfo PtrInfo;
  unsigned StoreOpc;
  unsigned LoadOpc;
  LLT StackTy;
  // The conversion happens in the memory operation: a plain store followed
  // by an extending load for G_FPEXT, or a truncating store followed by a
  // plain load for G_FPTRUNC. The stack slot is sized for the narrower type
  // (the source for ext, the destination for trunc).
  if (MI.getOpcode() == TargetOpcode::G_FPEXT) {
    StackTy = SrcTy;
    StoreOpc = TargetOpcode::G_STORE;
    LoadOpc = TargetOpcode::G_FPEXTLOAD;
  } else {
    StackTy = DstTy;
    StoreOpc = TargetOpcode::G_FPTRUNCSTORE;
    LoadOpc = TargetOpcode::G_LOAD;
  }

  Align StackTyAlign = getStackTemporaryAlignment(StackTy);
  auto StackTemp =
      createStackTemporary(StackTy.getSizeInBytes(), StackTyAlign, PtrInfo);

  MachineFunction &MF = MIRBuilder.getMF();
  auto *StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                           StackTy, StackTyAlign);
  MIRBuilder.buildStoreInstr(StoreOpc, SrcReg, StackTemp, *StoreMMO);

  auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                          StackTy, StackTyAlign);
  MIRBuilder.buildLoadInstr(LoadOpc, DstReg, StackTemp, *LoadMMO);

  MI.eraseFromParent();
  return Legalized;
}
8703
8704// f64 -> f16 conversion using round-to-nearest-even rounding mode.
  const LLT S1 = LLT::scalar(1);
  const LLT S32 = LLT::scalar(32);

  auto [Dst, Src] = MI.getFirst2Regs();
  assert(MRI.getType(Dst).getScalarType() == LLT::float16() &&
         MRI.getType(Src).getScalarType() == LLT::float64());

  if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
    return UnableToLegalize;

  // With 'afn' the double rounding through f32 is acceptable.
  if (MI.getFlag(MachineInstr::FmAfn)) {
    unsigned Flags = MI.getFlags();
    auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
    MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
    MI.eraseFromParent();
    return Legalized;
  }

  const unsigned ExpMask = 0x7ff;
  const unsigned ExpBiasf64 = 1023;
  const unsigned ExpBiasf16 = 15;

  // Split the f64 bits into low (U) and high (UH) 32-bit halves.
  auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
  Register U = Unmerge.getReg(0);
  Register UH = Unmerge.getReg(1);

  // E = the f64 biased exponent field (bits 20..30 of the high half).
  auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
  E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));

  // Subtract the fp64 exponent bias (1023) to get the real exponent and
  // add the f16 bias (15) to get the biased exponent for the f16 format.
  E = MIRBuilder.buildAdd(
      S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));

  // M = the f16 mantissa plus rounding bits, extracted from the high word.
  auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
  M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));

  // Collapse all remaining discarded mantissa bits into a single sticky bit.
  auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
                                       MIRBuilder.buildConstant(S32, 0x1ff));
  MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);

  auto Zero = MIRBuilder.buildConstant(S32, 0);
  auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
  auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
  M = MIRBuilder.buildOr(S32, M, Lo40Set);

  // I = the f16 Inf/NaN pattern: (M != 0 ? 0x0200 : 0) | 0x7c00;
  auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
  auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
  auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);

  auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
  auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);

  // Normal-number encoding: N = M | (E << 12);
  auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
  auto N = MIRBuilder.buildOr(S32, M, EShl12);

  // Denormal shift amount: B = clamp(1-E, 0, 13);
  auto One = MIRBuilder.buildConstant(S32, 1);
  auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
  auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
  B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));

  // Make the implicit leading one explicit before denormalizing.
  auto SigSetHigh = MIRBuilder.buildOr(S32, M,
                                       MIRBuilder.buildConstant(S32, 0x1000));

  // D = denormal encoding; if the shift discarded any set bits, fold them
  // back in as a sticky bit.
  auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
  auto D0 = MIRBuilder.buildShl(S32, D, B);

  auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
                                               D0, SigSetHigh);
  auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
  D = MIRBuilder.buildOr(S32, D, D1);

  // Use the denormal encoding when E < 1, the normal one otherwise.
  auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
  auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);

  // Round to nearest, ties to even, driven by the low 3 bits of V.
  auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
  V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));

  auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 3));
  auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);

  auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 5));
  auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);

  V1 = MIRBuilder.buildOr(S32, V0, V1);
  V = MIRBuilder.buildAdd(S32, V, V1);

  // Overflow to infinity when the biased f16 exponent exceeds 30.
  auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
                                       E, MIRBuilder.buildConstant(S32, 30));
  V = MIRBuilder.buildSelect(S32, CmpEGt30,
                             MIRBuilder.buildConstant(S32, 0x7c00), V);

  // E == 1039 (0x7ff - 1023 + 15): the source was Inf/NaN, use I instead.
  auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
                                         E, MIRBuilder.buildConstant(S32, 1039));
  V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);

  // Extract the sign bit.
  auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
  Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));

  // Insert the sign bit
  V = MIRBuilder.buildOr(S32, Sign, V);

  MIRBuilder.buildTrunc(Dst, V);
  MI.eraseFromParent();
  return Legalized;
}
8819
8820// f32 -> bf16 conversion using round-to-nearest-even rounding mode.
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  assert(DstTy.getScalarType() == LLT::bfloat16() &&
         SrcTy.getScalarType() == LLT::float32());

  LLT I1Ty = SrcTy.changeElementType(LLT::integer(1));
  LLT I16Ty = SrcTy.changeElementType(LLT::integer(16));
  LLT I32Ty = SrcTy.changeElementType(LLT::integer(32));

  // Src is NaN iff it compares unordered with any value (zero here).
  auto IsNaN = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO, I1Ty, SrcReg,
                                    MIRBuilder.buildFConstant(SrcTy, 0));
  auto SrcI = MIRBuilder.buildBitcast(I32Ty, SrcReg);

  // Conversions should set NaN's quiet bit. This also prevents NaNs from
  // turning into infinities.
  auto NaN = MIRBuilder.buildOr(I32Ty, SrcI,
                                MIRBuilder.buildConstant(I32Ty, 0x400000));

  // Factor in the contribution of the low 16 bits.
  // Round to nearest, ties to even: bias is 0x7fff plus the lowest kept bit.
  auto Lsb =
      MIRBuilder.buildLShr(I32Ty, SrcI, MIRBuilder.buildConstant(I32Ty, 16));
  Lsb = MIRBuilder.buildAnd(I32Ty, Lsb, MIRBuilder.buildConstant(I32Ty, 1));
  auto RoundingBias =
      MIRBuilder.buildAdd(I32Ty, Lsb, MIRBuilder.buildConstant(I32Ty, 0x7fff));
  auto Add = MIRBuilder.buildAdd(I32Ty, SrcI, RoundingBias);

  // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
  // 0x80000000.
  auto Sel = MIRBuilder.buildSelect(I32Ty, IsNaN, NaN, Add);

  // Now that we have rounded, shift the bits into position.
  auto Srl =
      MIRBuilder.buildLShr(I32Ty, Sel, MIRBuilder.buildConstant(I32Ty, 16));
  auto Trunc = MIRBuilder.buildTrunc(I16Ty, Srl);
  MIRBuilder.buildBitcast(DstReg, Trunc);
  MI.eraseFromParent();
  return Legalized;
}
8860
8863 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
8864 if (DstTy.getScalarType().isFloat16() && SrcTy.getScalarType().isFloat64())
8866
8867 if (DstTy.getScalarType().isBFloat16() && SrcTy.getScalarType().isFloat32())
8869
8870 return lowerFPExtAndTruncMem(MI);
8871}
8872
8874 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8875 LLT Ty = MRI.getType(Dst);
8876
8877 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
8878 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
8879 MI.eraseFromParent();
8880 return Legalized;
8881}
8882
  auto [DstFrac, DstInt, Src] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(Src);
  auto Flags = MI.getFlags();
  const LLT CondTy = Ty.changeElementType(LLT::integer(1));

  // modf-style decomposition: integral part via trunc, fractional part as
  // the remainder Src - trunc(Src).
  auto IntPart = MIRBuilder.buildIntrinsicTrunc(Ty, Src, Flags);
  auto FracPart = MIRBuilder.buildFSub(Ty, Src, IntPart, Flags);

  Register FracToUse;
  if (MI.getFlag(MachineInstr::FmNoInfs)) {
    FracToUse = FracPart.getReg(0);
  } else {
    // For +/-inf, trunc(inf) - inf would yield NaN; the fractional part
    // must be zero in that case, so select it explicitly.
    auto Abs = MIRBuilder.buildFAbs(Ty, Src, Flags);
    const fltSemantics &Semantics = getFltSemanticForLLT(Ty.getScalarType());
    auto Inf = MIRBuilder.buildFConstant(Ty, APFloat::getInf(Semantics));
    auto IsInf = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CondTy, Abs, Inf);
    auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
    auto Select = MIRBuilder.buildSelect(Ty, IsInf, Zero, FracPart);
    FracToUse = Select.getReg(0);
  }

  // The fractional result carries the sign of the input (this matters for
  // -0.0 and the infinity case above).
  MIRBuilder.buildFCopysign(DstFrac, FracToUse, Src, Flags);
  MIRBuilder.buildCopy(DstInt, IntPart.getReg(0));

  MI.eraseFromParent();
  return Legalized;
}
8911
8913 switch (Opc) {
8914 case TargetOpcode::G_SMIN:
8915 return CmpInst::ICMP_SLT;
8916 case TargetOpcode::G_SMAX:
8917 return CmpInst::ICMP_SGT;
8918 case TargetOpcode::G_UMIN:
8919 return CmpInst::ICMP_ULT;
8920 case TargetOpcode::G_UMAX:
8921 return CmpInst::ICMP_UGT;
8922 default:
8923 llvm_unreachable("not in integer min/max");
8924 }
8925}
8926
8928 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8929
8930 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
8931 LLT CmpType = MRI.getType(Dst).changeElementType(LLT::integer(1));
8932
8933 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8934 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8935
8936 MI.eraseFromParent();
8937 return Legalized;
8938}
8939
8942 GSUCmp *Cmp = cast<GSUCmp>(&MI);
8943
8944 Register Dst = Cmp->getReg(0);
8945 LLT DstTy = MRI.getType(Dst);
8946 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8947 LLT CmpTy = DstTy.changeElementSize(1);
8948
8949 CmpInst::Predicate LTPredicate = Cmp->isSigned()
8952 CmpInst::Predicate GTPredicate = Cmp->isSigned()
8955
8956 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
8957 auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8958 Cmp->getRHSReg());
8959 auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8960 Cmp->getRHSReg());
8961
8962 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
8963 auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
8964 if (TLI.preferSelectsOverBooleanArithmetic(
8965 getApproximateEVTForLLT(SrcTy, Ctx)) ||
8967 auto One = MIRBuilder.buildConstant(DstTy, 1);
8968 auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8969
8970 auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
8971 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8972 } else {
8974 std::swap(IsGT, IsLT);
8975 // Extend boolean results to DstTy, which is at least i2, before subtracting
8976 // them.
8977 unsigned BoolExtOp =
8978 MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
8979 IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8980 IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8981 MIRBuilder.buildSub(Dst, IsGT, IsLT);
8982 }
8983
8984 MI.eraseFromParent();
8985 return Legalized;
8986}
8987
  auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
  const int Src0Size = Src0Ty.getScalarSizeInBits();
  const int Src1Size = Src1Ty.getScalarSizeInBits();

  // Same-width integer types used for the bitwise sign manipulation.
  LLT DstIntTy =
      DstTy.changeElementType(LLT::integer(DstTy.getScalarSizeInBits()));
  LLT Src0IntTy = Src0Ty.changeElementType(LLT::integer(Src0Size));
  LLT Src1IntTy = Src1Ty.changeElementType(LLT::integer(Src1Size));

  Register Src0Int = Src0;
  Register Src1Int = Src1;

  // Bitcast FP-typed inputs to integers; integer/anyscalar inputs are used
  // as they are.
  if (!(Src0Ty.getScalarType().isAnyScalar() ||
        Src0Ty.getScalarType().isInteger()))
    Src0Int = MIRBuilder.buildBitcast(Src0IntTy, Src0).getReg(0);

  if (!(Src1Ty.getScalarType().isAnyScalar() ||
        Src1Ty.getScalarType().isInteger()))
    Src1Int = MIRBuilder.buildBitcast(Src1IntTy, Src1).getReg(0);

  auto SignBitMask =
      MIRBuilder.buildConstant(Src0IntTy, APInt::getSignMask(Src0Size));

  auto NotSignBitMask = MIRBuilder.buildConstant(
      Src0IntTy, APInt::getLowBitsSet(Src0Size, Src0Size - 1));

  // And0 = magnitude bits of Src0; And1 = Src1's sign bit, moved into
  // Src0's sign-bit position when the two widths differ.
  Register And0 =
      MIRBuilder.buildAnd(Src0IntTy, Src0Int, NotSignBitMask).getReg(0);
  Register And1;
  if (Src0Ty == Src1Ty) {
    And1 = MIRBuilder.buildAnd(Src1IntTy, Src1Int, SignBitMask).getReg(0);
  } else if (Src0Size > Src1Size) {
    auto ShiftAmt = MIRBuilder.buildConstant(Src0IntTy, Src0Size - Src1Size);
    auto Zext = MIRBuilder.buildZExt(Src0IntTy, Src1Int);
    auto Shift = MIRBuilder.buildShl(Src0IntTy, Zext, ShiftAmt);
    // NOTE(review): the sibling branches build this G_AND with an integer
    // type (Src0IntTy / Src1IntTy); using Src0Ty here looks inconsistent —
    // confirm whether Src0IntTy was intended.
    And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
  } else {
    auto ShiftAmt = MIRBuilder.buildConstant(Src1IntTy, Src1Size - Src0Size);
    auto Shift = MIRBuilder.buildLShr(Src1IntTy, Src1Int, ShiftAmt);
    auto Trunc = MIRBuilder.buildTrunc(Src0IntTy, Shift);
    And1 = MIRBuilder.buildAnd(Src0IntTy, Trunc, SignBitMask).getReg(0);
  }

  // Be careful about setting nsz/nnan/ninf on every instruction, since the
  // constants are a nan and -0.0, but the final result should preserve
  // everything.
  unsigned Flags = MI.getFlags();

  // We masked the sign bit and the not-sign bit, so these are disjoint.
  Flags |= MachineInstr::Disjoint;

  if (DstTy == DstIntTy)
    MIRBuilder.buildOr(Dst, And0, And1, Flags).getReg(0);
  else {
    Register NewDst = MIRBuilder.buildOr(DstIntTy, And0, And1, Flags).getReg(0);
    MIRBuilder.buildBitcast(Dst, NewDst);
  }

  MI.eraseFromParent();
  return Legalized;
}
9051
  // FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have
  // identical handling. fminimumnum/fmaximumnum also need a path that do not
  // depend on fminnum/fmaxnum.

  // Pick the opcode this pseudo is relaxed to once sNaN quieting has been
  // taken care of below.
  unsigned NewOp;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_FMINNUM:
    NewOp = TargetOpcode::G_FMINNUM_IEEE;
    break;
  case TargetOpcode::G_FMINIMUMNUM:
    NewOp = TargetOpcode::G_FMINNUM;
    break;
  case TargetOpcode::G_FMAXNUM:
    NewOp = TargetOpcode::G_FMAXNUM_IEEE;
    break;
  case TargetOpcode::G_FMAXIMUMNUM:
    NewOp = TargetOpcode::G_FMAXNUM;
    break;
  default:
    llvm_unreachable("unexpected min/max opcode");
  }

  auto [Dst, Src0, Src1] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(Dst);

  if (!MI.getFlag(MachineInstr::FmNoNans)) {
    // Insert canonicalizes if it's possible we need to quiet to get correct
    // sNaN behavior.

    // Note this must be done here, and not as an optimization combine in the
    // absence of a dedicate quiet-snan instruction as we're using an
    // omni-purpose G_FCANONICALIZE.
    if (!VT->isKnownNeverSNaN(Src0))
      Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);

    if (!VT->isKnownNeverSNaN(Src1))
      Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
  }

  // If there are no nans, it's safe to simply replace this with the non-IEEE
  // version.
  MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
  MI.eraseFromParent();
  return Legalized;
}
9099
// NOTE(review): signature (file lines 9100-9101) not visible in this excerpt.
// Lowers G_FMINIMUM/G_FMAXIMUM: uses a legal IEEE or non-IEEE min/max (or an
// fcmp+select fallback), then patches in NaN propagation and signed-zero
// ordering (-0.0 < +0.0) where the chosen primitive does not guarantee them.
 9102  unsigned Opc = MI.getOpcode();
 9103  auto [Dst, Src0, Src1] = MI.getFirst3Regs();
 9104  LLT Ty = MRI.getType(Dst);
 9105  const LLT CmpTy = Ty.changeElementType(LLT::integer(1));
 9106
 9107  bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM);
 9108  unsigned OpcIeee =
 9109      IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
 9110  unsigned OpcNonIeee =
 9111      IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
 9112  bool MinMaxMustRespectOrderedZero = false;
 9113  Register Res;
 9114
 9115  // IEEE variants don't need canonicalization
 9116  if (LI.isLegalOrCustom({OpcIeee, Ty})) {
 9117    Res = MIRBuilder.buildInstr(OpcIeee, {Ty}, {Src0, Src1}).getReg(0);
// The IEEE variants already order -0.0 before +0.0, so the zero fixup below
// can be skipped.
 9118    MinMaxMustRespectOrderedZero = true;
 9119  } else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
 9120    Res = MIRBuilder.buildInstr(OpcNonIeee, {Ty}, {Src0, Src1}).getReg(0);
 9121  } else {
// Neither min/max form is available: fall back to compare + select.
 9122    auto Compare = MIRBuilder.buildFCmp(
 9123        IsMax ? CmpInst::FCMP_OGT : CmpInst::FCMP_OLT, CmpTy, Src0, Src1);
 9124    Res = MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
 9125  }
 9126
 9127  // Propagate any NaN of both operands
 9128  if (!MI.getFlag(MachineInstr::FmNoNans) &&
 9129      (!VT->isKnownNeverNaN(Src0) || !VT->isKnownNeverNaN(Src1))) {
 9130    auto IsOrdered = MIRBuilder.buildFCmp(CmpInst::FCMP_ORD, CmpTy, Src0, Src1);
 9131
 9132    LLT ElementTy = Ty.isScalar() ? Ty : Ty.getElementType();
 9133    APFloat NaNValue = APFloat::getNaN(getFltSemanticForLLT(ElementTy));
 9134    Register NaN = MIRBuilder.buildFConstant(ElementTy, NaNValue).getReg(0);
 9135    if (Ty.isVector())
 9136      NaN = MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);
 9137
 9138    Res = MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
 9139  }
 9140
 9141  // fminimum/fmaximum requires -0.0 less than +0.0
 9142  if (!MinMaxMustRespectOrderedZero && !MI.getFlag(MachineInstr::FmNsz)) {
 9143    GISelValueTracking VT(MIRBuilder.getMF());
 9144    KnownFPClass Src0Info = VT.computeKnownFPClass(Src0, fcZero);
 9145    KnownFPClass Src1Info = VT.computeKnownFPClass(Src1, fcZero);
 9146
// Only needed when both operands may be zero; if the result is zero, pick the
// operand whose zero sign is the required one (+0 for max, -0 for min).
 9147    if (!Src0Info.isKnownNeverZero() && !Src1Info.isKnownNeverZero()) {
 9148      const unsigned Flags = MI.getFlags();
 9149      Register Zero = MIRBuilder.buildFConstant(Ty, 0.0).getReg(0);
 9150      auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CmpTy, Res, Zero);
 9151
 9152      unsigned TestClass = IsMax ? fcPosZero : fcNegZero;
 9153
 9154      auto LHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
 9155      auto LHSSelect =
 9156          MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);
 9157
 9158      auto RHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
 9159      auto RHSSelect =
 9160          MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);
 9161
 9162      Res = MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
 9163    }
 9164  }
 9165
 9166  MIRBuilder.buildCopy(Dst, Res);
 9167  MI.eraseFromParent();
 9168  return Legalized;
 9169}
9170
// NOTE(review): signature not visible in this excerpt; body only.
// Expands fused multiply-add into an unfused multiply followed by an add,
// propagating the instruction's FP flags to both new instructions.
 9172  // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
 9173  Register DstReg = MI.getOperand(0).getReg();
 9174  LLT Ty = MRI.getType(DstReg);
 9175  unsigned Flags = MI.getFlags();
 9176
 9177  auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
 9178                                  Flags);
 9179  MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
 9180  MI.eraseFromParent();
 9181  return Legalized;
 9182}
9183
// NOTE(review): signature not visible in this excerpt; body only.
// Lowers round-half-away-from-zero as trunc(x) plus a copysign-adjusted 0/1
// offset selected by whether |x - trunc(x)| >= 0.5.
 9186  auto [DstReg, X] = MI.getFirst2Regs();
 9187  const unsigned Flags = MI.getFlags();
 9188  const LLT Ty = MRI.getType(DstReg);
 9189  const LLT CondTy = Ty.changeElementType(LLT::integer(1));
 9190
 9191  // round(x) =>
 9192  //  t = trunc(x);
 9193  //  d = fabs(x - t);
 9194  //  o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
 9195  //  return t + o;
 9196
 9197  auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
 9198
 9199  auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
 9200  auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
 9201
 9202  auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
 9203  auto Cmp =
 9204      MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
 9205
 9206  // Could emit G_UITOFP instead
 9207  auto One = MIRBuilder.buildFConstant(Ty, 1.0);
 9208  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
 9209  auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
// Transfer x's sign so the offset moves away from zero in either direction.
 9210  auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
 9211
 9212  MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
 9213
 9214  MI.eraseFromParent();
 9215  return Legalized;
 9216}
9217
// NOTE(review): signature not visible in this excerpt; body only.
// Lowers floor as trunc(src), then subtracts 1.0 (via G_SITOFP of the i1
// condition, which yields -1.0 when true) when src is negative and fractional.
 9219  auto [DstReg, SrcReg] = MI.getFirst2Regs();
 9220  unsigned Flags = MI.getFlags();
 9221  LLT Ty = MRI.getType(DstReg);
 9222  const LLT CondTy = Ty.changeElementType(LLT::integer(1));
 9223
 9224  // result = trunc(src);
 9225  // if (src < 0.0 && src != result)
 9226  //   result += -1.0.
 9227
 9228  auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
 9229  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
 9230
 9231  auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
 9232                                  SrcReg, Zero, Flags);
 9233  auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
 9234                                      SrcReg, Trunc, Flags);
 9235  auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
// sitofp(i1 true) == -1.0, so adding it implements the conditional decrement.
 9236  auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
 9237
 9238  MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
 9239  MI.eraseFromParent();
 9240  return Legalized;
 9241}
9242
// NOTE(review): signature not visible in this excerpt; body only.
// Lowers a merge of scalar parts by zero-extending each part to the full
// destination width, shifting it to its bit offset, and OR-ing the pieces
// together; pointer destinations get a final G_INTTOPTR (non-integral address
// spaces are rejected).
 9245  const unsigned NumOps = MI.getNumOperands();
 9246  auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
 9247  unsigned PartSize = Src0Ty.getSizeInBits();
 9248
 9249  LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
 9250  Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
 9251
 9252  for (unsigned I = 2; I != NumOps; ++I) {
 9253    const unsigned Offset = (I - 1) * PartSize;
 9254
 9255    Register SrcReg = MI.getOperand(I).getReg();
 9256    auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
 9257
// The last OR can write DstReg directly when no int->ptr cast is needed.
 9258    Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
 9259      MRI.createGenericVirtualRegister(WideTy);
 9260
 9261    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
 9262    auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
 9263    MIRBuilder.buildOr(NextResult, ResultReg, Shl);
 9264    ResultReg = NextResult;
 9265  }
 9266
 9267  if (DstTy.isPointer()) {
 9268    if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
 9269          DstTy.getAddressSpace())) {
 9270      LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
 9271      return UnableToLegalize;
 9272    }
 9273
 9274    MIRBuilder.buildIntToPtr(DstReg, ResultReg);
 9275  }
 9276
 9277  MI.eraseFromParent();
 9278  return Legalized;
 9279}
9280
// NOTE(review): signature not visible in this excerpt; body only.
// Lowers a scalarizing unmerge by coercing the source to one integer, then
// producing each destination with a right shift by its bit offset plus a
// truncate. Pointer element types are not handled yet.
 9283  const unsigned NumDst = MI.getNumOperands() - 1;
 9284  Register SrcReg = MI.getOperand(NumDst).getReg();
 9285  Register Dst0Reg = MI.getOperand(0).getReg();
 9286  LLT DstTy = MRI.getType(Dst0Reg);
 9287  if (DstTy.isPointer())
 9288    return UnableToLegalize; // TODO
 9289
 9290  SrcReg = coerceToScalar(SrcReg);
 9291  if (!SrcReg)
 9292    return UnableToLegalize;
 9293
 9294  // Expand scalarizing unmerge as bitcast to integer and shift.
 9295  LLT IntTy = MRI.getType(SrcReg);
 9296
// The first destination is the low bits: plain truncate, no shift.
 9297  MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
 9298
 9299  const unsigned DstSize = DstTy.getSizeInBits();
 9300  unsigned Offset = DstSize;
 9301  for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
 9302    auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
 9303    auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
 9304    MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
 9305  }
 9306
 9307  MI.eraseFromParent();
 9308  return Legalized;
 9309}
9310
 9311/// Lower a vector extract or insert by writing the vector to a stack temporary
 9312/// and reloading the element or vector.
 9313///
 9314/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
 9315///   =>
 9316///  %stack_temp = G_FRAME_INDEX
 9317///  G_STORE %vec, %stack_temp
 9318///  %idx = clamp(%idx, %vec.getNumElements())
 9319///  %element_ptr = G_PTR_ADD %stack_temp, %idx
 9320///  %dst = G_LOAD %element_ptr
// NOTE(review): the function signature (file lines 9321-9322) is not visible
// in this excerpt; only the body is shown.
 9323  Register DstReg = MI.getOperand(0).getReg();
 9324  Register SrcVec = MI.getOperand(1).getReg();
 9325  Register InsertVal;
 9326  if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
 9327    InsertVal = MI.getOperand(2).getReg();
 9328
 9329  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
 9330
 9331  LLT VecTy = MRI.getType(SrcVec);
 9332  LLT EltTy = VecTy.getElementType();
 9333  unsigned NumElts = VecTy.getNumElements();
 9334
// Fast path: constant in-range index can be handled by splitting the vector
// into scalar parts, no stack traffic needed.
 9335  int64_t IdxVal;
 9336  if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
// NOTE(review): the declaration of SrcRegs (file line 9337, likely a
// SmallVector<Register>) was dropped by the extraction and is not visible here.
 9338    extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
 9339
 9340    if (InsertVal) {
 9341      SrcRegs[IdxVal] = MI.getOperand(2).getReg();
 9342      MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
 9343    } else {
 9344      MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
 9345    }
 9346
 9347    MI.eraseFromParent();
 9348    return Legalized;
 9349  }
 9350
 9351  if (!EltTy.isByteSized()) { // Not implemented.
 9352    LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
 9353    return UnableToLegalize;
 9354  }
 9355
 9356  unsigned EltBytes = EltTy.getSizeInBytes();
 9357  Align VecAlign = getStackTemporaryAlignment(VecTy);
 9358  Align EltAlign;
 9359
 9360  MachinePointerInfo PtrInfo;
 9361  auto StackTemp = createStackTemporary(
 9362      TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
 9363  MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
 9364
 9365  // Get the pointer to the element, and be sure not to hit undefined behavior
 9366  // if the index is out of bounds.
 9367  Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
 9368
 9369  if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
 9370    int64_t Offset = IdxVal * EltBytes;
 9371    PtrInfo = PtrInfo.getWithOffset(Offset);
 9372    EltAlign = commonAlignment(VecAlign, Offset);
 9373  } else {
 9374    // We lose information with a variable offset.
 9375    EltAlign = getStackTemporaryAlignment(EltTy);
 9376    PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
 9377  }
 9378
 9379  if (InsertVal) {
 9380    // Write the inserted element
 9381    MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
 9382
 9383    // Reload the whole vector.
 9384    MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
 9385  } else {
 9386    MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
 9387  }
 9388
 9389  MI.eraseFromParent();
 9390  return Legalized;
 9391}
9392
// NOTE(review): signature (file lines 9393-9394) not visible in this excerpt.
// Lowers G_SHUFFLE_VECTOR by extracting each selected element (caching repeat
// extracts per mask index) and rebuilding the destination with a build_vector;
// negative mask entries become a shared undef element.
 9395  auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
 9396      MI.getFirst3RegLLTs();
 9397  LLT IdxTy = LLT::scalar(32);
 9398
 9399  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
// NOTE(review): the declarations of BuildVec and Undef (file lines 9400-9401)
// were dropped by the extraction and are not visible here.
 9402  LLT EltTy = DstTy.getScalarType();
 9403
 9404  DenseMap<unsigned, Register> CachedExtract;
 9405
 9406  for (int Idx : Mask) {
 9407    if (Idx < 0) {
 9408      if (!Undef.isValid())
 9409        Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
 9410      BuildVec.push_back(Undef);
 9411      continue;
 9412    }
 9413
 9414    assert(!Src0Ty.isScalar() && "Unexpected scalar G_SHUFFLE_VECTOR");
 9415
// Mask indices >= NumElts address the second source vector.
 9416    int NumElts = Src0Ty.getNumElements();
 9417    Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
 9418    int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
 9419    auto [It, Inserted] = CachedExtract.try_emplace(Idx);
 9420    if (Inserted) {
 9421      auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
 9422      It->second =
 9423          MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK).getReg(0);
 9424    }
 9425    BuildVec.push_back(It->second);
 9426  }
 9427
 9428  assert(DstTy.isVector() && "Unexpected scalar G_SHUFFLE_VECTOR");
 9429  MIRBuilder.buildBuildVector(DstReg, BuildVec);
 9430  MI.eraseFromParent();
 9431  return Legalized;
 9432}
9433
// NOTE(review): signature (file lines 9434-9435) not visible in this excerpt.
// Lowers masked compress through a stack temporary: stores selected elements
// contiguously from position 0, optionally pre-filling with the passthru
// vector and back-filling the tail with a "last write" value, then reloads
// the whole vector. Scalable vectors are rejected.
 9436  auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
 9437      MI.getFirst4RegLLTs();
 9438
 9439  if (VecTy.isScalableVector())
 9440    report_fatal_error("Cannot expand masked_compress for scalable vectors.");
 9441
 9442  Align VecAlign = getStackTemporaryAlignment(VecTy);
 9443  MachinePointerInfo PtrInfo;
 9444  Register StackPtr =
 9445      createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
 9446                           PtrInfo)
 9447          .getReg(0);
// NOTE(review): the initializer of ValPtrInfo (file line 9449) was dropped by
// the extraction; only its continuation (line 9450) is visible below.
 9450
 9451  LLT IdxTy = LLT::scalar(32);
 9452  LLT ValTy = VecTy.getElementType();
 9453  Align ValAlign = getStackTemporaryAlignment(ValTy);
 9454
// OutPos tracks the next write slot; it only advances on selected lanes.
 9455  auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
 9456
 9457  bool HasPassthru =
 9458      MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
 9459
 9460  if (HasPassthru)
 9461    MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
 9462
 9463  Register LastWriteVal;
 9464  std::optional<APInt> PassthruSplatVal =
 9465      isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
 9466
 9467  if (PassthruSplatVal.has_value()) {
 9468    LastWriteVal =
 9469        MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
 9470  } else if (HasPassthru) {
// Non-splat passthru: popcount(mask) locates the first tail slot, and the
// passthru element already stored there becomes the back-fill value.
 9471    auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
 9472    Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
 9473                                     {LLT::scalar(32)}, {Popcount});
 9474
 9475    Register LastElmtPtr =
 9476        getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
 9477    LastWriteVal =
 9478        MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
 9479            .getReg(0);
 9480  }
 9481
 9482  unsigned NumElmts = VecTy.getNumElements();
 9483  for (unsigned I = 0; I < NumElmts; ++I) {
 9484    auto Idx = MIRBuilder.buildConstant(IdxTy, I);
 9485    auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
 9486    Register ElmtPtr =
 9487        getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
// Unconditionally store; unselected lanes are overwritten by later stores
// because OutPos does not advance for them.
 9488    MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
 9489
 9490    LLT MaskITy = MaskTy.getElementType();
 9491    auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
 9492    if (MaskITy.getSizeInBits() > 1)
 9493      MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
 9494
 9495    MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
 9496    OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
 9497
 9498    if (HasPassthru && I == NumElmts - 1) {
// After the last lane, clamp OutPos into bounds and write the back-fill value
// (or the last selected value if every lane was selected).
 9499      auto EndOfVector =
 9500          MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
 9501      auto AllLanesSelected = MIRBuilder.buildICmp(
 9502          CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
 9503      OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
 9504                                     {OutPos, EndOfVector});
 9505      ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
 9506
 9507      LastWriteVal =
 9508          MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
 9509              .getReg(0);
 9510      MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
 9511    }
 9512  }
 9513
 9514  // TODO: Use StackPtr's FrameIndex alignment.
 9515  MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
 9516
 9517  MI.eraseFromParent();
 9518  return Legalized;
 9519}
9520
// NOTE(review): the first line of this signature (file line 9521, declaring
// the function name and the SPReg parameter) is not visible in this excerpt.
// Computes the new stack pointer for a dynamic alloca: SP - AllocSize, rounded
// down to the requested alignment, performed in an integer type and cast back
// to the pointer type.
 9522                                                Register AllocSize,
 9523                                                Align Alignment,
 9524                                                LLT PtrTy) {
 9525  LLT IntPtrTy = LLT::integer(PtrTy.getSizeInBits());
 9526
 9527  auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
 9528  SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
 9529
 9530  // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
 9531  // have to generate an extra instruction to negate the alloc and then use
 9532  // G_PTR_ADD to add the negative offset.
 9533  auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
 9534  if (Alignment > Align(1)) {
// Masking with -Alignment rounds the result down to the alignment boundary.
 9535    APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
 9536    AlignMask.negate();
 9537    auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
 9538    Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
 9539  }
 9540
 9541  return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
 9542}
9543
// NOTE(review): signature not visible in this excerpt; body only.
// Lowers G_DYN_STACKALLOC by computing the adjusted stack pointer and copying
// it both back into SP and into the result. Only downward-growing stacks are
// supported.
 9546  const auto &MF = *MI.getMF();
 9547  const auto &TFI = *MF.getSubtarget().getFrameLowering();
 9548  if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
 9549    return UnableToLegalize;
 9550
 9551  Register Dst = MI.getOperand(0).getReg();
 9552  Register AllocSize = MI.getOperand(1).getReg();
 9553  Align Alignment = assumeAligned(MI.getOperand(2).getImm());
 9554
 9555  LLT PtrTy = MRI.getType(Dst);
 9556  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
 9557  Register SPTmp =
 9558      getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
 9559
 9560  MIRBuilder.buildCopy(SPReg, SPTmp);
 9561  MIRBuilder.buildCopy(Dst, SPTmp);
 9562
 9563  MI.eraseFromParent();
 9564  return Legalized;
 9565}
9566
// NOTE(review): signature not visible in this excerpt; body only.
// Lowers G_STACKSAVE to a copy from the target's stack pointer register;
// fails if the target reports no such register.
 9569  Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
 9570  if (!StackPtr)
 9571    return UnableToLegalize;
 9572
 9573  MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
 9574  MI.eraseFromParent();
 9575  return Legalized;
 9576}
9577
// NOTE(review): signature not visible in this excerpt; body only.
// Lowers G_STACKRESTORE to a copy into the target's stack pointer register;
// fails if the target reports no such register.
 9580  Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
 9581  if (!StackPtr)
 9582    return UnableToLegalize;
 9583
 9584  MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
 9585  MI.eraseFromParent();
 9586  return Legalized;
 9587}
9588
// NOTE(review): signature not visible in this excerpt; body only.
// Lowers G_EXTRACT: an element-aligned extract from a vector becomes an
// unmerge plus re-merge of the selected elements; otherwise the source is
// cast to an integer and the field is produced by shift + truncate. Extracts
// involving non-integral address spaces are rejected.
 9591  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
 9592  unsigned Offset = MI.getOperand(2).getImm();
 9593
 9594  // Extract sub-vector or one element
 9595  if (SrcTy.isVector()) {
 9596    unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
 9597    unsigned DstSize = DstTy.getSizeInBits();
 9598
 9599    if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
 9600        (Offset + DstSize <= SrcTy.getSizeInBits())) {
 9601      // Unmerge and allow access to each Src element for the artifact combiner.
 9602      auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
 9603
 9604      // Take element(s) we need to extract and copy it (merge them).
 9605      SmallVector<Register, 8> SubVectorElts;
 9606      for (unsigned Idx = Offset / SrcEltSize;
 9607           Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
 9608        SubVectorElts.push_back(Unmerge.getReg(Idx));
 9609      }
 9610      if (SubVectorElts.size() == 1)
 9611        MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
 9612      else
 9613        MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
 9614
 9615      MI.eraseFromParent();
 9616      return Legalized;
 9617    }
 9618  }
 9619
 9620  const DataLayout &DL = MIRBuilder.getDataLayout();
 9621  if ((SrcTy.isPointer() &&
 9622       DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) ||
 9623      (DstTy.isPointer() &&
 9624       DL.isNonIntegralAddressSpace(DstTy.getAddressSpace()))) {
 9625    LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
 9626    return UnableToLegalize;
 9627  }
 9628
 9629  if ((DstTy.isScalar() || DstTy.isPointer()) &&
 9630      (SrcTy.isScalar() || SrcTy.isPointer() ||
 9631       (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
 9632    LLT SrcIntTy = SrcTy;
 9633    if (!SrcTy.isScalar()) {
 9634      SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
 9635      SrcReg = MIRBuilder.buildCast(SrcIntTy, SrcReg).getReg(0);
 9636    }
 9637
// Pointer destinations are computed in an integer register, then cast.
 9638    Register ResultReg = DstReg;
 9639    if (DstTy.isPointer())
 9640      ResultReg =
 9641          MRI.createGenericVirtualRegister(LLT::scalar(DstTy.getSizeInBits()));
 9642
 9643    if (Offset == 0)
 9644      MIRBuilder.buildTrunc(ResultReg, SrcReg);
 9645    else {
 9646      auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
 9647      auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
 9648      MIRBuilder.buildTrunc(ResultReg, Shr);
 9649    }
 9650
 9651    if (DstTy.isPointer())
 9652      MIRBuilder.buildIntToPtr(DstReg, ResultReg);
 9653
 9654    MI.eraseFromParent();
 9655    return Legalized;
 9656  }
 9657
 9658  return UnableToLegalize;
 9659}
9660
// NOTE(review): signature not visible in this excerpt; body only.
// Lowers G_INSERT: an element-aligned insert into a vector becomes unmerge of
// both sources plus a re-merge; otherwise everything is cast to integers and
// the field is spliced in with mask/shift/or. Non-integral address spaces are
// rejected on the bit-arithmetic paths.
 9662  auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
 9663  uint64_t Offset = MI.getOperand(3).getImm();
 9664
 9665  LLT DstTy = MRI.getType(Src);
 9666  LLT InsertTy = MRI.getType(InsertSrc);
 9667
 9668  const DataLayout &DL = MIRBuilder.getDataLayout();
 9669  bool IsNonIntegralInsert =
 9670      InsertTy.isPointerOrPointerVector() &&
 9671      DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace());
 9672  bool IsNonIntegralDst = DstTy.isPointerOrPointerVector() &&
 9673                          DL.isNonIntegralAddressSpace(DstTy.getAddressSpace());
 9674
 9675  // Insert sub-vector or one element
 9676  if (DstTy.isVector()) {
 9677    LLT EltTy = DstTy.getElementType();
 9678
 9679    if ((IsNonIntegralInsert || IsNonIntegralDst) && InsertTy != EltTy) {
 9680      LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
 9681      return UnableToLegalize;
 9682    }
 9683
 9684    unsigned EltSize = EltTy.getSizeInBits();
 9685    unsigned InsertSize = InsertTy.getSizeInBits();
 9686
 9687    if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
 9688        (Offset + InsertSize <= DstTy.getSizeInBits())) {
 9689      auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
// NOTE(review): the declaration of DstElts (file line 9690, likely a
// SmallVector<Register>) was dropped by the extraction and is not visible here.
 9691      unsigned Idx = 0;
 9692      // Elements from Src before insert start Offset
 9693      for (; Idx < Offset / EltSize; ++Idx) {
 9694        DstElts.push_back(UnmergeSrc.getReg(Idx));
 9695      }
 9696
 9697      // Replace elements in Src with elements from InsertSrc
 9698      if (InsertTy.getSizeInBits() > EltSize) {
 9699        auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
 9700        for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
 9701             ++Idx, ++i) {
 9702          DstElts.push_back(UnmergeInsertSrc.getReg(i));
 9703        }
 9704      } else {
// Single-element insert: reconcile pointer vs integer element types first.
 9705        if (InsertTy.isPointer() && !EltTy.isPointer())
 9706          InsertSrc = MIRBuilder.buildPtrToInt(EltTy, InsertSrc).getReg(0);
 9707        else if (!InsertTy.isPointer() && EltTy.isPointer())
 9708          InsertSrc = MIRBuilder.buildIntToPtr(EltTy, InsertSrc).getReg(0);
 9709        DstElts.push_back(InsertSrc);
 9710        ++Idx;
 9711      }
 9712
 9713      // Remaining elements from Src after insert
 9714      for (; Idx < DstTy.getNumElements(); ++Idx) {
 9715        DstElts.push_back(UnmergeSrc.getReg(Idx));
 9716      }
 9717
 9718      MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
 9719      MI.eraseFromParent();
 9720      return Legalized;
 9721    }
 9722  }
 9723
 9724  if (InsertTy.isVector() ||
 9725      (DstTy.isVector() && DstTy.getElementType() != InsertTy))
 9726    return UnableToLegalize;
 9727
 9728  if (IsNonIntegralDst || IsNonIntegralInsert) {
 9729    LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
 9730    return UnableToLegalize;
 9731  }
 9732
 9733  LLT IntDstTy = DstTy;
 9734
 9735  if (!DstTy.isScalar()) {
 9736    IntDstTy = LLT::scalar(DstTy.getSizeInBits());
 9737    Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
 9738  }
 9739
 9740  if (!InsertTy.isScalar()) {
 9741    const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
 9742    InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
 9743  }
 9744
 9745  Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
 9746  if (Offset != 0) {
 9747    auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
 9748    ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
 9749  }
 9750
// NOTE(review): the declaration initializing MaskVal (file line 9751,
// presumably via APInt::getBitsSetWithWrap or similar) was dropped by the
// extraction; only its argument continuation (line 9752) is visible below.
 9752      DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
 9753
// Clear the destination bits of the insert window, then OR in the new field.
 9754  auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
 9755  auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
 9756  auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
 9757
 9758  MIRBuilder.buildCast(Dst, Or);
 9759  MI.eraseFromParent();
 9760  return Legalized;
 9761}
9762
// NOTE(review): signature (file lines 9763-9764) not visible in this excerpt.
// Lowers G_SADDO/G_SSUBO: performs the plain add/sub, then derives the signed
// overflow bit from sign comparisons of the operands and result.
 9765  auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
 9766      MI.getFirst4RegLLTs();
 9767  const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
 9768
 9769  LLT Ty = Dst0Ty;
 9770  LLT BoolTy = Dst1Ty;
 9771
// Use a fresh register so the overflow computation can read the sum before
// Dst0 is written (Dst0 might alias an input in the caller's eyes).
 9772  Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
 9773
 9774  if (IsAdd)
 9775    MIRBuilder.buildAdd(NewDst0, LHS, RHS);
 9776  else
 9777    MIRBuilder.buildSub(NewDst0, LHS, RHS);
 9778
 9779  // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
 9780
 9781  auto Zero = MIRBuilder.buildConstant(Ty, 0);
 9782
 9783  if (IsAdd) {
 9784    // For an addition, the result should be less than one of the operands (LHS)
 9785    // if and only if the other operand (RHS) is negative, otherwise there will
 9786    // be overflow.
 9787    auto ResultLowerThanLHS =
 9788        MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
 9789    auto RHSNegative =
 9790        MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, RHS, Zero);
 9791    MIRBuilder.buildXor(Dst1, RHSNegative, ResultLowerThanLHS);
 9792  } else {
 9793    // For subtraction, overflow occurs when the signed comparison of operands
 9794    // doesn't match the sign of the result.
 9795    auto LHSLessThanRHS =
 9796        MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS, RHS);
 9797    auto ResultNegative =
 9798        MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, Zero);
 9799    MIRBuilder.buildXor(Dst1, LHSLessThanRHS, ResultNegative);
 9800  }
 9801
 9802  MIRBuilder.buildCopy(Dst0, NewDst0);
 9803  MI.eraseFromParent();
 9804
 9805  return Legalized;
 9806}
9807
// NOTE(review): signature not visible in this excerpt; body only. From the
// operand pattern (result, overflow-out, lhs, rhs, carry-in) this lowers a
// signed add-with-carry: sum = LHS + RHS + zext(CarryIn), with the signed
// overflow bit computed as the sign of (sum^LHS) & (sum^RHS).
 9809  auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
 9810  const LLT Ty = MRI.getType(Res);
 9811
 9812  // sum = LHS + RHS + zext(CarryIn)
 9813  auto Tmp = MIRBuilder.buildAdd(Ty, LHS, RHS);
 9814  auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
 9815  auto Sum = MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
 9816  MIRBuilder.buildCopy(Res, Sum);
 9817
 9818  // OvOut = icmp slt ((sum ^ lhs) & (sum ^ rhs)), 0
 9819  auto AX = MIRBuilder.buildXor(Ty, Sum, LHS);
 9820  auto BX = MIRBuilder.buildXor(Ty, Sum, RHS);
 9821  auto T = MIRBuilder.buildAnd(Ty, AX, BX);
 9822
 9823  auto Zero = MIRBuilder.buildConstant(Ty, 0);
 9824  MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
 9825
 9826  MI.eraseFromParent();
 9827  return Legalized;
 9828}
9829
// NOTE(review): signature not visible in this excerpt; body only. From the
// operand pattern this lowers a signed subtract-with-borrow:
// diff = LHS - (RHS + zext(CarryIn)), with signed overflow taken from the
// sign of (LHS^RHS) & (LHS^diff).
 9831  auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
 9832  const LLT Ty = MRI.getType(Res);
 9833
 9834  // Diff = LHS - (RHS + zext(CarryIn))
 9835  auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
 9836  auto RHSPlusCI = MIRBuilder.buildAdd(Ty, RHS, CarryZ);
 9837  auto Diff = MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
 9838  MIRBuilder.buildCopy(Res, Diff);
 9839
 9840  // ov = msb((LHS ^ RHS) & (LHS ^ Diff))
 9841  auto X1 = MIRBuilder.buildXor(Ty, LHS, RHS);
 9842  auto X2 = MIRBuilder.buildXor(Ty, LHS, Diff);
 9843  auto T = MIRBuilder.buildAnd(Ty, X1, X2);
 9844  auto Zero = MIRBuilder.buildConstant(Ty, 0);
 9845  MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
 9846
 9847  MI.eraseFromParent();
 9848  return Legalized;
 9849}
9850
// NOTE(review): signature (file lines 9851-9852) not visible in this excerpt.
// Lowers saturating add/sub using min/max clamping: signed forms clamp RHS
// into the range that cannot overflow given LHS; unsigned forms use
// a + umin(~a, b) and a - umin(a, b).
 9853  auto [Res, LHS, RHS] = MI.getFirst3Regs();
 9854  LLT Ty = MRI.getType(Res);
 9855  bool IsSigned;
 9856  bool IsAdd;
 9857  unsigned BaseOp;
 9858  switch (MI.getOpcode()) {
 9859  default:
 9860    llvm_unreachable("unexpected addsat/subsat opcode");
 9861  case TargetOpcode::G_UADDSAT:
 9862    IsSigned = false;
 9863    IsAdd = true;
 9864    BaseOp = TargetOpcode::G_ADD;
 9865    break;
 9866  case TargetOpcode::G_SADDSAT:
 9867    IsSigned = true;
 9868    IsAdd = true;
 9869    BaseOp = TargetOpcode::G_ADD;
 9870    break;
 9871  case TargetOpcode::G_USUBSAT:
 9872    IsSigned = false;
 9873    IsAdd = false;
 9874    BaseOp = TargetOpcode::G_SUB;
 9875    break;
 9876  case TargetOpcode::G_SSUBSAT:
 9877    IsSigned = true;
 9878    IsAdd = false;
 9879    BaseOp = TargetOpcode::G_SUB;
 9880    break;
 9881  }
 9882
 9883  if (IsSigned) {
 9884    // sadd.sat(a, b) ->
 9885    //   hi = 0x7fffffff - smax(a, 0)
 9886    //   lo = 0x80000000 - smin(a, 0)
 9887    //   a + smin(smax(lo, b), hi)
 9888    // ssub.sat(a, b) ->
 9889    //   lo = smax(a, -1) - 0x7fffffff
 9890    //   hi = smin(a, -1) - 0x80000000
 9891    //   a - smin(smax(lo, b), hi)
 9892    // TODO: AMDGPU can use a "median of 3" instruction here:
 9893    //   a +/- med3(lo, b, hi)
 9894    uint64_t NumBits = Ty.getScalarSizeInBits();
 9895    auto MaxVal =
 9896        MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
 9897    auto MinVal =
 9898        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
// NOTE(review): the declarations of Hi and Lo (file line 9899, likely
// MachineInstrBuilder) were dropped by the extraction and are not visible.
 9900    if (IsAdd) {
 9901      auto Zero = MIRBuilder.buildConstant(Ty, 0);
 9902      Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
 9903      Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
 9904    } else {
 9905      auto NegOne = MIRBuilder.buildConstant(Ty, -1);
 9906      Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
 9907                               MaxVal);
 9908      Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
 9909                               MinVal);
 9910    }
 9911    auto RHSClamped =
 9912        MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
 9913    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
 9914  } else {
 9915    // uadd.sat(a, b) -> a + umin(~a, b)
 9916    // usub.sat(a, b) -> a - umin(a, b)
 9917    Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
 9918    auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
 9919    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
 9920  }
 9921
 9922  MI.eraseFromParent();
 9923  return Legalized;
 9924}
9925
// NOTE(review): signature (file lines 9926-9927) not visible in this excerpt.
// Lowers saturating add/sub via the overflow-producing forms: compute
// {tmp, ov} with G_[US]ADDO/[US]SUBO and select a clamp value when overflow
// is set (sign-dependent min/max for signed, all-ones/zero for unsigned).
 9928  auto [Res, LHS, RHS] = MI.getFirst3Regs();
 9929  LLT Ty = MRI.getType(Res);
 9930  LLT BoolTy = Ty.changeElementSize(1);
 9931  bool IsSigned;
 9932  bool IsAdd;
 9933  unsigned OverflowOp;
 9934  switch (MI.getOpcode()) {
 9935  default:
 9936    llvm_unreachable("unexpected addsat/subsat opcode");
 9937  case TargetOpcode::G_UADDSAT:
 9938    IsSigned = false;
 9939    IsAdd = true;
 9940    OverflowOp = TargetOpcode::G_UADDO;
 9941    break;
 9942  case TargetOpcode::G_SADDSAT:
 9943    IsSigned = true;
 9944    IsAdd = true;
 9945    OverflowOp = TargetOpcode::G_SADDO;
 9946    break;
 9947  case TargetOpcode::G_USUBSAT:
 9948    IsSigned = false;
 9949    IsAdd = false;
 9950    OverflowOp = TargetOpcode::G_USUBO;
 9951    break;
 9952  case TargetOpcode::G_SSUBSAT:
 9953    IsSigned = true;
 9954    IsAdd = false;
 9955    OverflowOp = TargetOpcode::G_SSUBO;
 9956    break;
 9957  }
 9958
 9959  auto OverflowRes =
 9960      MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
 9961  Register Tmp = OverflowRes.getReg(0);
 9962  Register Ov = OverflowRes.getReg(1);
 9963  MachineInstrBuilder Clamp;
 9964  if (IsSigned) {
 9965    // sadd.sat(a, b) ->
 9966    //   {tmp, ov} = saddo(a, b)
 9967    //   ov ? (tmp >>s 31) + 0x80000000 : r
 9968    // ssub.sat(a, b) ->
 9969    //   {tmp, ov} = ssubo(a, b)
 9970    //   ov ? (tmp >>s 31) + 0x80000000 : r
// Arithmetic shift of tmp replicates its sign bit; adding INT_MIN turns that
// into signed-max on positive overflow and signed-min on negative overflow.
 9971    uint64_t NumBits = Ty.getScalarSizeInBits();
 9972    auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
 9973    auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
 9974    auto MinVal =
 9975        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
 9976    Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
 9977  } else {
 9978    // uadd.sat(a, b) ->
 9979    //   {tmp, ov} = uaddo(a, b)
 9980    //   ov ? 0xffffffff : tmp
 9981    // usub.sat(a, b) ->
 9982    //   {tmp, ov} = usubo(a, b)
 9983    //   ov ? 0 : tmp
 9984    Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
 9985  }
 9986  MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
 9987
 9988  MI.eraseFromParent();
 9989  return Legalized;
 9990}
9991
// NOTE(review): signature (file lines 9992-9993) not visible in this excerpt.
// Lowers saturating shift-left: shift, shift back, and if the round-trip does
// not reproduce LHS the shift overflowed, so select the saturation value
// (sign-dependent min/max for signed, all-ones for unsigned).
 9994  assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
 9995          MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
 9996         "Expected shlsat opcode!");
 9997  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
 9998  auto [Res, LHS, RHS] = MI.getFirst3Regs();
 9999  LLT Ty = MRI.getType(Res);
10000  LLT BoolTy = Ty.changeElementSize(1);
10001
10002  unsigned BW = Ty.getScalarSizeInBits();
10003  auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
10004  auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
10005                       : MIRBuilder.buildLShr(Ty, Result, RHS);
10006
10007  MachineInstrBuilder SatVal;
10008  if (IsSigned) {
10009    auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
10010    auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
10011    auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
10012                                    MIRBuilder.buildConstant(Ty, 0));
10013    SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
10014  } else {
10015    SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
10016  }
10017  auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
10018  MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
10019
10020  MI.eraseFromParent();
10021  return Legalized;
10022}
10023
// NOTE(review): signature not visible in this excerpt; body only.
// Lowers G_BSWAP with shifts and masks: the outermost bytes are swapped with
// a single shift pair, then each remaining byte pair is exchanged via
// mask+shift, OR-ing everything into the result.
10025  auto [Dst, Src] = MI.getFirst2Regs();
10026  const LLT Ty = MRI.getType(Src);
10027  unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
10028  unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
10029
10030  // Swap most and least significant byte, set remaining bytes in Res to zero.
10031  auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
10032  auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
10033  auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
10034  auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
10035
10036  // Set i-th high/low byte in Res to i-th low/high byte from Src.
10037  for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
10038    // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
10039    APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
10040    auto Mask = MIRBuilder.buildConstant(Ty, APMask);
10041    auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
10042    // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
10043    auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
10044    auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
10045    Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
10046    // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
10047    auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
10048    auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
10049    Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
10050  }
// Retarget the final OR so it defines Dst instead of a temporary register.
10051  Res.getInstr()->getOperand(0).setReg(Dst);
10052
10053  MI.eraseFromParent();
10054  return Legalized;
10055}
10056
// Helper for the bitreverse expansion below: computes
//   { (Src & Mask) >> N } | { (Src << N) & Mask }
// writing the result to Dst. Mask selects the high N-bit group of each
// 2*N-bit block, so this swaps adjacent N-bit groups.
                                 MachineInstrBuilder Src, const APInt &Mask) {
  const LLT Ty = Dst.getLLTTy(*B.getMRI());
  // Materialize the shift amount and the group-selecting mask once.
  MachineInstrBuilder C_N = B.buildConstant(Ty, N);
  MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
  // High groups moved down into the low positions.
  auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
  // Low groups moved up: shift first, then mask — equivalent to masking with
  // ~Mask first, but reuses the same mask constant.
  auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
  return B.buildOr(Dst, LHS, RHS);
}
10067
  auto [Dst, Src] = MI.getFirst2Regs();
  const LLT SrcTy = MRI.getType(Src);
  unsigned Size = SrcTy.getScalarSizeInBits();
  unsigned VSize = SrcTy.getSizeInBits();

  if (Size >= 8) {
    // Multi-byte elements: reverse the bytes with G_BSWAP, then reverse the
    // bits inside each byte.
    if (SrcTy.isVector() && (VSize % 8 == 0) &&
        (LI.isLegal({TargetOpcode::G_BITREVERSE,
                     {LLT::fixed_vector(VSize / 8, LLT::integer(8)),
                      LLT::fixed_vector(VSize / 8, LLT::integer(8))}}))) {
      // If bitreverse is legal for i8 vector of the same size, then cast
      // to i8 vector type.
      // e.g. v4s32 -> v16s8
      LLT VTy = LLT::fixed_vector(VSize / 8, LLT::integer(8));
      auto BSWAP = MIRBuilder.buildBSwap(SrcTy, Src);
      auto Cast = MIRBuilder.buildBitcast(VTy, BSWAP);
      auto RBIT = MIRBuilder.buildBitReverse(VTy, Cast);
      MIRBuilder.buildBitcast(Dst, RBIT);
    } else {
      // Generic path: bswap, then three rounds of group swaps (4-bit, 2-bit,
      // 1-bit) via the SwapN helper.
      MachineInstrBuilder BSWAP =
          MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});

      // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
      // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
      // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
      MachineInstrBuilder Swap4 = SwapN(4, SrcTy, MIRBuilder, BSWAP,
                                        APInt::getSplat(Size, APInt(8, 0xF0)));

      // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
      // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
      // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
      MachineInstrBuilder Swap2 = SwapN(2, SrcTy, MIRBuilder, Swap4,
                                        APInt::getSplat(Size, APInt(8, 0xCC)));

      // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
      // 6|7
      // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
      // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
      SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
    }
  } else {
    // Expand bitreverse for types smaller than 8 bits.
    // Source bit I is isolated, moved to its mirrored position J = Size-1-I,
    // and all the single-bit pieces are ORed together.
    for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
      if (I < J) {
        auto ShAmt = MIRBuilder.buildConstant(SrcTy, J - I);
        Tmp2 = MIRBuilder.buildShl(SrcTy, Src, ShAmt);
      } else {
        auto ShAmt = MIRBuilder.buildConstant(SrcTy, I - J);
        Tmp2 = MIRBuilder.buildLShr(SrcTy, Src, ShAmt);
      }

      // Keep only the bit that landed at position J.
      auto Mask = MIRBuilder.buildConstant(SrcTy, 1ULL << J);
      Tmp2 = MIRBuilder.buildAnd(SrcTy, Tmp2, Mask);
      // Accumulate the mirrored bits.
      if (I == 0)
        Tmp = Tmp2;
      else
        Tmp = MIRBuilder.buildOr(SrcTy, Tmp, Tmp2);
    }
    MIRBuilder.buildCopy(Dst, Tmp);
  }

  MI.eraseFromParent();
  return Legalized;
}
10136
  MachineFunction &MF = MIRBuilder.getMF();

  // G_READ_REGISTER defines the value (operand 0), G_WRITE_REGISTER uses it
  // (operand 1); the register name travels as an MDString metadata operand.
  bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
  int NameOpIdx = IsRead ? 1 : 0;
  int ValRegIndex = IsRead ? 0 : 1;

  Register ValReg = MI.getOperand(ValRegIndex).getReg();
  const LLT Ty = MRI.getType(ValReg);
  const MDString *RegStr = cast<MDString>(
      cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));

  // Ask the target to resolve the named register.
  Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
  if (!PhysReg) {
    // Unknown register name: emit a diagnostic; for reads, still define the
    // result (as undef) so the MIR remains valid.
    const Function &Fn = MF.getFunction();
        "invalid register \"" + Twine(RegStr->getString().data()) + "\" for " +
            (IsRead ? "llvm.read_register" : "llvm.write_register"),
        Fn, MI.getDebugLoc()));
    if (IsRead)
      MIRBuilder.buildUndef(ValReg);

    MI.eraseFromParent();
    return Legalized;
  }

  // Lower to a plain COPY to/from the physical register.
  if (IsRead)
    MIRBuilder.buildCopy(ValReg, PhysReg);
  else
    MIRBuilder.buildCopy(PhysReg, ValReg);

  MI.eraseFromParent();
  return Legalized;
}
10172
10175 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
10176 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
10177 Register Result = MI.getOperand(0).getReg();
10178 LLT OrigTy = MRI.getType(Result);
10179 auto SizeInBits = OrigTy.getScalarSizeInBits();
10180 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
10181
10182 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
10183 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
10184 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
10185 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
10186
10187 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
10188 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
10189 MIRBuilder.buildTrunc(Result, Shifted);
10190
10191 MI.eraseFromParent();
10192 return Legalized;
10193}
10194
  // Lower G_IS_FPCLASS by bitcasting the value to an integer and testing the
  // sign/exponent/mantissa fields directly.
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());

  // Degenerate masks fold to a constant result.
  if (Mask == fcNone) {
    MIRBuilder.buildConstant(DstReg, 0);
    MI.eraseFromParent();
    return Legalized;
  }
  if (Mask == fcAllFlags) {
    MIRBuilder.buildConstant(DstReg, 1);
    MI.eraseFromParent();
    return Legalized;
  }

  // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
  // version

  unsigned BitSize = SrcTy.getScalarSizeInBits();
  const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());

  // All class tests run on the raw integer bit pattern of the value.
  LLT IntTy = SrcTy.changeElementType(LLT::integer(BitSize));
  auto AsInt = SrcTy == IntTy ? MIRBuilder.buildCopy(IntTy, SrcReg)
                              : MIRBuilder.buildBitcast(IntTy, SrcReg);

  // Various masks.
  APInt SignBit = APInt::getSignMask(BitSize);
  APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
  APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
  APInt ExpMask = Inf;
  APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
  APInt QNaNBitMask =
      APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
  APInt InversionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());

  auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
  auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
  auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
  auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
  auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);

  // Abs = value with sign bit cleared; Sign = (AsInt != Abs), i.e. the sign
  // bit was set.
  auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
  auto Sign =
      MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);

  // Accumulate each partial class test into Res with OR.
  auto Res = MIRBuilder.buildConstant(DstTy, 0);
  // Clang doesn't support capture of structured bindings:
  LLT DstTyCopy = DstTy;
  const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
    Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
  };

  // Tests that involve more than one class should be processed first.
  if ((Mask & fcFinite) == fcFinite) {
    // finite(V) ==> abs(V) u< exp_mask
    appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
                                     ExpMaskC));
    Mask &= ~fcFinite;
  } else if ((Mask & fcFinite) == fcPosFinite) {
    // finite(V) && V > 0 ==> V u< exp_mask
    appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
                                     ExpMaskC));
    Mask &= ~fcPosFinite;
  } else if ((Mask & fcFinite) == fcNegFinite) {
    // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
    auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
                                    ExpMaskC);
    auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
    appendToRes(And);
    Mask &= ~fcNegFinite;
  }

  if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
    // fcZero | fcSubnormal => test all exponent bits are 0
    // TODO: Handle sign bit specific cases
    // TODO: Handle inverted case
    if (PartialCheck == (fcZero | fcSubnormal)) {
      auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       ExpBits, ZeroC));
      Mask &= ~PartialCheck;
    }
  }

  // Check for individual classes.
  if (FPClassTest PartialCheck = Mask & fcZero) {
    if (PartialCheck == fcPosZero)
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       AsInt, ZeroC));
    else if (PartialCheck == fcZero)
      appendToRes(
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
    else // fcNegZero
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       AsInt, SignBitC));
  }

  if (FPClassTest PartialCheck = Mask & fcSubnormal) {
    // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
    // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
    auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
    auto OneC = MIRBuilder.buildConstant(IntTy, 1);
    auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
    auto SubnormalRes =
        MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
                             MIRBuilder.buildConstant(IntTy, AllOneMantissa));
    if (PartialCheck == fcNegSubnormal)
      SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
    appendToRes(SubnormalRes);
  }

  if (FPClassTest PartialCheck = Mask & fcInf) {
    if (PartialCheck == fcPosInf)
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       AsInt, InfC));
    else if (PartialCheck == fcInf)
      appendToRes(
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
    else { // fcNegInf
      APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
      auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       AsInt, NegInfC));
    }
  }

  if (FPClassTest PartialCheck = Mask & fcNan) {
    auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
    if (PartialCheck == fcNan) {
      // isnan(V) ==> abs(V) u> int(inf)
      appendToRes(
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
    } else if (PartialCheck == fcQNan) {
      // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
                                       InfWithQnanBitC));
    } else { // fcSNan
      // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
      //                    abs(V) u< (unsigned(Inf) | quiet_bit)
      auto IsNan =
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
      auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
                                            Abs, InfWithQnanBitC);
      appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
    }
  }

  if (FPClassTest PartialCheck = Mask & fcNormal) {
    // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
    // (max_exp-1))
    APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
    auto ExpMinusOne = MIRBuilder.buildSub(
        IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
    APInt MaxExpMinusOne = ExpMask - ExpLSB;
    auto NormalRes =
        MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
                             MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
    if (PartialCheck == fcNegNormal)
      NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
    else if (PartialCheck == fcPosNormal) {
      // Positive-only: AND with the inverted sign test.
      auto PosSign = MIRBuilder.buildXor(
          DstTy, Sign, MIRBuilder.buildConstant(DstTy, InversionMask));
      NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
    }
    appendToRes(NormalRes);
  }

  MIRBuilder.buildCopy(DstReg, Res);
  MI.eraseFromParent();
  return Legalized;
}
10367
  // Implement G_SELECT in terms of XOR, AND, OR:
  //   res = (mask & op1) | (~mask & op2)
  // which requires the condition mask to be all-ones/all-zeros per element.
  auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
      MI.getFirst4RegLLTs();

  LLT Op1TyInt =
      Op1Ty.changeElementType(LLT::integer(Op1Ty.getScalarSizeInBits()));

  // Pointer elements can't be used with the bitwise ops; go through
  // equally-sized integers and convert back at the end.
  bool IsEltPtr = DstTy.isPointerOrPointerVector();
  if (IsEltPtr) {
    LLT ScalarPtrTy = LLT::integer(DstTy.getScalarSizeInBits());
    LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
    Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
    Op1Ty = MRI.getType(Op1Reg);
    Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
    Op2Ty = MRI.getType(Op2Reg);
    DstTy = NewTy;
  }

  if (MaskTy.isScalar()) {
    // Turn the scalar condition into a vector condition mask if needed.

    Register MaskElt = MaskReg;

    // The condition was potentially zero extended before, but we want a sign
    // extended boolean.
    if (MaskTy != LLT::scalar(1))
      MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);

    // Continue the sign extension (or truncate) to match the data type.
    MaskTy = DstTy.changeElementType(LLT::integer(DstTy.getScalarSizeInBits()));
    MaskElt =
        MIRBuilder.buildSExtOrTrunc(MaskTy.getScalarType(), MaskElt).getReg(0);

    if (DstTy.isVector()) {
      // Generate a vector splat idiom.
      auto ShufSplat = MIRBuilder.buildShuffleSplat(MaskTy, MaskElt);
      MaskReg = ShufSplat.getReg(0);
    } else {
      MaskReg = MaskElt;
    }
  } else if (!DstTy.isVector()) {
    // Cannot handle the case that mask is a vector and dst is a scalar.
    return UnableToLegalize;
  }

  // The bitwise expansion needs mask and data to be the same width.
  if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
    return UnableToLegalize;
  }

  // Bitcast FP-typed operands to integers for the bitwise ops.
  if (!Op1Ty.getScalarType().isAnyScalar() &&
      !Op1Ty.getScalarType().isInteger())
    Op1Reg = MIRBuilder.buildBitcast(Op1TyInt, Op1Reg).getReg(0);

  if (!Op2Ty.getScalarType().isAnyScalar() &&
      !Op2Ty.getScalarType().isInteger()) {
    auto Op2TyInt =
        Op2Ty.changeElementType(LLT::integer(Op2Ty.getScalarSizeInBits()));
    Op2Reg = MIRBuilder.buildBitcast(Op2TyInt, Op2Reg).getReg(0);
  }

  // res = (op1 & mask) | (op2 & ~mask)
  auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
  auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
  auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
  if (IsEltPtr) {
    auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
    MIRBuilder.buildIntToPtr(DstReg, Or);
  } else {
    if (DstTy == Op1TyInt)
      MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
    else {
      // Result was computed in the integer type; cast back to the FP dest.
      auto Or = MIRBuilder.buildOr(Op1TyInt, NewOp1, NewOp2);
      MIRBuilder.buildBitcast(DstReg, Or.getReg(0));
    }
  }
  MI.eraseFromParent();
  return Legalized;
}
10446
10448 // Split DIVREM into individual instructions.
10449 unsigned Opcode = MI.getOpcode();
10450
10451 MIRBuilder.buildInstr(
10452 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
10453 : TargetOpcode::G_UDIV,
10454 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10455 MIRBuilder.buildInstr(
10456 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
10457 : TargetOpcode::G_UREM,
10458 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10459 MI.eraseFromParent();
10460 return Legalized;
10461}
10462
10465 // Expand %res = G_ABS %a into:
10466 // %v1 = G_ASHR %a, scalar_size-1
10467 // %v2 = G_ADD %a, %v1
10468 // %res = G_XOR %v2, %v1
10469 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
10470 Register OpReg = MI.getOperand(1).getReg();
10471 auto ShiftAmt =
10472 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
10473 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
10474 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
10475 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
10476 MI.eraseFromParent();
10477 return Legalized;
10478}
10479
10482 // Expand %res = G_ABS %a into:
10483 // %v1 = G_CONSTANT 0
10484 // %v2 = G_SUB %v1, %a
10485 // %res = G_SMAX %a, %v2
10486 Register SrcReg = MI.getOperand(1).getReg();
10487 LLT Ty = MRI.getType(SrcReg);
10488 auto Zero = MIRBuilder.buildConstant(Ty, 0);
10489 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
10490 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
10491 MI.eraseFromParent();
10492 return Legalized;
10493}
10494
10497 Register SrcReg = MI.getOperand(1).getReg();
10498 Register DestReg = MI.getOperand(0).getReg();
10499 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
10500 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
10501 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
10502 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
10503 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
10504 MI.eraseFromParent();
10505 return Legalized;
10506}
10507
  assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
          MI.getOpcode() == TargetOpcode::G_ABDU) &&
         "Expected G_ABDS or G_ABDU instruction");

  auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(LHS);

  // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
  // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
  Register LHSSub = MIRBuilder.buildSub(Ty, LHS, RHS).getReg(0);
  Register RHSSub = MIRBuilder.buildSub(Ty, RHS, LHS).getReg(0);
  // Pick a signed or unsigned greater-than predicate to match the opcode.
  CmpInst::Predicate Pred = (MI.getOpcode() == TargetOpcode::G_ABDS)
  auto ICmp = MIRBuilder.buildICmp(Pred, LLT::scalar(1), LHS, RHS);
  MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);

  MI.eraseFromParent();
  return Legalized;
}
10530
10533 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10534 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10535 "Expected G_ABDS or G_ABDU instruction");
10536
10537 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10538 LLT Ty = MRI.getType(LHS);
10539
10540 // abds(lhs, rhs) -→ sub(smax(lhs, rhs), smin(lhs, rhs))
10541 // abdu(lhs, rhs) -→ sub(umax(lhs, rhs), umin(lhs, rhs))
10542 Register MaxReg, MinReg;
10543 if (MI.getOpcode() == TargetOpcode::G_ABDS) {
10544 MaxReg = MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10545 MinReg = MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10546 } else {
10547 MaxReg = MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10548 MinReg = MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10549 }
10550 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10551
10552 MI.eraseFromParent();
10553 return Legalized;
10554}
10555
  // Lower FP absolute value by clearing the sign bit of the integer bit
  // pattern: res = bits(src) & SIGNED_MAX (all bits but the sign).
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  // Integer type with the same element layout as the destination.
  LLT TyInt =
      DstTy.changeElementType(LLT::integer(DstTy.getScalarSizeInBits()));
  Register CastedSrc = SrcReg;

  // If the source element type is FP, reinterpret it as integer bits first.
  if (!(SrcTy.getScalarType().isAnyScalar() ||
        SrcTy.getScalarType().isInteger())) {
    auto SrcTyInt =
        SrcTy.changeElementType(LLT::integer(SrcTy.getScalarSizeInBits()));
    CastedSrc = MIRBuilder.buildBitcast(SrcTyInt, SrcReg).getReg(0);
  }

  if (MRI.getType(DstReg) != TyInt) {
    // Reset sign bit
    Register NewDst =
        .buildAnd(TyInt, CastedSrc,
                  MIRBuilder.buildConstant(
                      DstTy.getScalarSizeInBits())))
        .getReg(0);

    // AND was done in the integer type; cast back to the FP destination.
    MIRBuilder.buildBitcast(DstReg, NewDst);
  } else
        .buildAnd(
            DstReg, CastedSrc,
            MIRBuilder.buildConstant(
                TyInt, APInt::getSignedMaxValue(DstTy.getScalarSizeInBits())))
        .getReg(0);

  MI.eraseFromParent();
  return Legalized;
}
10591
10594 Register SrcReg = MI.getOperand(1).getReg();
10595 LLT SrcTy = MRI.getType(SrcReg);
10596 LLT DstTy = MRI.getType(SrcReg);
10597
10598 // The source could be a scalar if the IR type was <1 x sN>.
10599 if (SrcTy.isScalar()) {
10600 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
10601 return UnableToLegalize; // FIXME: handle extension.
10602 // This can be just a plain copy.
10603 Observer.changingInstr(MI);
10604 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
10605 Observer.changedInstr(MI);
10606 return Legalized;
10607 }
10608 return UnableToLegalize;
10609}
10610
  // Lower G_VAARG: load the current argument pointer from the va_list, align
  // it if required, bump it past this argument, store it back, then load the
  // argument value itself.
  MachineFunction &MF = *MI.getMF();
  const DataLayout &DL = MIRBuilder.getDataLayout();
  LLVMContext &Ctx = MF.getFunction().getContext();
  Register ListPtr = MI.getOperand(1).getReg();
  LLT PtrTy = MRI.getType(ListPtr);

  // LstPtr is a pointer to the head of the list. Get the address
  // of the head of the list.
  Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
  MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
      MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
  auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);

  // Round the argument address up to the requested alignment (operand 2)
  // when it exceeds the minimum stack argument alignment:
  //   VAList = (VAList + A - 1) & ~(A - 1)
  const Align A(MI.getOperand(2).getImm());
  LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
  if (A > TLI.getMinStackArgumentAlignment()) {
    Register AlignAmt =
        MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
    auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
    auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
    VAList = AndDst.getReg(0);
  }

  // Increment the pointer, VAList, to the next vaarg
  // The list should be bumped by the size of element in the current head of
  // list.
  Register Dst = MI.getOperand(0).getReg();
  LLT LLTTy = MRI.getType(Dst);
  Type *Ty = getTypeForLLT(LLTTy, Ctx);
  auto IncAmt =
      MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
  auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);

  // Store the increment VAList to the legalized pointer
      MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
  MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
  // Load the actual argument out of the pointer VAList
  Align EltAlignment = DL.getABITypeAlign(Ty);
  MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
      MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
  MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);

  MI.eraseFromParent();
  return Legalized;
}
10658
  // On Darwin, -Os means optimize for size without hurting performance, so
  // only really optimize for size when -Oz (MinSize) is used.
  // NOTE(review): the first return is guarded by an isOSDarwin() triple check
  // not visible in this extraction — confirm against the full source.
  return MF.getFunction().hasMinSize();
  return MF.getFunction().hasOptSize();
}
10666
// Returns a list of types to use for memory op lowering in MemOps. A partial
// port of findOptimalMemOpLowering in TargetLowering.
// Returns false if the operation cannot be lowered within Limit stores.
static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
                                          unsigned Limit, const MemOp &Op,
                                          unsigned DstAS, unsigned SrcAS,
                                          const AttributeList &FuncAttributes,
                                          const TargetLowering &TLI) {
  // A fixed destination alignment that exceeds the source alignment cannot be
  // honored here.
  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
    return false;

  // Let the target pick its preferred type first.
  LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);

  if (Ty == LLT()) {
    // Use the largest scalar type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    Ty = LLT::integer(64);
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < Ty.getSizeInBytes() &&
             !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
        // NOTE(review): this shrinks sN to s(N/8) per iteration (64 -> 8 ->
        // 1); looks coarse but matches the existing behavior — verify intent.
        Ty = LLT::integer(Ty.getSizeInBytes());
    assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
    // FIXME: check for the largest legal type we can load/store to.
  }

  // Greedily cover the remaining size with the current type, shrinking it as
  // the tail gets smaller than the type.
  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned TySize = Ty.getSizeInBytes();
    while (TySize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      LLT NewTy = Ty;
      // FIXME: check for mem op safety and legality of the types. Not all of
      // SDAGisms map cleanly to GISel concepts.
      if (NewTy.isVector())
        NewTy =
            NewTy.getSizeInBits() > 64 ? LLT::integer(64) : LLT::integer(32);
      // Next power of two strictly below the current width.
      NewTy = LLT::integer(llvm::bit_floor(NewTy.getSizeInBits() - 1));
      unsigned NewTySize = NewTy.getSizeInBytes();
      assert(NewTySize > 0 && "Could not find appropriate type");

      // If the new LLT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
      MVT VT = getMVTForLLT(Ty);
      if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
          VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
          Fast)
        TySize = Size;
      else {
        Ty = NewTy;
        TySize = NewTySize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(Ty);
    Size -= TySize;
  }

  return true;
}
10734
10735// Get a vectorized representation of the memset value operand, GISel edition.
10737 MachineRegisterInfo &MRI = *MIB.getMRI();
10738 unsigned NumBits = Ty.getScalarSizeInBits();
10739 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10740 if (!Ty.isVector() && ValVRegAndVal) {
10741 APInt Scalar = ValVRegAndVal->Value.trunc(8);
10742 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
10743 return MIB.buildConstant(Ty, SplatVal).getReg(0);
10744 }
10745
10746 // Extend the byte value to the larger type, and then multiply by a magic
10747 // value 0x010101... in order to replicate it across every byte.
10748 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
10749 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10750 return MIB.buildConstant(Ty, 0).getReg(0);
10751 }
10752
10753 LLT ExtType = Ty.getScalarType();
10754 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
10755 if (NumBits > 8) {
10756 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
10757 auto MagicMI = MIB.buildConstant(ExtType, Magic);
10758 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
10759 }
10760
10761 // For vector types create a G_BUILD_VECTOR.
10762 if (Ty.isVector())
10763 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
10764
10765 return Val;
10766}
10767
LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
                             uint64_t KnownLen, Align Alignment,
                             bool IsVolatile) {
  // Expand a G_MEMSET of known length into a sequence of wide stores of a
  // replicated byte pattern.
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memset length!");

  // A non-fixed stack object's alignment may be raised below to allow wider
  // stores.
  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool OptSize = shouldLowerMemFuncForSize(MF);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();

  auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
  bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;

  // Compute the list of store types covering KnownLen bytes; bail to a
  // libcall if it would exceed the store-count limit.
  if (!findGISelOptimalMemOpLowering(MemOps, Limit,
                                     MemOp::Set(KnownLen, DstAlignCanChange,
                                                Alignment,
                                                /*IsZeroMemset=*/IsZeroVal,
                                                /*IsVolatile=*/IsVolatile),
                                     DstPtrInfo.getAddrSpace(), ~0u,
                                     MF.getFunction().getAttributes(), TLI))
    return UnableToLegalize;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);
    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  MachineIRBuilder MIB(MI);
  // Find the largest store and generate the bit pattern for it.
  LLT LargestTy = MemOps[0];
  for (unsigned i = 1; i < MemOps.size(); i++)
    if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
      LargestTy = MemOps[i];

  // The memset stored value is always defined as an s8, so in order to make it
  // work with larger store types we need to repeat the bit pattern across the
  // wider type.
  Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);

  if (!MemSetValue)
    return UnableToLegalize;

  // Generate the stores. For each store type in the list, we generate the
  // matching store of that type to the destination address.
  LLT PtrTy = MRI.getType(Dst);
  unsigned DstOff = 0;
  unsigned Size = KnownLen;
  for (unsigned I = 0; I < MemOps.size(); I++) {
    LLT Ty = MemOps[I];
    unsigned TySize = Ty.getSizeInBytes();
    if (TySize > Size) {
      // Issuing an unaligned load / store pair that overlaps with the previous
      // pair. Adjust the offset accordingly.
      assert(I == MemOps.size() - 1 && I != 0);
      DstOff -= TySize - Size;
    }

    // If this store is smaller than the largest store see whether we can get
    // the smaller value for free with a truncate.
    Register Value = MemSetValue;
    if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
      MVT VT = getMVTForLLT(Ty);
      MVT LargestVT = getMVTForLLT(LargestTy);
      if (!LargestTy.isVector() && !Ty.isVector() &&
          TLI.isTruncateFree(LargestVT, VT))
        Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
      else
        Value = getMemsetValue(Val, Ty, MIB);
      if (!Value)
        return UnableToLegalize;
    }

    auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);

    Register Ptr = Dst;
    if (DstOff != 0) {
      auto Offset =
          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
      Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst, Offset).getReg(0);
    }

    MIB.buildStore(Value, Ptr, *StoreMMO);
    DstOff += Ty.getSizeInBytes();
    Size -= TySize;
  }

  MI.eraseFromParent();
  return Legalized;
}
10880
10882LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
10883 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10884
10885 auto [Dst, Src, Len] = MI.getFirst3Regs();
10886
10887 const auto *MMOIt = MI.memoperands_begin();
10888 const MachineMemOperand *MemOp = *MMOIt;
10889 bool IsVolatile = MemOp->isVolatile();
10890
10891 // See if this is a constant length copy
10892 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10893 // FIXME: support dynamically sized G_MEMCPY_INLINE
10894 assert(LenVRegAndVal &&
10895 "inline memcpy with dynamic size is not yet supported");
10896 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10897 if (KnownLen == 0) {
10898 MI.eraseFromParent();
10899 return Legalized;
10900 }
10901
10902 const auto &DstMMO = **MI.memoperands_begin();
10903 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10904 Align DstAlign = DstMMO.getBaseAlign();
10905 Align SrcAlign = SrcMMO.getBaseAlign();
10906
10907 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
10908 IsVolatile);
10909}
10910
10912LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
10913 uint64_t KnownLen, Align DstAlign,
10914 Align SrcAlign, bool IsVolatile) {
10915 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10916 return lowerMemcpy(MI, Dst, Src, KnownLen,
10917 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
10918 IsVolatile);
10919}
10920
10922LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
10923 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
10924 Align SrcAlign, bool IsVolatile) {
10925 auto &MF = *MI.getParent()->getParent();
10926 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10927 auto &DL = MF.getDataLayout();
10929
10930 assert(KnownLen != 0 && "Have a zero length memcpy length!");
10931
10932 bool DstAlignCanChange = false;
10933 MachineFrameInfo &MFI = MF.getFrameInfo();
10934 Align Alignment = std::min(DstAlign, SrcAlign);
10935
10936 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10937 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10938 DstAlignCanChange = true;
10939
10940 // FIXME: infer better src pointer alignment like SelectionDAG does here.
10941 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
10942 // if the memcpy is in a tail call position.
10943
10944 std::vector<LLT> MemOps;
10945
10946 const auto &DstMMO = **MI.memoperands_begin();
10947 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10948 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10949 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10950
10952 MemOps, Limit,
10953 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10954 IsVolatile),
10955 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10956 MF.getFunction().getAttributes(), TLI))
10957 return UnableToLegalize;
10958
10959 if (DstAlignCanChange) {
10960 // Get an estimate of the type from the LLT.
10961 Type *IRTy = getTypeForLLT(MemOps[0], C);
10962 Align NewAlign = DL.getABITypeAlign(IRTy);
10963
10964 // Don't promote to an alignment that would require dynamic stack
10965 // realignment.
10967 if (!TRI->hasStackRealignment(MF))
10968 if (MaybeAlign StackAlign = DL.getStackAlignment())
10969 NewAlign = std::min(NewAlign, *StackAlign);
10970
10971 if (NewAlign > Alignment) {
10972 Alignment = NewAlign;
10973 unsigned FI = FIDef->getOperand(1).getIndex();
10974 // Give the stack frame object a larger alignment if needed.
10975 if (MFI.getObjectAlign(FI) < Alignment)
10976 MFI.setObjectAlignment(FI, Alignment);
10977 }
10978 }
10979
10980 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
10981
10982 MachineIRBuilder MIB(MI);
10983 // Now we need to emit a pair of load and stores for each of the types we've
10984 // collected. I.e. for each type, generate a load from the source pointer of
10985 // that type width, and then generate a corresponding store to the dest buffer
10986 // of that value loaded. This can result in a sequence of loads and stores
10987 // mixed types, depending on what the target specifies as good types to use.
10988 unsigned CurrOffset = 0;
10989 unsigned Size = KnownLen;
10990 for (auto CopyTy : MemOps) {
10991 // Issuing an unaligned load / store pair that overlaps with the previous
10992 // pair. Adjust the offset accordingly.
10993 if (CopyTy.getSizeInBytes() > Size)
10994 CurrOffset -= CopyTy.getSizeInBytes() - Size;
10995
10996 // Construct MMOs for the accesses.
10997 auto *LoadMMO =
10998 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10999 auto *StoreMMO =
11000 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
11001
11002 // Create the load.
11003 Register LoadPtr = Src;
11005 if (CurrOffset != 0) {
11006 LLT SrcTy = MRI.getType(Src);
11007 Offset =
11008 MIB.buildConstant(LLT::integer(SrcTy.getSizeInBits()), CurrOffset)
11009 .getReg(0);
11010 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
11011 }
11012 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
11013
11014 // Create the store.
11015 Register StorePtr = Dst;
11016 if (CurrOffset != 0) {
11017 LLT DstTy = MRI.getType(Dst);
11018 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
11019 }
11020 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
11021 CurrOffset += CopyTy.getSizeInBytes();
11022 Size -= CopyTy.getSizeInBytes();
11023 }
11024
11025 MI.eraseFromParent();
11026 return Legalized;
11027}
11028
11030LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
11031 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
11032 bool IsVolatile) {
11033 auto &MF = *MI.getParent()->getParent();
11034 const auto &TLI = *MF.getSubtarget().getTargetLowering();
11035 auto &DL = MF.getDataLayout();
11036 LLVMContext &C = MF.getFunction().getContext();
11037
11038 assert(KnownLen != 0 && "Have a zero length memmove length!");
11039
11040 bool DstAlignCanChange = false;
11041 MachineFrameInfo &MFI = MF.getFrameInfo();
11042 bool OptSize = shouldLowerMemFuncForSize(MF);
11043 Align Alignment = std::min(DstAlign, SrcAlign);
11044
11045 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
11046 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
11047 DstAlignCanChange = true;
11048
11049 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
11050 std::vector<LLT> MemOps;
11051
11052 const auto &DstMMO = **MI.memoperands_begin();
11053 const auto &SrcMMO = **std::next(MI.memoperands_begin());
11054 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
11055 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
11056
11057 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
11058 // to a bug in it's findOptimalMemOpLowering implementation. For now do the
11059 // same thing here.
11061 MemOps, Limit,
11062 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
11063 /*IsVolatile*/ true),
11064 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
11065 MF.getFunction().getAttributes(), TLI))
11066 return UnableToLegalize;
11067
11068 if (DstAlignCanChange) {
11069 // Get an estimate of the type from the LLT.
11070 Type *IRTy = getTypeForLLT(MemOps[0], C);
11071 Align NewAlign = DL.getABITypeAlign(IRTy);
11072
11073 // Don't promote to an alignment that would require dynamic stack
11074 // realignment.
11075 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
11076 if (!TRI->hasStackRealignment(MF))
11077 if (MaybeAlign StackAlign = DL.getStackAlignment())
11078 NewAlign = std::min(NewAlign, *StackAlign);
11079
11080 if (NewAlign > Alignment) {
11081 Alignment = NewAlign;
11082 unsigned FI = FIDef->getOperand(1).getIndex();
11083 // Give the stack frame object a larger alignment if needed.
11084 if (MFI.getObjectAlign(FI) < Alignment)
11085 MFI.setObjectAlignment(FI, Alignment);
11086 }
11087 }
11088
11089 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
11090
11091 MachineIRBuilder MIB(MI);
11092 // Memmove requires that we perform the loads first before issuing the stores.
11093 // Apart from that, this loop is pretty much doing the same thing as the
11094 // memcpy codegen function.
11095 unsigned CurrOffset = 0;
11096 SmallVector<Register, 16> LoadVals;
11097 for (auto CopyTy : MemOps) {
11098 // Construct MMO for the load.
11099 auto *LoadMMO =
11100 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
11101
11102 // Create the load.
11103 Register LoadPtr = Src;
11104 if (CurrOffset != 0) {
11105 LLT SrcTy = MRI.getType(Src);
11106 auto Offset =
11107 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
11108 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
11109 }
11110 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
11111 CurrOffset += CopyTy.getSizeInBytes();
11112 }
11113
11114 CurrOffset = 0;
11115 for (unsigned I = 0; I < MemOps.size(); ++I) {
11116 LLT CopyTy = MemOps[I];
11117 // Now store the values loaded.
11118 auto *StoreMMO =
11119 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
11120
11121 Register StorePtr = Dst;
11122 if (CurrOffset != 0) {
11123 LLT DstTy = MRI.getType(Dst);
11124 auto Offset =
11125 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
11126 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
11127 }
11128 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
11129 CurrOffset += CopyTy.getSizeInBytes();
11130 }
11131 MI.eraseFromParent();
11132 return Legalized;
11133}
11134
11137 const unsigned Opc = MI.getOpcode();
11138 // This combine is fairly complex so it's not written with a separate
11139 // matcher function.
11140 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
11141 Opc == TargetOpcode::G_MEMSET) &&
11142 "Expected memcpy like instruction");
11143
11144 auto MMOIt = MI.memoperands_begin();
11145 const MachineMemOperand *MemOp = *MMOIt;
11146
11147 Align DstAlign = MemOp->getBaseAlign();
11148 Align SrcAlign;
11149 auto [Dst, Src, Len] = MI.getFirst3Regs();
11150
11151 if (Opc != TargetOpcode::G_MEMSET) {
11152 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
11153 MemOp = *(++MMOIt);
11154 SrcAlign = MemOp->getBaseAlign();
11155 }
11156
11157 // See if this is a constant length copy
11158 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
11159 if (!LenVRegAndVal)
11160 return UnableToLegalize;
11161 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
11162
11163 if (KnownLen == 0) {
11164 MI.eraseFromParent();
11165 return Legalized;
11166 }
11167
11168 if (MaxLen && KnownLen > MaxLen)
11169 return UnableToLegalize;
11170
11171 bool IsVolatile = MemOp->isVolatile();
11172 if (Opc == TargetOpcode::G_MEMCPY) {
11173 auto &MF = *MI.getParent()->getParent();
11174 const auto &TLI = *MF.getSubtarget().getTargetLowering();
11175 bool OptSize = shouldLowerMemFuncForSize(MF);
11176 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
11177 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
11178 IsVolatile);
11179 }
11180 if (Opc == TargetOpcode::G_MEMMOVE)
11181 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
11182 if (Opc == TargetOpcode::G_MEMSET)
11183 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
11184 return UnableToLegalize;
11185}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
constexpr LLT S32
constexpr LLT S64
AMDGPU Register Bank Select
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition Utils.h:75
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t High
R600 Clause Merge
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1402
APInt bitcastToAPInt() const
Definition APFloat.h:1426
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1193
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1153
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1164
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1054
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1535
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:967
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1708
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
void negate()
Negate this APInt in place.
Definition APInt.h:1491
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1027
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:880
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition APInt.h:271
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:685
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
bool isSigned() const
Definition InstrTypes.h:930
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
const APFloat & getValueAPF() const
Definition Constants.h:463
This is the shared class of boolean and integer constants.
Definition Constants.h:87
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool isBigEndian() const
Definition DataLayout.h:218
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:714
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getValueReg() const
Get the stored value register.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a threeway compare.
Represents a G_STORE.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:354
static constexpr LLT float64()
Get a 64-bit IEEE double value.
LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
LLT getScalarType() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr ElementCount getElementCount() const
static constexpr LLT float16()
Get a 16-bit IEEE half value.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isPointerOrPointerVector() const
static LLT integer(unsigned SizeInBits)
static constexpr LLT bfloat16()
constexpr LLT changeVectorElementType(LLT NewEltTy) const
Returns a vector with the same number of elements but the new element type.
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LLT changeVectorElementCount(ElementCount EC) const
Return a vector with the same element type and the new element count.
static constexpr LLT float32()
Get a 32-bit IEEE float value.
static LLT floatIEEE(unsigned SizeInBits)
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSSUBE(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizerHelper::LegalizeResult createAtomicLibcall(MachineInstr &MI) const
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTLS(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B, const LibcallLoweringInfo *Libcalls=nullptr)
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPExtAndTruncMem(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMODF(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F32_TO_BF16(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI LegalizeResult conversionLibcall(MachineInstr &MI, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, bool IsSigned=false) const
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult createMemLibcall(MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Create a libcall to memcpy et al.
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult createLibcall(const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr) const
Helper function that creates a libcall to the given Name using the given calling convention CC.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LLVM_ABI LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver) const
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI)
Tracks which library functions to use for a particular subtarget.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
A single uniqued string.
Definition Metadata.h:722
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:632
Machine Value Type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:137
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:483
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Definition Triple.h:644
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Definition Type.cpp:295
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:286
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:317
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:291
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
Definition Type.cpp:294
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:290
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:288
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:557
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:830
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:1995
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition Utils.cpp:652
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:293
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition MathExtras.h:223
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1522
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1579
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1151
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition Utils.cpp:1146
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Success
The lock was released successfully.
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition Utils.cpp:507
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1884
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition MathExtras.h:232
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:432
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition Utils.cpp:1234
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:347
@ Custom
The result value requires a custom uniformity check.
Definition Uniformity.h:31
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition Utils.cpp:610
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
bool isKnownNeverZero() const
Return true if it's known this can never be a zero.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.