LLVM 23.0.0git
LegalizerHelper.cpp
Go to the documentation of this file.
1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
36#include "llvm/Support/Debug.h"
40#include <numeric>
41#include <optional>
42
43#define DEBUG_TYPE "legalizer"
44
45using namespace llvm;
46using namespace LegalizeActions;
47using namespace MIPatternMatch;
48
49/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
50///
51/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
52/// with any leftover piece as type \p LeftoverTy
53///
54/// Returns -1 in the first element of the pair if the breakdown is not
55/// satisfiable.
56static std::pair<int, int>
57getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
58 assert(!LeftoverTy.isValid() && "this is an out argument");
59
60 unsigned Size = OrigTy.getSizeInBits();
61 unsigned NarrowSize = NarrowTy.getSizeInBits();
62 unsigned NumParts = Size / NarrowSize;
63 unsigned LeftoverSize = Size - NumParts * NarrowSize;
64 assert(Size > NarrowSize);
65
66 if (LeftoverSize == 0)
67 return {NumParts, 0};
68
69 if (NarrowTy.isVector()) {
70 unsigned EltSize = OrigTy.getScalarSizeInBits();
71 if (LeftoverSize % EltSize != 0)
72 return {-1, -1};
73 LeftoverTy = OrigTy.changeElementCount(
74 ElementCount::getFixed(LeftoverSize / EltSize));
75 } else {
76 LeftoverTy = LLT::scalar(LeftoverSize);
77 }
78
79 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
80 return std::make_pair(NumParts, NumLeftover);
81}
82
// Map a scalar LLT to the IR floating-point Type of the same bit width, or
// nullptr if there is no FP type of that width (or the LLT is not scalar).
// NOTE(review): the opening line of this definition (original line 83) is
// missing from this extraction — confirm the exact signature against the
// full source.
84
85 if (!Ty.isScalar())
86 return nullptr;
87
// Widths with a matching IEEE/x87 FP type; anything else yields nullptr.
88 switch (Ty.getSizeInBits()) {
89 case 16:
90 return Type::getHalfTy(Ctx);
91 case 32:
92 return Type::getFloatTy(Ctx);
93 case 64:
94 return Type::getDoubleTy(Ctx);
95 case 80:
96 return Type::getX86_FP80Ty(Ctx);
97 case 128:
98 return Type::getFP128Ty(Ctx);
99 default:
100 return nullptr;
101 }
102}
103
// Constructor overload that derives the LegalizerInfo and TargetLowering
// from the MachineFunction's subtarget.
// NOTE(review): the constructor header (original lines ~104-105) is missing
// from this extraction — confirm the full parameter list against the source.
106 MachineIRBuilder &Builder,
107 const LibcallLoweringInfo *Libcalls)
108 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
109 LI(*MF.getSubtarget().getLegalizerInfo()),
110 TLI(*MF.getSubtarget().getTargetLowering()), Libcalls(Libcalls) {}
111
// Constructor overload that takes an explicit LegalizerInfo (LI) and VT
// instead of deriving them; TargetLowering still comes from the subtarget.
// NOTE(review): the constructor header (original lines ~112-114, 116) is
// missing from this extraction — confirm the full parameter list.
115 const LibcallLoweringInfo *Libcalls,
117 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
118 TLI(*MF.getSubtarget().getTargetLowering()), Libcalls(Libcalls), VT(VT) {}
119
// Perform one legalization step on MI: intrinsics are handed to the
// LegalizerInfo directly; otherwise the chosen LegalizeAction is dispatched
// to the matching helper (libcall, narrow/widen scalar, bitcast, lower,
// fewer/more elements, custom).
// NOTE(review): the function header (original lines ~120-121) is missing
// from this extraction — presumably LegalizerHelper::legalizeInstrStep;
// confirm against the full source.
122 LostDebugLocObserver &LocObserver) {
123 LLVM_DEBUG(dbgs() << "\nLegalizing: " << MI);
124
125 MIRBuilder.setInstrAndDebugLoc(MI);
126
// Intrinsics bypass the action table and go straight to the target hook.
127 if (isa<GIntrinsic>(MI))
128 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
129 auto Step = LI.getAction(MI, MRI);
130 switch (Step.Action) {
131 case Legal:
132 LLVM_DEBUG(dbgs() << ".. Already legal\n");
133 return AlreadyLegal;
134 case Libcall:
135 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
136 return libcall(MI, LocObserver);
137 case NarrowScalar:
138 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
139 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
140 case WidenScalar:
141 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
142 return widenScalar(MI, Step.TypeIdx, Step.NewType);
143 case Bitcast:
144 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
145 return bitcast(MI, Step.TypeIdx, Step.NewType);
146 case Lower:
147 LLVM_DEBUG(dbgs() << ".. Lower\n");
148 return lower(MI, Step.TypeIdx, Step.NewType);
149 case FewerElements:
150 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
151 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
152 case MoreElements:
153 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
154 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
155 case Custom:
156 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
157 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
// NOTE(review): the continuation of the conditional above (original line
// 158, presumably ': UnableToLegalize;') is missing from this extraction.
159 default:
160 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
161 return UnableToLegalize;
162 }
163}
164
165void LegalizerHelper::insertParts(Register DstReg,
166 LLT ResultTy, LLT PartTy,
167 ArrayRef<Register> PartRegs,
168 LLT LeftoverTy,
169 ArrayRef<Register> LeftoverRegs) {
170 if (!LeftoverTy.isValid()) {
171 assert(LeftoverRegs.empty());
172
173 if (!ResultTy.isVector()) {
174 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
175 return;
176 }
177
178 if (PartTy.isVector())
179 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
180 else
181 MIRBuilder.buildBuildVector(DstReg, PartRegs);
182 return;
183 }
184
185 // Merge sub-vectors with different number of elements and insert into DstReg.
186 if (ResultTy.isVector()) {
187 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
188 SmallVector<Register, 8> AllRegs(PartRegs);
189 AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
190 return mergeMixedSubvectors(DstReg, AllRegs);
191 }
192
193 SmallVector<Register> GCDRegs;
194 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
195 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
196 extractGCDType(GCDRegs, GCDTy, PartReg);
197 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
198 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
199}
200
// Split the vector register \p Reg into its scalar elements and append them
// to \p Elts.
201void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
202 Register Reg) {
203 LLT Ty = MRI.getType(Reg);
// NOTE(review): original line 204 is missing from this extraction —
// presumably the declaration of the local 'RegElts' vector used below;
// confirm against the full source.
205 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
206 MIRBuilder, MRI);
207 Elts.append(RegElts);
208}
209
210/// Merge \p PartRegs with different types into \p DstReg.
211void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
212 ArrayRef<Register> PartRegs) {
// NOTE(review): original line 213 is missing from this extraction —
// presumably the declaration of the local 'AllElts' vector used below.
// All parts except the last are vectors; flatten them to scalar elements.
214 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
215 appendVectorElts(AllElts, PartRegs[i]);
216
// The final (leftover) part may be a single scalar or a shorter vector.
217 Register Leftover = PartRegs[PartRegs.size() - 1];
218 if (!MRI.getType(Leftover).isVector())
219 AllElts.push_back(Leftover);
220 else
221 appendVectorElts(AllElts, Leftover);
222
223 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
224}
225
226/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
// NOTE(review): the first line of the signature (original line 227) is
// missing from this extraction — presumably
// 'static void getUnmergeResults(SmallVectorImpl<Register> &Regs,'.
228 const MachineInstr &MI) {
229 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
230
// All operands except the last (the source) are defs of the unmerge.
231 const int StartIdx = Regs.size();
232 const int NumResults = MI.getNumOperands() - 1;
233 Regs.resize(Regs.size() + NumResults);
234 for (int I = 0; I != NumResults; ++I)
235 Regs[StartIdx + I] = MI.getOperand(I).getReg();
236}
237
238void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
239 LLT GCDTy, Register SrcReg) {
240 LLT SrcTy = MRI.getType(SrcReg);
241 if (SrcTy == GCDTy) {
242 // If the source already evenly divides the result type, we don't need to do
243 // anything.
244 Parts.push_back(SrcReg);
245 } else {
246 // Need to split into common type sized pieces.
247 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
248 getUnmergeResults(Parts, *Unmerge);
249 }
250}
251
252LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
253 LLT NarrowTy, Register SrcReg) {
254 LLT SrcTy = MRI.getType(SrcReg);
255 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
256 extractGCDType(Parts, GCDTy, SrcReg);
257 return GCDTy;
258}
259
// Merge the GCDTy-sized pieces in VRegs into NarrowTy-sized pieces covering
// the LCM of DstTy and NarrowTy, padding with PadStrategy (G_ZEXT zeros,
// G_ANYEXT undef, G_SEXT sign bits) when the sources don't cover the LCM.
// On return VRegs holds the NarrowTy pieces; the LCM type is returned.
// NOTE(review): one parameter line (original line 261) is missing from this
// extraction — presumably 'SmallVectorImpl<Register> &VRegs,' given the
// uses of VRegs below; confirm against the full source.
260LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
262 unsigned PadStrategy) {
263 LLT LCMTy = getLCMType(DstTy, NarrowTy);
264
265 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
266 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
267 int NumOrigSrc = VRegs.size();
268
269 Register PadReg;
270
271 // Get a value we can use to pad the source value if the sources won't evenly
272 // cover the result type.
273 if (NumOrigSrc < NumParts * NumSubParts) {
274 if (PadStrategy == TargetOpcode::G_ZEXT)
275 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
276 else if (PadStrategy == TargetOpcode::G_ANYEXT)
277 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
278 else {
279 assert(PadStrategy == TargetOpcode::G_SEXT);
280
281 // Shift the sign bit of the low register through the high register.
282 auto ShiftAmt =
283 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
284 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
285 }
286 }
287
288 // Registers for the final merge to be produced.
289 SmallVector<Register, 4> Remerge(NumParts);
290
291 // Registers needed for intermediate merges, which will be merged into a
292 // source for Remerge.
293 SmallVector<Register, 4> SubMerge(NumSubParts);
294
295 // Once we've fully read off the end of the original source bits, we can reuse
296 // the same high bits for remaining padding elements.
297 Register AllPadReg;
298
299 // Build merges to the LCM type to cover the original result type.
300 for (int I = 0; I != NumParts; ++I) {
301 bool AllMergePartsArePadding = true;
302
303 // Build the requested merges to the requested type.
304 for (int J = 0; J != NumSubParts; ++J) {
305 int Idx = I * NumSubParts + J;
306 if (Idx >= NumOrigSrc) {
307 SubMerge[J] = PadReg;
308 continue;
309 }
310
311 SubMerge[J] = VRegs[Idx];
312
313 // There are meaningful bits here we can't reuse later.
314 AllMergePartsArePadding = false;
315 }
316
317 // If we've filled up a complete piece with padding bits, we can directly
318 // emit the natural sized constant if applicable, rather than a merge of
319 // smaller constants.
320 if (AllMergePartsArePadding && !AllPadReg) {
321 if (PadStrategy == TargetOpcode::G_ANYEXT)
322 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
323 else if (PadStrategy == TargetOpcode::G_ZEXT)
324 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
325
326 // If this is a sign extension, we can't materialize a trivial constant
327 // with the right type and have to produce a merge.
328 }
329
330 if (AllPadReg) {
331 // Avoid creating additional instructions if we're just adding additional
332 // copies of padding bits.
333 Remerge[I] = AllPadReg;
334 continue;
335 }
336
337 if (NumSubParts == 1)
338 Remerge[I] = SubMerge[0];
339 else
340 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
341
342 // In the sign extend padding case, re-use the first all-signbit merge.
343 if (AllMergePartsArePadding && !AllPadReg)
344 AllPadReg = Remerge[I];
345 }
346
347 VRegs = std::move(Remerge);
348 return LCMTy;
349}
350
351void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
352 ArrayRef<Register> RemergeRegs) {
353 LLT DstTy = MRI.getType(DstReg);
354
355 // Create the merge to the widened source, and extract the relevant bits into
356 // the result.
357
358 if (DstTy == LCMTy) {
359 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
360 return;
361 }
362
363 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
364 if (DstTy.isScalar() && LCMTy.isScalar()) {
365 MIRBuilder.buildTrunc(DstReg, Remerge);
366 return;
367 }
368
369 if (LCMTy.isVector()) {
370 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
371 SmallVector<Register, 8> UnmergeDefs(NumDefs);
372 UnmergeDefs[0] = DstReg;
373 for (unsigned I = 1; I != NumDefs; ++I)
374 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
375
376 MIRBuilder.buildUnmerge(UnmergeDefs,
377 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
378 return;
379 }
380
381 llvm_unreachable("unhandled case");
382}
383
// Map a generic opcode plus scalar bit width to the matching RTLIB libcall.
// RTLIBCASE_INT handles integer ops (32/64/128-bit variants); RTLIBCASE
// additionally handles the 80-bit x87 floating-point variant.
384static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
385#define RTLIBCASE_INT(LibcallPrefix) \
386 do { \
387 switch (Size) { \
388 case 32: \
389 return RTLIB::LibcallPrefix##32; \
390 case 64: \
391 return RTLIB::LibcallPrefix##64; \
392 case 128: \
393 return RTLIB::LibcallPrefix##128; \
394 default: \
395 llvm_unreachable("unexpected size"); \
396 } \
397 } while (0)
398
399#define RTLIBCASE(LibcallPrefix) \
400 do { \
401 switch (Size) { \
402 case 32: \
403 return RTLIB::LibcallPrefix##32; \
404 case 64: \
405 return RTLIB::LibcallPrefix##64; \
406 case 80: \
407 return RTLIB::LibcallPrefix##80; \
408 case 128: \
409 return RTLIB::LibcallPrefix##128; \
410 default: \
411 llvm_unreachable("unexpected size"); \
412 } \
413 } while (0)
414
// Each case expands to a size-based switch that returns the libcall enum.
415 switch (Opcode) {
416 case TargetOpcode::G_LROUND:
417 RTLIBCASE(LROUND_F);
418 case TargetOpcode::G_LLROUND:
419 RTLIBCASE(LLROUND_F);
420 case TargetOpcode::G_MUL:
421 RTLIBCASE_INT(MUL_I);
422 case TargetOpcode::G_SDIV:
423 RTLIBCASE_INT(SDIV_I);
424 case TargetOpcode::G_UDIV:
425 RTLIBCASE_INT(UDIV_I);
426 case TargetOpcode::G_SREM:
427 RTLIBCASE_INT(SREM_I);
428 case TargetOpcode::G_UREM:
429 RTLIBCASE_INT(UREM_I);
430 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
431 RTLIBCASE_INT(CTLZ_I);
432 case TargetOpcode::G_FADD:
433 RTLIBCASE(ADD_F);
434 case TargetOpcode::G_FSUB:
435 RTLIBCASE(SUB_F);
436 case TargetOpcode::G_FMUL:
437 RTLIBCASE(MUL_F);
438 case TargetOpcode::G_FDIV:
439 RTLIBCASE(DIV_F);
440 case TargetOpcode::G_FEXP:
441 RTLIBCASE(EXP_F);
442 case TargetOpcode::G_FEXP2:
443 RTLIBCASE(EXP2_F);
444 case TargetOpcode::G_FEXP10:
445 RTLIBCASE(EXP10_F);
446 case TargetOpcode::G_FREM:
447 RTLIBCASE(REM_F);
448 case TargetOpcode::G_FPOW:
449 RTLIBCASE(POW_F);
450 case TargetOpcode::G_FPOWI:
451 RTLIBCASE(POWI_F);
452 case TargetOpcode::G_FMA:
453 RTLIBCASE(FMA_F);
454 case TargetOpcode::G_FSIN:
455 RTLIBCASE(SIN_F);
456 case TargetOpcode::G_FCOS:
457 RTLIBCASE(COS_F);
458 case TargetOpcode::G_FTAN:
459 RTLIBCASE(TAN_F);
460 case TargetOpcode::G_FASIN:
461 RTLIBCASE(ASIN_F);
462 case TargetOpcode::G_FACOS:
463 RTLIBCASE(ACOS_F);
464 case TargetOpcode::G_FATAN:
465 RTLIBCASE(ATAN_F);
466 case TargetOpcode::G_FATAN2:
467 RTLIBCASE(ATAN2_F);
468 case TargetOpcode::G_FSINH:
469 RTLIBCASE(SINH_F);
470 case TargetOpcode::G_FCOSH:
471 RTLIBCASE(COSH_F);
472 case TargetOpcode::G_FTANH:
473 RTLIBCASE(TANH_F);
474 case TargetOpcode::G_FSINCOS:
475 RTLIBCASE(SINCOS_F);
476 case TargetOpcode::G_FMODF:
477 RTLIBCASE(MODF_F);
478 case TargetOpcode::G_FLOG10:
479 RTLIBCASE(LOG10_F);
480 case TargetOpcode::G_FLOG:
481 RTLIBCASE(LOG_F);
482 case TargetOpcode::G_FLOG2:
483 RTLIBCASE(LOG2_F);
484 case TargetOpcode::G_FLDEXP:
485 RTLIBCASE(LDEXP_F);
486 case TargetOpcode::G_FCEIL:
487 RTLIBCASE(CEIL_F);
488 case TargetOpcode::G_FFLOOR:
489 RTLIBCASE(FLOOR_F);
490 case TargetOpcode::G_FMINNUM:
491 RTLIBCASE(FMIN_F);
492 case TargetOpcode::G_FMAXNUM:
493 RTLIBCASE(FMAX_F);
494 case TargetOpcode::G_FMINIMUMNUM:
495 RTLIBCASE(FMINIMUM_NUM_F);
496 case TargetOpcode::G_FMAXIMUMNUM:
497 RTLIBCASE(FMAXIMUM_NUM_F);
498 case TargetOpcode::G_FSQRT:
499 RTLIBCASE(SQRT_F);
500 case TargetOpcode::G_FRINT:
501 RTLIBCASE(RINT_F);
502 case TargetOpcode::G_FNEARBYINT:
503 RTLIBCASE(NEARBYINT_F);
504 case TargetOpcode::G_INTRINSIC_TRUNC:
505 RTLIBCASE(TRUNC_F);
506 case TargetOpcode::G_INTRINSIC_ROUND:
507 RTLIBCASE(ROUND_F);
508 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
509 RTLIBCASE(ROUNDEVEN_F);
510 case TargetOpcode::G_INTRINSIC_LRINT:
511 RTLIBCASE(LRINT_F);
512 case TargetOpcode::G_INTRINSIC_LLRINT:
513 RTLIBCASE(LLRINT_F);
514 }
515 llvm_unreachable("Unknown libcall function");
516#undef RTLIBCASE_INT
517#undef RTLIBCASE
518}
519
520/// True if an instruction is in tail position in its caller. Intended for
521/// legalizing libcalls as tail calls when possible.
// NOTE(review): the function signature (original lines ~522-523, 525) is
// missing from this extraction — the visible parameters are the call result
// ArgInfo (unused below?), MI, TII and an MRI; confirm against the source.
524 const TargetInstrInfo &TII,
526 MachineBasicBlock &MBB = *MI.getParent();
527 const Function &F = MBB.getParent()->getFunction();
528
529 // Conservatively require the attributes of the call to match those of
530 // the return. Ignore NoAlias and NonNull because they don't affect the
531 // call sequence.
532 AttributeList CallerAttrs = F.getAttributes();
533 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
534 .removeAttribute(Attribute::NoAlias)
535 .removeAttribute(Attribute::NonNull)
536 .hasAttributes())
537 return false;
538
539 // It's not safe to eliminate the sign / zero extension of the return value.
540 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
541 CallerAttrs.hasRetAttr(Attribute::SExt))
542 return false;
543
544 // Only tail call if the following instruction is a standard return or if we
545 // have a `thisreturn` callee, and a sequence like:
546 //
547 // G_MEMCPY %0, %1, %2
548 // $x0 = COPY %0
549 // RET_ReallyLR implicit $x0
550 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
551 if (Next != MBB.instr_end() && Next->isCopy()) {
552 if (MI.getOpcode() == TargetOpcode::G_BZERO)
553 return false;
554
555 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
556 // memcpy/etc routines return the same parameter. For others it will be the
557 // returned value.
558 Register VReg = MI.getOperand(0).getReg();
559 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
560 return false;
561
562 Register PReg = Next->getOperand(0).getReg();
563 if (!PReg.isPhysical())
564 return false;
565
566 auto Ret = next_nodbg(Next, MBB.instr_end());
567 if (Ret == MBB.instr_end() || !Ret->isReturn())
568 return false;
569
570 if (Ret->getNumImplicitOperands() != 1)
571 return false;
572
573 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
574 return false;
575
576 // Skip over the COPY that we just validated.
577 Next = Ret;
578 }
579
// Reject if the candidate is itself already a tail call or not a return.
580 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
581 return false;
582
583 return true;
584}
585
// Emit a call to the external symbol \p Name with result \p Result and the
// given args/calling convention, marking it a tail call when MI is provided
// and is in tail position; a lowered tail call deletes the trailing return.
// NOTE(review): the function header (original lines ~586, 588) and the
// declaration of the local 'Info' (original line ~592, presumably
// 'CallLowering::CallLoweringInfo Info;') are missing from this extraction.
587 const char *Name, const CallLowering::ArgInfo &Result,
589 LostDebugLocObserver &LocObserver, MachineInstr *MI) const {
590 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();

593 Info.CallConv = CC;
594 Info.Callee = MachineOperand::CreateES(Name);
595 Info.OrigRet = Result;
// Only tail-call when the libcall's return type matches the caller's (or is
// void) and MI sits in tail position.
596 if (MI)
597 Info.IsTailCall =
598 (Result.Ty->isVoidTy() ||
599 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
600 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
601 *MIRBuilder.getMRI());
602
603 llvm::append_range(Info.OrigArgs, Args);
// NOTE(review): the failure-return line after this check (original line
// ~605) is missing from this extraction.
604 if (!CLI.lowerCall(MIRBuilder, Info))
606
607 if (MI && Info.LoweredTailCall) {
608 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
609
610 // Check debug locations before removing the return.
611 LocObserver.checkpoint(true);
612
613 // We must have a return following the call (or debug insts) to get past
614 // isLibCallInTailPosition.
615 do {
616 MachineInstr *Next = MI->getNextNode();
617 assert(Next &&
618 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
619 "Expected instr following MI to be return or debug inst?");
620 // We lowered a tail call, so the call is now the return from the block.
621 // Delete the old return.
622 Next->eraseFromParent();
623 } while (MI->getNextNode());
624
625 // We expect to lose the debug location from the return.
626 LocObserver.checkpoint(false);
627 }
629}
630
// Overload that resolves the RTLIB::Libcall enum to a concrete LibcallImpl
// (name + calling convention) via the LibcallLoweringInfo, then delegates to
// the by-name createLibcall above.
// NOTE(review): the function header (original lines ~631, 633) and the
// early-return lines after the two guards (original lines ~636, 640) plus
// the 'Name' lookup (original line ~642) are missing from this extraction.
632 RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result,
634 MachineInstr *MI) const {
635 if (!Libcalls)
637
638 RTLIB::LibcallImpl LibcallImpl = Libcalls->getLibcallImpl(Libcall);
639 if (LibcallImpl == RTLIB::Unsupported)
641
643 const CallingConv::ID CC = Libcalls->getLibcallImplCallingConv(LibcallImpl);
644 return createLibcall(Name.data(), Result, Args, CC, LocObserver, MI);
645}
646
647// Useful for libcalls where all operands have the same type.
// Builds the arg list from all non-def operands of MI (each typed OpType)
// and emits the libcall selected by getRTLibDesc for MI's opcode and Size.
// NOTE(review): the function header (original lines ~648-649) and the
// declaration of the local 'Args' vector (original line ~655) are missing
// from this extraction.
650 unsigned Size, Type *OpType,
651 LostDebugLocObserver &LocObserver) const {
652 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
653
654 // FIXME: What does the original arg index mean here?
656 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
657 Args.push_back({MO.getReg(), OpType, 0});
658 return createLibcall(Libcall, {MI.getOperand(0).getReg(), OpType, 0}, Args,
659 LocObserver, &MI);
660}
661
// Lower G_FSINCOS to a sincos-style libcall: pass the source plus two stack
// temporaries by pointer, then load the sin and cos results back out.
662LegalizerHelper::LegalizeResult LegalizerHelper::emitSincosLibcall(
663 MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType,
664 LostDebugLocObserver &LocObserver) {
665 MachineFunction &MF = *MI.getMF();
// NOTE(review): original line 666 is missing from this extraction —
// presumably the local MRI reference used by MRI.getType below.
667
668 Register DstSin = MI.getOperand(0).getReg();
669 Register DstCos = MI.getOperand(1).getReg();
670 Register Src = MI.getOperand(2).getReg();
671 LLT DstTy = MRI.getType(DstSin);
672
673 int MemSize = DstTy.getSizeInBytes();
674 Align Alignment = getStackTemporaryAlignment(DstTy);
// NOTE(review): original line 675 is missing — presumably the DataLayout
// reference 'DL' used on the next line.
676 unsigned AddrSpace = DL.getAllocaAddrSpace();
677 MachinePointerInfo PtrInfo;
678
// One stack slot each for the sin and cos out-parameters.
679 Register StackPtrSin =
680 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
681 .getReg(0);
682 Register StackPtrCos =
683 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
684 .getReg(0);
685
686 auto &Ctx = MF.getFunction().getContext();
687 auto LibcallResult = createLibcall(
688 getRTLibDesc(MI.getOpcode(), Size), {{0}, Type::getVoidTy(Ctx), 0},
689 {{Src, OpType, 0},
690 {StackPtrSin, PointerType::get(Ctx, AddrSpace), 1},
691 {StackPtrCos, PointerType::get(Ctx, AddrSpace), 2}},
692 LocObserver, &MI);
693
// NOTE(review): the failure return (original line ~695) and the creation of
// the two load MMOs (original lines ~697, 699) are missing from this
// extraction.
694 if (LibcallResult != LegalizeResult::Legalized)
696
698 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
700 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
701
702 MIRBuilder.buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
703 MIRBuilder.buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
704 MI.eraseFromParent();
705
707}
708
// Lower G_FMODF to a modf-style libcall: the fractional part is the call's
// return value; the integral part is written through a stack temporary and
// loaded back into DstInt.
// NOTE(review): the return-type line of the signature (original line 709)
// is missing from this extraction.
710LegalizerHelper::emitModfLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
711 unsigned Size, Type *OpType,
712 LostDebugLocObserver &LocObserver) {
713 MachineFunction &MF = MIRBuilder.getMF();
714 MachineRegisterInfo &MRI = MF.getRegInfo();
715
716 Register DstFrac = MI.getOperand(0).getReg();
717 Register DstInt = MI.getOperand(1).getReg();
718 Register Src = MI.getOperand(2).getReg();
719 LLT DstTy = MRI.getType(DstFrac);
720
721 int MemSize = DstTy.getSizeInBytes();
722 Align Alignment = getStackTemporaryAlignment(DstTy);
723 const DataLayout &DL = MIRBuilder.getDataLayout();
724 unsigned AddrSpace = DL.getAllocaAddrSpace();
725 MachinePointerInfo PtrInfo;
726
// Stack slot for the integral-part out-parameter.
727 Register StackPtrInt =
728 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
729 .getReg(0);
730
731 auto &Ctx = MF.getFunction().getContext();
732 auto LibcallResult = createLibcall(
733 getRTLibDesc(MI.getOpcode(), Size), {DstFrac, OpType, 0},
734 {{Src, OpType, 0}, {StackPtrInt, PointerType::get(Ctx, AddrSpace), 1}},
735 LocObserver, &MI);
736
// NOTE(review): the failure return (original line ~738) and the creation of
// the load MMO (original line ~740) are missing from this extraction.
737 if (LibcallResult != LegalizeResult::Legalized)
739
741 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
742
743 MIRBuilder.buildLoad(DstInt, StackPtrInt, *LoadMMOInt);
744 MI.eraseFromParent();
745
747}
748
749static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
750 Type *FromType) {
751 auto ToMVT = MVT::getVT(ToType);
752 auto FromMVT = MVT::getVT(FromType);
753
754 switch (Opcode) {
755 case TargetOpcode::G_FPEXT:
756 return RTLIB::getFPEXT(FromMVT, ToMVT);
757 case TargetOpcode::G_FPTRUNC:
758 return RTLIB::getFPROUND(FromMVT, ToMVT);
759 case TargetOpcode::G_FPTOSI:
760 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
761 case TargetOpcode::G_FPTOUI:
762 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
763 case TargetOpcode::G_SITOFP:
764 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
765 case TargetOpcode::G_UITOFP:
766 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
767 }
768 llvm_unreachable("Unsupported libcall function");
769}
770
// Emit a conversion libcall (fpext/fptrunc/fp<->int) for MI, tagging integer
// source arguments with sext/zext per the target's preference.
// NOTE(review): the first line of the signature (original line 771) is
// missing from this extraction — confirm the function name against the
// full source.
772 MachineInstr &MI, Type *ToType, Type *FromType,
773 LostDebugLocObserver &LocObserver, bool IsSigned) const {
774 CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
// Integer sources must be extended to the libcall's expected width; ask the
// target whether that extension should be signed.
775 if (FromType->isIntegerTy()) {
776 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
777 Arg.Flags[0].setSExt();
778 else
779 Arg.Flags[0].setZExt();
780 }
781
782 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
783 return createLibcall(Libcall, {MI.getOperand(0).getReg(), ToType, 0}, Arg,
784 LocObserver, &MI);
785}
786
// Lower G_BZERO / G_MEMCPY / G_MEMMOVE / G_MEMSET to the corresponding
// runtime libcall, deriving IR argument types from the operands' LLTs and
// honoring the trailing 'tail' immediate.
// NOTE(review): the function header (original lines ~787-788) and the
// declaration of the local 'Args' vector (original line ~792) are missing
// from this extraction.
789 LostDebugLocObserver &LocObserver) const {
790 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

793 // Add all the args, except for the last which is an imm denoting 'tail'.
794 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
795 Register Reg = MI.getOperand(i).getReg();
796
797 // Need derive an IR type for call lowering.
798 LLT OpLLT = MRI.getType(Reg);
799 Type *OpTy = nullptr;
800 if (OpLLT.isPointer())
801 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace())
802 else
803 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
804 Args.push_back({Reg, OpTy, 0});
805 }
806
807 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
808 RTLIB::Libcall RTLibcall;
809 unsigned Opc = MI.getOpcode();
810 switch (Opc) {
811 case TargetOpcode::G_BZERO:
812 RTLibcall = RTLIB::BZERO;
813 break;
814 case TargetOpcode::G_MEMCPY:
815 RTLibcall = RTLIB::MEMCPY;
816 Args[0].Flags[0].setReturned();
817 break;
818 case TargetOpcode::G_MEMMOVE:
819 RTLibcall = RTLIB::MEMMOVE;
820 Args[0].Flags[0].setReturned();
821 break;
822 case TargetOpcode::G_MEMSET:
823 RTLibcall = RTLIB::MEMSET;
824 Args[0].Flags[0].setReturned();
825 break;
826 default:
827 llvm_unreachable("unsupported opcode");
828 }
829
// NOTE(review): the early-return line after this guard (original line ~831)
// is missing from this extraction.
830 if (!Libcalls) // FIXME: Should be mandatory
832
833 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
834
835 // Unsupported libcall on the target.
836 if (RTLibcallImpl == RTLIB::Unsupported) {
837 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
838 << MIRBuilder.getTII().getName(Opc) << "\n");
// NOTE(review): the return inside this block (original line ~839) is
// missing from this extraction.
840 }
841
// NOTE(review): the declaration of 'Info' (original line ~842) and the
// libcall-name lookup (original line ~846) are missing from this extraction.
843 Info.CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
844
845 StringRef LibcallName =
847 Info.Callee = MachineOperand::CreateES(LibcallName.data());
848 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
// Tail-call only when the G_* instruction's trailing imm requests it and
// the call is actually in tail position.
849 Info.IsTailCall =
850 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
851 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
852
853 llvm::append_range(Info.OrigArgs, Args);
854 if (!CLI.lowerCall(MIRBuilder, Info))
856
857 if (Info.LoweredTailCall) {
858 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
859
860 // Check debug locations before removing the return.
861 LocObserver.checkpoint(true);
862
863 // We must have a return following the call (or debug insts) to get past
864 // isLibCallInTailPosition.
865 do {
866 MachineInstr *Next = MI.getNextNode();
867 assert(Next &&
868 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
869 "Expected instr following MI to be return or debug inst?");
870 // We lowered a tail call, so the call is now the return from the block.
871 // Delete the old return.
872 Next->eraseFromParent();
873 } while (MI.getNextNode());
874
875 // We expect to lose the debug location from the return.
876 LocObserver.checkpoint(false);
877 }
878
880}
881
// Select the outline-atomic libcall for an atomic G_* instruction, based on
// opcode, memory ordering and access size. Vector memory types and opcodes
// with no outline-atomic equivalent return UNKNOWN_LIBCALL.
882static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
883 unsigned Opc = MI.getOpcode();
884 auto &AtomicMI = cast<GMemOperation>(MI);
885 auto &MMO = AtomicMI.getMMO();
886 auto Ordering = MMO.getMergedOrdering();
887 LLT MemType = MMO.getMemoryType();
888 uint64_t MemSize = MemType.getSizeInBytes();
889 if (MemType.isVector())
890 return RTLIB::UNKNOWN_LIBCALL;
891
// LCALLS expands to the four ordering variants (relaxed/acquire/release/
// acq_rel) of one size; LCALL5 covers the 1/2/4/8/16-byte sizes.
892#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
893#define LCALL5(A) \
894 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
895 switch (Opc) {
896 case TargetOpcode::G_ATOMIC_CMPXCHG:
897 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
898 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
899 return getOutlineAtomicHelper(LC, Ordering, MemSize);
900 }
901 case TargetOpcode::G_ATOMICRMW_XCHG: {
902 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
903 return getOutlineAtomicHelper(LC, Ordering, MemSize);
904 }
905 case TargetOpcode::G_ATOMICRMW_ADD:
906 case TargetOpcode::G_ATOMICRMW_SUB: {
907 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
908 return getOutlineAtomicHelper(LC, Ordering, MemSize);
909 }
910 case TargetOpcode::G_ATOMICRMW_AND: {
911 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
912 return getOutlineAtomicHelper(LC, Ordering, MemSize);
913 }
914 case TargetOpcode::G_ATOMICRMW_OR: {
915 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
916 return getOutlineAtomicHelper(LC, Ordering, MemSize);
917 }
918 case TargetOpcode::G_ATOMICRMW_XOR: {
919 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
920 return getOutlineAtomicHelper(LC, Ordering, MemSize);
921 }
922 default:
923 return RTLIB::UNKNOWN_LIBCALL;
924 }
925#undef LCALLS
926#undef LCALL5
927}
928
// Lower an atomic G_* instruction to an outline-atomic libcall: build the
// IR return type and argument list from the instruction's registers, then
// lower the call. AND/SUB operands are pre-transformed because the outline
// routines implement LDCLR (and-not) and LDADD (add) respectively.
// NOTE(review): the function header (original lines ~929-930) and the
// declaration of the local 'Args' vector (original line ~935) are missing
// from this extraction.
931 auto &Ctx = MIRBuilder.getContext();
932
933 Type *RetTy;
934 SmallVector<Register> RetRegs;
936 unsigned Opc = MI.getOpcode();
937 switch (Opc) {
938 case TargetOpcode::G_ATOMIC_CMPXCHG:
939 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
// NOTE(review): the declaration of 'Success' (original line ~940) is
// missing from this extraction.
941 LLT SuccessLLT;
942 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
943 MI.getFirst4RegLLTs();
944 RetRegs.push_back(Ret);
945 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
// The WITH_SUCCESS form returns {old value, success flag} as a struct.
946 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
947 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
948 NewLLT) = MI.getFirst5RegLLTs();
949 RetRegs.push_back(Success);
950 RetTy = StructType::get(
951 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
952 }
953 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
954 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
955 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
956 break;
957 }
958 case TargetOpcode::G_ATOMICRMW_XCHG:
959 case TargetOpcode::G_ATOMICRMW_ADD:
960 case TargetOpcode::G_ATOMICRMW_SUB:
961 case TargetOpcode::G_ATOMICRMW_AND:
962 case TargetOpcode::G_ATOMICRMW_OR:
963 case TargetOpcode::G_ATOMICRMW_XOR: {
964 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
965 RetRegs.push_back(Ret);
966 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
// AND is implemented via LDCLR (clear bits), so pass the complement; SUB is
// implemented via LDADD, so pass the negation.
967 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
968 Val =
969 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
970 .getReg(0);
971 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
972 Val =
973 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
974 .getReg(0);
975 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
976 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
977 break;
978 }
979 default:
980 llvm_unreachable("unsupported opcode");
981 }
982
// NOTE(review): the early-return lines after the guards below (original
// lines ~984, 994) and the declarations around 'Info' / 'LibcallName'
// (original lines ~997, 1000-1001) are missing from this extraction.
983 if (!Libcalls) // FIXME: Should be mandatory
985
986 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
987 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
988 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
989
990 // Unsupported libcall on the target.
991 if (RTLibcallImpl == RTLIB::Unsupported) {
992 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
993 << MIRBuilder.getTII().getName(Opc) << "\n");
995 }
996
998 Info.CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
999
1000 StringRef LibcallName =
1002 Info.Callee = MachineOperand::CreateES(LibcallName.data());
1003 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
1004
1005 llvm::append_range(Info.OrigArgs, Args);
1006 if (!CLI.lowerCall(MIRBuilder, Info))
1008
1010}
1011
// Map FP-environment/mode G_* opcodes to the fegetenv/fesetenv/fegetmode/
// fesetmode runtime libcalls. Note SET and RESET share the same libcall.
// NOTE(review): the second line of the signature (original line 1013) is
// missing from this extraction — confirm the parameter list against the
// full source.
1012static RTLIB::Libcall
1014 RTLIB::Libcall RTLibcall;
1015 switch (MI.getOpcode()) {
1016 case TargetOpcode::G_GET_FPENV:
1017 RTLibcall = RTLIB::FEGETENV;
1018 break;
1019 case TargetOpcode::G_SET_FPENV:
1020 case TargetOpcode::G_RESET_FPENV:
1021 RTLibcall = RTLIB::FESETENV;
1022 break;
1023 case TargetOpcode::G_GET_FPMODE:
1024 RTLibcall = RTLIB::FEGETMODE;
1025 break;
1026 case TargetOpcode::G_SET_FPMODE:
1027 case TargetOpcode::G_RESET_FPMODE:
1028 RTLibcall = RTLIB::FESETMODE;
1029 break;
1030 default:
1031 llvm_unreachable("Unexpected opcode");
1032 }
1033 return RTLibcall;
1034}
1035
1036// Some library functions that read FP state (fegetmode, fegetenv) write the
1037// state into a region in memory. IR intrinsics that do the same operations
1038// (get_fpmode, get_fpenv) return the state as integer value. To implement these
// intrinsics via the library functions, we need to use a temporary variable,
1040// for example:
1041//
1042// %0:_(s32) = G_GET_FPMODE
1043//
1044// is transformed to:
1045//
1046// %1:_(p0) = G_FRAME_INDEX %stack.0
1047// BL &fegetmode
// %0:_(s32) = G_LOAD %1
1049//
LegalizerHelper::createGetStateLibcall(MachineInstr &MI,
                                       LostDebugLocObserver &LocObserver) {
  const DataLayout &DL = MIRBuilder.getDataLayout();
  auto &MF = MIRBuilder.getMF();
  auto &MRI = *MIRBuilder.getMRI();
  auto &Ctx = MF.getFunction().getContext();

  // Create temporary, where library function will put the read state.
  // The slot is sized and aligned for the instruction's result type.
  Register Dst = MI.getOperand(0).getReg();
  LLT StateTy = MRI.getType(Dst);
  TypeSize StateSize = StateTy.getSizeInBytes();
  Align TempAlign = getStackTemporaryAlignment(StateTy);
  MachinePointerInfo TempPtrInfo;
  auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);

  // Create a call to library function, with the temporary as an argument.
  // The callee returns void and writes the state through the pointer, so the
  // pointer argument lives in the alloca address space.
  unsigned TempAddrSpace = DL.getAllocaAddrSpace();
  Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
  RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
  auto Res = createLibcall(
      RTLibcall, CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
      CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}), LocObserver,
      nullptr);
  if (Res != LegalizerHelper::Legalized)
    return Res;

  // Create a load from the temporary so the result ends up in Dst as an
  // ordinary virtual register value.
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
  MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);

  // NOTE(review): the trailing "return Legalized;" line appears to be elided
  // from this excerpt — confirm against the full source.
}
1084
1085// Similar to `createGetStateLibcall` the function calls a library function
1086// using transient space in stack. In this case the library function reads
1087// content of memory region.
1089LegalizerHelper::createSetStateLibcall(MachineInstr &MI,
1090 LostDebugLocObserver &LocObserver) {
1091 const DataLayout &DL = MIRBuilder.getDataLayout();
1092 auto &MF = MIRBuilder.getMF();
1093 auto &MRI = *MIRBuilder.getMRI();
1094 auto &Ctx = MF.getFunction().getContext();
1095
1096 // Create temporary, where library function will get the new state.
1097 Register Src = MI.getOperand(0).getReg();
1098 LLT StateTy = MRI.getType(Src);
1099 TypeSize StateSize = StateTy.getSizeInBytes();
1100 Align TempAlign = getStackTemporaryAlignment(StateTy);
1101 MachinePointerInfo TempPtrInfo;
1102 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1103
1104 // Put the new state into the temporary.
1105 MachineMemOperand *MMO = MF.getMachineMemOperand(
1106 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
1107 MIRBuilder.buildStore(Src, Temp, *MMO);
1108
1109 // Create a call to library function, with the temporary as an argument.
1110 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1111 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1112 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1113 return createLibcall(RTLibcall,
1114 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1115 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1116 LocObserver, nullptr);
1117}
1118
1119/// Returns the corresponding libcall for the given Pred and
1120/// the ICMP predicate that should be generated to compare with #0
1121/// after the libcall.
static std::pair<RTLIB::Libcall, CmpInst::Predicate>
// NOTE(review): the declarator line is elided from this excerpt; the visible
// call sites invoke this as getFCMPLibcallDesc(Pred, Size) — confirm against
// the full source.
// The helper macro below picks the soft-float comparison routine of width
// `Size` (32/64/128) and pairs it with the integer predicate to apply to the
// routine's i32 result compared against zero.
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)                                \
  do {                                                                        \
    switch (Size) {                                                           \
    case 32:                                                                  \
      return {RTLIB::LibcallPrefix##32, ICmpPred};                            \
    case 64:                                                                  \
      return {RTLIB::LibcallPrefix##64, ICmpPred};                            \
    case 128:                                                                 \
      return {RTLIB::LibcallPrefix##128, ICmpPred};                           \
    default:                                                                  \
      llvm_unreachable("unexpected size");                                    \
    }                                                                         \
  } while (0)

  switch (Pred) {
  // NOTE(review): each case's RTLIBCASE_CMP(...) invocation is elided from
  // this excerpt; only the case labels remain visible.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UNE:
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UNO:
  default:
    // Predicates with no single-libcall mapping are reported as unknown; the
    // caller lowers them as combinations of libcalls instead.
    return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
  }
}
1157
// Lower a scalar G_FCMP (f32/f64/f128 operands) into soft-float comparison
// libcalls, each followed by an integer compare of the libcall's i32 result
// against zero. Predicates without a direct libcall mapping are built from
// combinations of libcalls.
LegalizerHelper::createFCMPLibcall(MachineInstr &MI,
                                   LostDebugLocObserver &LocObserver) {
  auto &MF = MIRBuilder.getMF();
  auto &Ctx = MF.getFunction().getContext();
  const GFCmp *Cmp = cast<GFCmp>(&MI);

  // Only identically-typed 32/64/128-bit scalar operands are supported.
  LLT OpLLT = MRI.getType(Cmp->getLHSReg());
  unsigned Size = OpLLT.getSizeInBits();
  if ((Size != 32 && Size != 64 && Size != 128) ||
      OpLLT != MRI.getType(Cmp->getRHSReg()))
    return UnableToLegalize;

  Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);

  // DstReg type is s32
  const Register DstReg = Cmp->getReg(0);
  LLT DstTy = MRI.getType(DstReg);
  const auto Cond = Cmp->getCond();

  // Reference:
  // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
  // Generates a libcall followed by ICMP.
  const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
                                const CmpInst::Predicate ICmpPred,
                                const DstOp &Res) -> Register {
    // FCMP libcall always returns an i32, and needs an ICMP with #0.
    constexpr LLT TempLLT = LLT::scalar(32);
    Register Temp = MRI.createGenericVirtualRegister(TempLLT);
    // Generate libcall, holding result in Temp
    const auto Status = createLibcall(
        Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
        {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
        LocObserver, &MI);
    if (!Status)
      return {};

    // Compare temp with #0 to get the final result.
    return MIRBuilder
        .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
        .getReg(0);
  };

  // Simple case if we have a direct mapping from predicate to libcall
  if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
      Libcall != RTLIB::UNKNOWN_LIBCALL &&
      ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
    if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
      return Legalized;
    }
    return UnableToLegalize;
  }

  // No direct mapping found, should be generated as combination of libcalls.

  switch (Cond) {
  case CmpInst::FCMP_UEQ: {
    // FCMP_UEQ: unordered or equal
    // Convert into (FCMP_OEQ || FCMP_UNO).

    // NOTE(review): the getFCMPLibcallDesc(...) initializer lines in this and
    // the following cases are elided from this excerpt.
    const auto [OeqLibcall, OeqPred] =
    const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);

    const auto [UnoLibcall, UnoPred] =
    const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
    if (Oeq && Uno)
      MIRBuilder.buildOr(DstReg, Oeq, Uno);
    else
      return UnableToLegalize;

    break;
  }
  case CmpInst::FCMP_ONE: {
    // FCMP_ONE: ordered and operands are unequal
    // Convert into (!FCMP_OEQ && !FCMP_UNO).

    // We inverse the predicate instead of generating a NOT
    // to save one instruction.
    // On AArch64 isel can even select two cmp into a single ccmp.
    const auto [OeqLibcall, OeqPred] =
    const auto NotOeq =
        BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);

    const auto [UnoLibcall, UnoPred] =
    const auto NotUno =
        BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);

    if (NotOeq && NotUno)
      MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
    else
      return UnableToLegalize;

    break;
  }
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT:
  case CmpInst::FCMP_ULE:
  case CmpInst::FCMP_ORD: {
    // Convert into: !(inverse(Pred))
    // E.g. FCMP_ULT becomes !FCMP_OGE
    // This is equivalent to the following, but saves some instructions.
    //   MIRBuilder.buildNot(
    //       PredTy,
    //       MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
    //                            Op1, Op2));
    const auto [InversedLibcall, InversedPred] =
    if (!BuildLibcall(InversedLibcall,
                      CmpInst::getInversePredicate(InversedPred), DstReg))
      return UnableToLegalize;
    break;
  }
  default:
    return UnableToLegalize;
  }

  return Legalized;
}
1281
1282// The function is used to legalize operations that set default environment
1283// state. In C library a call like `fesetmode(FE_DFL_MODE)` is used for that.
1284// On most targets supported in glibc FE_DFL_MODE is defined as
// `((const femode_t *) -1)`. This assumption is used here. If for some target
1286// it is not true, the target must provide custom lowering.
1288LegalizerHelper::createResetStateLibcall(MachineInstr &MI,
1289 LostDebugLocObserver &LocObserver) {
1290 const DataLayout &DL = MIRBuilder.getDataLayout();
1291 auto &MF = MIRBuilder.getMF();
1292 auto &Ctx = MF.getFunction().getContext();
1293
1294 // Create an argument for the library function.
1295 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1296 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1297 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1298 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1299 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1300 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1301 MIRBuilder.buildIntToPtr(Dest, DefValue);
1302
1303 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1304 return createLibcall(
1305 RTLibcall, CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1306 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}), LocObserver, &MI);
1307}
1308
  // NOTE(review): the enclosing declarator is elided from this excerpt; this
  // appears to be the body of LegalizerHelper::libcall(MI, LocObserver), the
  // main dispatcher that lowers an instruction to a runtime library call.
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  // Integer ops: call the libgcc-style helper for the operand's exact width.
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
    unsigned Size = LLTy.getSizeInBits();
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
    if (Status != Legalized)
      return Status;
    break;
  }
  // FP arithmetic/math ops: only f32/f64/f80/f128 have library equivalents.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FTAN:
  case TargetOpcode::G_FACOS:
  case TargetOpcode::G_FASIN:
  case TargetOpcode::G_FATAN:
  case TargetOpcode::G_FATAN2:
  case TargetOpcode::G_FCOSH:
  case TargetOpcode::G_FSINH:
  case TargetOpcode::G_FTANH:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FEXP10:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINIMUMNUM:
  case TargetOpcode::G_FMAXIMUMNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
    unsigned Size = LLTy.getSizeInBits();
    Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
    if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
      return UnableToLegalize;
    }
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FSINCOS: {
    LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
    unsigned Size = LLTy.getSizeInBits();
    Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
    if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
      return UnableToLegalize;
    }
    // emitSincosLibcall handles erasing MI itself on success.
    return emitSincosLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
  }
  case TargetOpcode::G_FMODF: {
    LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
    unsigned Size = LLTy.getSizeInBits();
    Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
    if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
      return UnableToLegalize;
    }
    return emitModfLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
  }
  // FP-to-integer rounding ops: FP source (operand 1), integer result.
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND:
  case TargetOpcode::G_INTRINSIC_LRINT:
  case TargetOpcode::G_INTRINSIC_LLRINT: {
    LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
    unsigned Size = LLTy.getSizeInBits();
    Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
    Type *ITy = IntegerType::get(
        Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
    if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
      return UnableToLegalize;
    }
    auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
    // NOTE(review): a "LegalizeResult Status =" line appears to be elided from
    // this excerpt here (and at similar spots below) — confirm against the
    // full source.
        createLibcall(Libcall, {MI.getOperand(0).getReg(), ITy, 0},
                      {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
    if (Status != Legalized)
      return Status;
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FPOWI:
  case TargetOpcode::G_FLDEXP: {
    LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
    unsigned Size = LLTy.getSizeInBits();
    Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
    Type *ITy = IntegerType::get(
        Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
    if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
      return UnableToLegalize;
    }
    auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
    // NOTE(review): the Args declaration line appears to be elided from this
    // excerpt; these are the FP value and the integer exponent arguments.
        {MI.getOperand(1).getReg(), HLTy, 0},
        {MI.getOperand(2).getReg(), ITy, 1}};
    // The exponent argument is passed sign-extended.
    Args[1].Flags[0].setSExt();
        Libcall, {MI.getOperand(0).getReg(), HLTy, 0}, Args, LocObserver, &MI);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC: {
    Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
    Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
    if (!FromTy || !ToTy)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(MI, ToTy, FromTy, LocObserver);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FCMP: {
    LegalizeResult Status = createFCMPLibcall(MI, LocObserver);
    if (Status != Legalized)
      return Status;
    // Erase here and return — do not fall through to the common erase below.
    MI.eraseFromParent();
    return Status;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    Type *FromTy =
        getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
      return UnableToLegalize;
    // NOTE(review): the "LegalizeResult Status = conversionLibcall(..." line
    // appears to be elided from this excerpt.
        FromTy, LocObserver);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    Type *ToTy =
        getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
    if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
      return UnableToLegalize;
    bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
    // NOTE(review): the "LegalizeResult Status = conversionLibcall(" line
    // appears to be elided from this excerpt.
        MI, ToTy, Type::getIntNTy(Ctx, FromSize), LocObserver, IsSigned);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    // NOTE(review): the line initializing Status (presumably a call to the
    // atomic-libcall helper) appears to be elided from this excerpt.
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET: {
    LegalizeResult Result =
        createMemLibcall(*MIRBuilder.getMRI(), MI, LocObserver);
    if (Result != Legalized)
      return Result;
    MI.eraseFromParent();
    return Result;
  }
  case TargetOpcode::G_GET_FPENV:
  case TargetOpcode::G_GET_FPMODE: {
    LegalizeResult Result = createGetStateLibcall(MI, LocObserver);
    if (Result != Legalized)
      return Result;
    break;
  }
  case TargetOpcode::G_SET_FPENV:
  case TargetOpcode::G_SET_FPMODE: {
    LegalizeResult Result = createSetStateLibcall(MI, LocObserver);
    if (Result != Legalized)
      return Result;
    break;
  }
  case TargetOpcode::G_RESET_FPENV:
  case TargetOpcode::G_RESET_FPMODE: {
    LegalizeResult Result = createResetStateLibcall(MI, LocObserver);
    if (Result != Legalized)
      return Result;
    break;
  }
  }

  // Cases that "break" out of the switch share this common cleanup; cases
  // that return early erase MI themselves (or intentionally leave it alone).
  MI.eraseFromParent();
  return Legalized;
}
1537
1539 unsigned TypeIdx,
1540 LLT NarrowTy) {
1541 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1542 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1543
1544 switch (MI.getOpcode()) {
1545 default:
1546 return UnableToLegalize;
1547 case TargetOpcode::G_IMPLICIT_DEF: {
1548 Register DstReg = MI.getOperand(0).getReg();
1549 LLT DstTy = MRI.getType(DstReg);
1550
1551 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1552 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1553 // FIXME: Although this would also be legal for the general case, it causes
1554 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1555 // combines not being hit). This seems to be a problem related to the
1556 // artifact combiner.
1557 if (SizeOp0 % NarrowSize != 0) {
1558 LLT ImplicitTy = DstTy.changeElementType(NarrowTy);
1559 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1560 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1561
1562 MI.eraseFromParent();
1563 return Legalized;
1564 }
1565
1566 int NumParts = SizeOp0 / NarrowSize;
1567
1569 for (int i = 0; i < NumParts; ++i)
1570 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1571
1572 if (DstTy.isVector())
1573 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1574 else
1575 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1576 MI.eraseFromParent();
1577 return Legalized;
1578 }
1579 case TargetOpcode::G_CONSTANT: {
1580 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1581 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1582 unsigned TotalSize = Ty.getSizeInBits();
1583 unsigned NarrowSize = NarrowTy.getSizeInBits();
1584 int NumParts = TotalSize / NarrowSize;
1585
1586 SmallVector<Register, 4> PartRegs;
1587 for (int I = 0; I != NumParts; ++I) {
1588 unsigned Offset = I * NarrowSize;
1589 auto K = MIRBuilder.buildConstant(NarrowTy,
1590 Val.lshr(Offset).trunc(NarrowSize));
1591 PartRegs.push_back(K.getReg(0));
1592 }
1593
1594 LLT LeftoverTy;
1595 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1596 SmallVector<Register, 1> LeftoverRegs;
1597 if (LeftoverBits != 0) {
1598 LeftoverTy = LLT::scalar(LeftoverBits);
1599 auto K = MIRBuilder.buildConstant(
1600 LeftoverTy,
1601 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1602 LeftoverRegs.push_back(K.getReg(0));
1603 }
1604
1605 insertParts(MI.getOperand(0).getReg(),
1606 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1607
1608 MI.eraseFromParent();
1609 return Legalized;
1610 }
1611 case TargetOpcode::G_SEXT:
1612 case TargetOpcode::G_ZEXT:
1613 case TargetOpcode::G_ANYEXT:
1614 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1615 case TargetOpcode::G_TRUNC: {
1616 if (TypeIdx != 1)
1617 return UnableToLegalize;
1618
1619 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1620 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1621 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1622 return UnableToLegalize;
1623 }
1624
1625 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1626 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1627 MI.eraseFromParent();
1628 return Legalized;
1629 }
1630 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1631 case TargetOpcode::G_FREEZE: {
1632 if (TypeIdx != 0)
1633 return UnableToLegalize;
1634
1635 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1636 // Should widen scalar first
1637 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1638 return UnableToLegalize;
1639
1640 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1642 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1643 Parts.push_back(
1644 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1645 .getReg(0));
1646 }
1647
1648 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1649 MI.eraseFromParent();
1650 return Legalized;
1651 }
1652 case TargetOpcode::G_ADD:
1653 case TargetOpcode::G_SUB:
1654 case TargetOpcode::G_SADDO:
1655 case TargetOpcode::G_SSUBO:
1656 case TargetOpcode::G_SADDE:
1657 case TargetOpcode::G_SSUBE:
1658 case TargetOpcode::G_UADDO:
1659 case TargetOpcode::G_USUBO:
1660 case TargetOpcode::G_UADDE:
1661 case TargetOpcode::G_USUBE:
1662 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1663 case TargetOpcode::G_MUL:
1664 case TargetOpcode::G_UMULH:
1665 return narrowScalarMul(MI, NarrowTy);
1666 case TargetOpcode::G_EXTRACT:
1667 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1668 case TargetOpcode::G_INSERT:
1669 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1670 case TargetOpcode::G_LOAD: {
1671 auto &LoadMI = cast<GLoad>(MI);
1672 Register DstReg = LoadMI.getDstReg();
1673 LLT DstTy = MRI.getType(DstReg);
1674 if (DstTy.isVector())
1675 return UnableToLegalize;
1676
1677 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1678 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1679 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1680 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1681 LoadMI.eraseFromParent();
1682 return Legalized;
1683 }
1684
1685 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1686 }
1687 case TargetOpcode::G_ZEXTLOAD:
1688 case TargetOpcode::G_SEXTLOAD: {
1689 auto &LoadMI = cast<GExtLoad>(MI);
1690 Register DstReg = LoadMI.getDstReg();
1691 Register PtrReg = LoadMI.getPointerReg();
1692
1693 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1694 auto &MMO = LoadMI.getMMO();
1695 unsigned MemSize = MMO.getSizeInBits().getValue();
1696
1697 if (MemSize == NarrowSize) {
1698 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1699 } else if (MemSize < NarrowSize) {
1700 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1701 } else if (MemSize > NarrowSize) {
1702 // FIXME: Need to split the load.
1703 return UnableToLegalize;
1704 }
1705
1706 if (isa<GZExtLoad>(LoadMI))
1707 MIRBuilder.buildZExt(DstReg, TmpReg);
1708 else
1709 MIRBuilder.buildSExt(DstReg, TmpReg);
1710
1711 LoadMI.eraseFromParent();
1712 return Legalized;
1713 }
1714 case TargetOpcode::G_STORE: {
1715 auto &StoreMI = cast<GStore>(MI);
1716
1717 Register SrcReg = StoreMI.getValueReg();
1718 LLT SrcTy = MRI.getType(SrcReg);
1719 if (SrcTy.isVector())
1720 return UnableToLegalize;
1721
1722 int NumParts = SizeOp0 / NarrowSize;
1723 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1724 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1725 if (SrcTy.isVector() && LeftoverBits != 0)
1726 return UnableToLegalize;
1727
1728 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1729 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1730 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1731 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1732 StoreMI.eraseFromParent();
1733 return Legalized;
1734 }
1735
1736 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1737 }
1738 case TargetOpcode::G_SELECT:
1739 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1740 case TargetOpcode::G_AND:
1741 case TargetOpcode::G_OR:
1742 case TargetOpcode::G_XOR: {
1743 // Legalize bitwise operation:
1744 // A = BinOp<Ty> B, C
1745 // into:
1746 // B1, ..., BN = G_UNMERGE_VALUES B
1747 // C1, ..., CN = G_UNMERGE_VALUES C
1748 // A1 = BinOp<Ty/N> B1, C2
1749 // ...
1750 // AN = BinOp<Ty/N> BN, CN
1751 // A = G_MERGE_VALUES A1, ..., AN
1752 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1753 }
1754 case TargetOpcode::G_SHL:
1755 case TargetOpcode::G_LSHR:
1756 case TargetOpcode::G_ASHR:
1757 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1758 case TargetOpcode::G_CTLZ:
1759 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1760 case TargetOpcode::G_CTTZ:
1761 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1762 case TargetOpcode::G_CTLS:
1763 case TargetOpcode::G_CTPOP:
1764 if (TypeIdx == 1)
1765 switch (MI.getOpcode()) {
1766 case TargetOpcode::G_CTLZ:
1767 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1768 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1769 case TargetOpcode::G_CTTZ:
1770 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1771 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1772 case TargetOpcode::G_CTPOP:
1773 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1774 case TargetOpcode::G_CTLS:
1775 return narrowScalarCTLS(MI, TypeIdx, NarrowTy);
1776 default:
1777 return UnableToLegalize;
1778 }
1779
1780 Observer.changingInstr(MI);
1781 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1782 Observer.changedInstr(MI);
1783 return Legalized;
1784 case TargetOpcode::G_INTTOPTR:
1785 if (TypeIdx != 1)
1786 return UnableToLegalize;
1787
1788 Observer.changingInstr(MI);
1789 narrowScalarSrc(MI, NarrowTy, 1);
1790 Observer.changedInstr(MI);
1791 return Legalized;
1792 case TargetOpcode::G_PTRTOINT:
1793 if (TypeIdx != 0)
1794 return UnableToLegalize;
1795
1796 Observer.changingInstr(MI);
1797 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1798 Observer.changedInstr(MI);
1799 return Legalized;
1800 case TargetOpcode::G_PHI: {
1801 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1802 // NarrowSize.
1803 if (SizeOp0 % NarrowSize != 0)
1804 return UnableToLegalize;
1805
1806 unsigned NumParts = SizeOp0 / NarrowSize;
1807 SmallVector<Register, 2> DstRegs(NumParts);
1808 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1809 Observer.changingInstr(MI);
1810 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1811 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1812 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1813 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1814 SrcRegs[i / 2], MIRBuilder, MRI);
1815 }
1816 MachineBasicBlock &MBB = *MI.getParent();
1817 MIRBuilder.setInsertPt(MBB, MI);
1818 for (unsigned i = 0; i < NumParts; ++i) {
1819 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1821 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1822 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1823 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1824 }
1825 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1826 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1827 Observer.changedInstr(MI);
1828 MI.eraseFromParent();
1829 return Legalized;
1830 }
1831 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1832 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1833 if (TypeIdx != 2)
1834 return UnableToLegalize;
1835
1836 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1837 Observer.changingInstr(MI);
1838 narrowScalarSrc(MI, NarrowTy, OpIdx);
1839 Observer.changedInstr(MI);
1840 return Legalized;
1841 }
1842 case TargetOpcode::G_ICMP: {
1843 Register LHS = MI.getOperand(2).getReg();
1844 LLT SrcTy = MRI.getType(LHS);
1845 CmpInst::Predicate Pred =
1846 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1847
1848 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1849 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1850 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1851 LHSLeftoverRegs, MIRBuilder, MRI))
1852 return UnableToLegalize;
1853
1854 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1855 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1856 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1857 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1858 return UnableToLegalize;
1859
1860 // We now have the LHS and RHS of the compare split into narrow-type
1861 // registers, plus potentially some leftover type.
1862 Register Dst = MI.getOperand(0).getReg();
1863 LLT ResTy = MRI.getType(Dst);
1864 if (ICmpInst::isEquality(Pred)) {
1865 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1866 // them together. For each equal part, the result should be all 0s. For
1867 // each non-equal part, we'll get at least one 1.
1868 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1870 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1871 auto LHS = std::get<0>(LHSAndRHS);
1872 auto RHS = std::get<1>(LHSAndRHS);
1873 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1874 Xors.push_back(Xor);
1875 }
1876
1877 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1878 // to the desired narrow type so that we can OR them together later.
1879 SmallVector<Register, 4> WidenedXors;
1880 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1881 auto LHS = std::get<0>(LHSAndRHS);
1882 auto RHS = std::get<1>(LHSAndRHS);
1883 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1884 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1885 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1886 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1887 llvm::append_range(Xors, WidenedXors);
1888 }
1889
1890 // Now, for each part we broke up, we know if they are equal/not equal
1891 // based off the G_XOR. We can OR these all together and compare against
1892 // 0 to get the result.
1893 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1894 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1895 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1896 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1897 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1898 } else {
1899 Register CmpIn;
1900 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1901 Register CmpOut;
1902 CmpInst::Predicate PartPred;
1903
1904 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1905 PartPred = Pred;
1906 CmpOut = Dst;
1907 } else {
1908 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1909 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1910 }
1911
1912 if (!CmpIn) {
1913 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
1914 RHSPartRegs[I]);
1915 } else {
1916 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
1917 RHSPartRegs[I]);
1918 auto CmpEq = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1919 LHSPartRegs[I], RHSPartRegs[I]);
1920 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1921 }
1922
1923 CmpIn = CmpOut;
1924 }
1925
1926 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1927 Register CmpOut;
1928 CmpInst::Predicate PartPred;
1929
1930 if (I == E - 1) {
1931 PartPred = Pred;
1932 CmpOut = Dst;
1933 } else {
1934 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1935 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1936 }
1937
1938 if (!CmpIn) {
1939 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
1940 RHSLeftoverRegs[I]);
1941 } else {
1942 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
1943 RHSLeftoverRegs[I]);
1944 auto CmpEq =
1945 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1946 LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
1947 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1948 }
1949
1950 CmpIn = CmpOut;
1951 }
1952 }
1953 MI.eraseFromParent();
1954 return Legalized;
1955 }
1956 case TargetOpcode::G_FCMP:
1957 if (TypeIdx != 0)
1958 return UnableToLegalize;
1959
1960 Observer.changingInstr(MI);
1961 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1962 Observer.changedInstr(MI);
1963 return Legalized;
1964
1965 case TargetOpcode::G_SEXT_INREG: {
1966 if (TypeIdx != 0)
1967 return UnableToLegalize;
1968
1969 int64_t SizeInBits = MI.getOperand(2).getImm();
1970
1971 // So long as the new type has more bits than the bits we're extending we
1972 // don't need to break it apart.
1973 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1974 Observer.changingInstr(MI);
1975 // We don't lose any non-extension bits by truncating the src and
1976 // sign-extending the dst.
1977 MachineOperand &MO1 = MI.getOperand(1);
1978 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1979 MO1.setReg(TruncMIB.getReg(0));
1980
1981 MachineOperand &MO2 = MI.getOperand(0);
1982 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1983 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1984 MIRBuilder.buildSExt(MO2, DstExt);
1985 MO2.setReg(DstExt);
1986 Observer.changedInstr(MI);
1987 return Legalized;
1988 }
1989
1990 // Break it apart. Components below the extension point are unmodified. The
1991 // component containing the extension point becomes a narrower SEXT_INREG.
1992 // Components above it are ashr'd from the component containing the
1993 // extension point.
1994 if (SizeOp0 % NarrowSize != 0)
1995 return UnableToLegalize;
1996 int NumParts = SizeOp0 / NarrowSize;
1997
1998 // List the registers where the destination will be scattered.
2000 // List the registers where the source will be split.
2002
2003 // Create all the temporary registers.
2004 for (int i = 0; i < NumParts; ++i) {
2005 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
2006
2007 SrcRegs.push_back(SrcReg);
2008 }
2009
2010 // Explode the big arguments into smaller chunks.
2011 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
2012
2013 Register AshrCstReg =
2014 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
2015 .getReg(0);
2016 Register FullExtensionReg;
2017 Register PartialExtensionReg;
2018
2019 // Do the operation on each small part.
2020 for (int i = 0; i < NumParts; ++i) {
2021 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
2022 DstRegs.push_back(SrcRegs[i]);
2023 PartialExtensionReg = DstRegs.back();
2024 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
2025 assert(PartialExtensionReg &&
2026 "Expected to visit partial extension before full");
2027 if (FullExtensionReg) {
2028 DstRegs.push_back(FullExtensionReg);
2029 continue;
2030 }
2031 DstRegs.push_back(
2032 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
2033 .getReg(0));
2034 FullExtensionReg = DstRegs.back();
2035 } else {
2036 DstRegs.push_back(
2038 .buildInstr(
2039 TargetOpcode::G_SEXT_INREG, {NarrowTy},
2040 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
2041 .getReg(0));
2042 PartialExtensionReg = DstRegs.back();
2043 }
2044 }
2045
2046 // Gather the destination registers into the final destination.
2047 Register DstReg = MI.getOperand(0).getReg();
2048 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2049 MI.eraseFromParent();
2050 return Legalized;
2051 }
2052 case TargetOpcode::G_BSWAP:
2053 case TargetOpcode::G_BITREVERSE: {
2054 if (SizeOp0 % NarrowSize != 0)
2055 return UnableToLegalize;
2056
2057 Observer.changingInstr(MI);
2058 SmallVector<Register, 2> SrcRegs, DstRegs;
2059 unsigned NumParts = SizeOp0 / NarrowSize;
2060 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2061 MIRBuilder, MRI);
2062
2063 for (unsigned i = 0; i < NumParts; ++i) {
2064 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
2065 {SrcRegs[NumParts - 1 - i]});
2066 DstRegs.push_back(DstPart.getReg(0));
2067 }
2068
2069 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
2070
2071 Observer.changedInstr(MI);
2072 MI.eraseFromParent();
2073 return Legalized;
2074 }
2075 case TargetOpcode::G_PTR_ADD:
2076 case TargetOpcode::G_PTRMASK: {
2077 if (TypeIdx != 1)
2078 return UnableToLegalize;
2079 Observer.changingInstr(MI);
2080 narrowScalarSrc(MI, NarrowTy, 2);
2081 Observer.changedInstr(MI);
2082 return Legalized;
2083 }
2084 case TargetOpcode::G_FPTOUI:
2085 case TargetOpcode::G_FPTOSI:
2086 case TargetOpcode::G_FPTOUI_SAT:
2087 case TargetOpcode::G_FPTOSI_SAT:
2088 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
2089 case TargetOpcode::G_FPEXT:
2090 if (TypeIdx != 0)
2091 return UnableToLegalize;
2092 Observer.changingInstr(MI);
2093 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
2094 Observer.changedInstr(MI);
2095 return Legalized;
2096 case TargetOpcode::G_FLDEXP:
2097 case TargetOpcode::G_STRICT_FLDEXP:
2098 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
2099 case TargetOpcode::G_VSCALE: {
2100 Register Dst = MI.getOperand(0).getReg();
2101 LLT Ty = MRI.getType(Dst);
2102
2103 // Assume VSCALE(1) fits into a legal integer
2104 const APInt One(NarrowTy.getSizeInBits(), 1);
2105 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
2106 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
2107 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
2108 MIRBuilder.buildMul(Dst, ZExt, C);
2109
2110 MI.eraseFromParent();
2111 return Legalized;
2112 }
2113 }
2114}
2115
// Coerce \p Val into a plain integer scalar of the same total bit width:
//  - scalars are returned unchanged;
//  - pointers become a G_PTRTOINT (or an invalid Register for non-integral
//    address spaces, where the conversion is not meaningful);
//  - vectors (after converting pointer elements to integers) are G_BITCAST
//    to the equivalent-width scalar.
// NOTE(review): the opening signature line was dropped by the page
// extraction — presumably `Register LegalizerHelper::coerceToScalar(
// Register Val)`; confirm against the upstream source.
2117 LLT Ty = MRI.getType(Val);
2118 if (Ty.isScalar())
2119 return Val;
2120
2121 const DataLayout &DL = MIRBuilder.getDataLayout();
2122 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
2123 if (Ty.isPointer()) {
// An invalid Register signals failure to the caller: a pointer in a
// non-integral address space cannot be losslessly cast to an integer.
2124 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2125 return Register();
2126 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2127 }
2128
2129 Register NewVal = Val;
2130
2131 assert(Ty.isVector());
// Pointer-element vectors are first converted to integers before the
// final bitcast down to a scalar.
2132 if (Ty.isPointerVector())
2133 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2134 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2135}
2136
// Widen the use operand \p OpIdx of \p MI: build \p ExtOpcode (callers in
// this file pass G_ANYEXT/G_SEXT/G_ZEXT-style opcodes) from the current
// value up to the wide type and rewire the operand to the extended vreg.
// NOTE(review): the first line of the signature was dropped by the page
// extraction; only the continuation ("unsigned OpIdx, unsigned ExtOpcode")
// survives below — WideTy is a preceding parameter.
2138 unsigned OpIdx, unsigned ExtOpcode) {
2139 MachineOperand &MO = MI.getOperand(OpIdx);
2140 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2141 MO.setReg(ExtB.getReg(0));
2142}
2143
// Narrow the use operand \p OpIdx of \p MI by inserting a G_TRUNC to
// NarrowTy and pointing the operand at the truncated value.
// NOTE(review): first signature line lost in extraction; NarrowTy is a
// preceding parameter.
2145 unsigned OpIdx) {
2146 MachineOperand &MO = MI.getOperand(OpIdx);
2147 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
2148 MO.setReg(ExtB.getReg(0));
2149}
2150
// Widen the def operand \p OpIdx: MI now defines a fresh WideTy vreg, and
// \p TruncOpcode is emitted *after* MI (note the insert point is bumped one
// past MI) to convert the wide result back into the original destination.
// NOTE(review): first signature line lost in extraction; WideTy is a
// preceding parameter.
2152 unsigned OpIdx, unsigned TruncOpcode) {
2153 MachineOperand &MO = MI.getOperand(OpIdx);
2154 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2155 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2156 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2157 MO.setReg(DstExt);
2158}
2159
// Narrow the def operand \p OpIdx: MI defines a fresh NarrowTy vreg, and
// \p ExtOpcode is emitted after MI to widen that narrow result back into
// the original destination. Mirror image of widenScalarDst above.
// NOTE(review): first signature line lost in extraction; NarrowTy is a
// preceding parameter.
2161 unsigned OpIdx, unsigned ExtOpcode) {
2162 MachineOperand &MO = MI.getOperand(OpIdx);
2163 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2164 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2165 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2166 MO.setReg(DstTrunc);
2167}
2168
// Re-type a vector def operand to the wider (more-elements) type WideTy:
// MI defines a WideTy vreg, and the trailing padding elements are stripped
// afterwards (insert point bumped past MI) to reconstruct the original,
// narrower destination.
// NOTE(review): first signature line lost in extraction; WideTy is a
// preceding parameter.
2170 unsigned OpIdx) {
2171 MachineOperand &MO = MI.getOperand(OpIdx);
2172 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2173 Register Dst = MO.getReg();
2174 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2175 MO.setReg(DstExt);
2176 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2177}
2178
// Pad a vector use operand out to MoreTy with undef elements and rewire
// the operand to the padded value.
// NOTE(review): first signature line lost in extraction; MoreTy is a
// preceding parameter.
2180 unsigned OpIdx) {
2181 MachineOperand &MO = MI.getOperand(OpIdx);
2182 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2183}
2184
// Replace the use operand \p OpIdx with a G_BITCAST of itself to CastTy.
// NOTE(review): the signature line was dropped by the page extraction;
// CastTy and OpIdx are parameters.
2186 MachineOperand &Op = MI.getOperand(OpIdx);
2187 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
2188}
2189
// Re-type the def operand \p OpIdx to CastTy: MI defines CastDst, and a
// G_BITCAST emitted after MI (insert point bumped) converts it back to
// the original destination type.
// NOTE(review): the signature line was dropped by the page extraction;
// CastTy and OpIdx are parameters.
2191 MachineOperand &MO = MI.getOperand(OpIdx);
2192 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2193 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2194 MIRBuilder.buildBitcast(MO, CastDst);
2195 MO.setReg(CastDst);
2196}
2197
// Widen G_MERGE_VALUES when the *source* scalar type (TypeIdx 1) is
// widened. Two strategies:
//  1. If WideTy can hold the whole destination: zext each source piece,
//     shift it into its bit position and OR everything together, then
//     trunc (or inttoptr for pointer results) into the destination.
//  2. Otherwise: unmerge every source down to gcd(SrcSize, WideSize)
//     chunks, pad with undef, re-merge into WideTy-sized groups, and
//     finally merge (with a trailing trunc if the sizes don't divide
//     evenly) into the destination.
// NOTE(review): the return-type line of the signature was dropped by the
// page extraction.
2199 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
2200 LLT WideTy) {
2201 if (TypeIdx != 1)
2202 return UnableToLegalize;
2203
2204 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
2205 if (DstTy.isVector())
2206 return UnableToLegalize;
2207
2208 LLT SrcTy = MRI.getType(Src1Reg);
2209 const int DstSize = DstTy.getSizeInBits();
2210 const int SrcSize = SrcTy.getSizeInBits();
2211 const int WideSize = WideTy.getSizeInBits();
// Number of WideTy pieces needed to cover the destination (rounded up).
2212 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2213
2214 unsigned NumOps = MI.getNumOperands();
2215 unsigned NumSrc = MI.getNumOperands() - 1;
2216 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2217
2218 if (WideSize >= DstSize) {
2219 // Directly pack the bits in the target type.
// Seed the accumulator with the first (lowest-offset) source.
2220 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
2221
2222 for (unsigned I = 2; I != NumOps; ++I) {
2223 const unsigned Offset = (I - 1) * PartSize;
2224
2225 Register SrcReg = MI.getOperand(I).getReg();
2226 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
2227
2228 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
2229
// The final OR can write straight into DstReg when no trunc is needed.
2230 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
2231 MRI.createGenericVirtualRegister(WideTy);
2232
2233 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
2234 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
2235 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
2236 ResultReg = NextResult;
2237 }
2238
2239 if (WideSize > DstSize)
2240 MIRBuilder.buildTrunc(DstReg, ResultReg);
2241 else if (DstTy.isPointer())
2242 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
2243
2244 MI.eraseFromParent();
2245 return Legalized;
2246 }
2247
2248 // Unmerge the original values to the GCD type, and recombine to the next
2249 // multiple greater than the original type.
2250 //
2251 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2252 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2253 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2254 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2255 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2256 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2257 // %12:_(s12) = G_MERGE_VALUES %10, %11
2258 //
2259 // Padding with undef if necessary:
2260 //
2261 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2262 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2263 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2264 // %7:_(s2) = G_IMPLICIT_DEF
2265 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2266 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2267 // %10:_(s12) = G_MERGE_VALUES %8, %9
2268
2269 const int GCD = std::gcd(SrcSize, WideSize);
2270 LLT GCDTy = LLT::scalar(GCD);
2271
2272 SmallVector<Register, 8> NewMergeRegs;
2273 SmallVector<Register, 8> Unmerges;
2274 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
2275
2276 // Decompose the original operands if they don't evenly divide.
2277 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
2278 Register SrcReg = MO.getReg();
2279 if (GCD == SrcSize) {
// Source already is the GCD type; no unmerge needed.
2280 Unmerges.push_back(SrcReg);
2281 } else {
2282 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2283 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2284 Unmerges.push_back(Unmerge.getReg(J));
2285 }
2286 }
2287
// NOTE(review): the bound NumMerge * WideSize is a *bit* count while
// Unmerges holds GCD-sized parts, so this pads further than necessary;
// the merge loop below only consumes NumMerge * PartsPerGCD entries, so
// the extra undefs are dead but harmless — confirm against upstream.
2288 // Pad with undef to the next size that is a multiple of the requested size.
2289 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
2290 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
2291 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
2292 Unmerges.push_back(UndefReg);
2293 }
2294
2295 const int PartsPerGCD = WideSize / GCD;
2296
2297 // Build merges of each piece.
2298 ArrayRef<Register> Slicer(Unmerges);
2299 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2300 auto Merge =
2301 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2302 NewMergeRegs.push_back(Merge.getReg(0));
2303 }
2304
2305 // A truncate may be necessary if the requested type doesn't evenly divide the
2306 // original result type.
2307 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
2308 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2309 } else {
2310 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2311 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2312 }
2313
2314 MI.eraseFromParent();
2315 return Legalized;
2316}
2317
// Widen G_UNMERGE_VALUES when the *destination* piece type (TypeIdx 0) is
// widened. If WideTy covers the whole (scalar or pointer-cast) source,
// each result is extracted directly by lshr + trunc. Otherwise the source
// is any-extended to lcm(SrcTy, WideTy), unmerged at WideTy, and the
// pieces are re-split/re-merged back into the original destinations —
// either directly (with dead defs padding the excess) when each WideTy
// piece holds a whole number of destinations, or via a GCD type.
// NOTE(review): the return-type line of the signature was dropped by the
// page extraction.
2319 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
2320 LLT WideTy) {
2321 if (TypeIdx != 0)
2322 return UnableToLegalize;
2323
// Operand layout: NumDst defs followed by the single source use.
2324 int NumDst = MI.getNumOperands() - 1;
2325 Register SrcReg = MI.getOperand(NumDst).getReg();
2326 LLT SrcTy = MRI.getType(SrcReg);
2327 if (SrcTy.isVector())
2328 return UnableToLegalize;
2329
2330 Register Dst0Reg = MI.getOperand(0).getReg();
2331 LLT DstTy = MRI.getType(Dst0Reg);
2332 if (!DstTy.isScalar())
2333 return UnableToLegalize;
2334
2335 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
2336 if (SrcTy.isPointer()) {
2337 const DataLayout &DL = MIRBuilder.getDataLayout();
2338 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
2339 LLVM_DEBUG(
2340 dbgs() << "Not casting non-integral address space integer\n");
2341 return UnableToLegalize;
2342 }
2343
2344 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
2345 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2346 }
2347
2348 // Widen SrcTy to WideTy. This does not affect the result, but since the
2349 // user requested this size, it is probably better handled than SrcTy and
2350 // should reduce the total number of legalization artifacts.
2351 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2352 SrcTy = WideTy;
2353 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2354 }
2355
2356 // Theres no unmerge type to target. Directly extract the bits from the
2357 // source type
2358 unsigned DstSize = DstTy.getSizeInBits();
2359
// Result I lives at bit offset DstSize * I of the source.
2360 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2361 for (int I = 1; I != NumDst; ++I) {
2362 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2363 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2364 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2365 }
2366
2367 MI.eraseFromParent();
2368 return Legalized;
2369 }
2370
2371 // Extend the source to a wider type.
2372 LLT LCMTy = getLCMType(SrcTy, WideTy);
2373
2374 Register WideSrc = SrcReg;
2375 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2376 // TODO: If this is an integral address space, cast to integer and anyext.
2377 if (SrcTy.isPointer()) {
2378 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2379 return UnableToLegalize;
2380 }
2381
2382 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2383 }
2384
2385 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2386
2387 // Create a sequence of unmerges and merges to the original results. Since we
2388 // may have widened the source, we will need to pad the results with dead defs
2389 // to cover the source register.
2390 // e.g. widen s48 to s64:
2391 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2392 //
2393 // =>
2394 // %4:_(s192) = G_ANYEXT %0:_(s96)
2395 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2396 // ; unpack to GCD type, with extra dead defs
2397 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2398 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2399 // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
2400 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2401 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2402 const LLT GCDTy = getGCDType(WideTy, DstTy);
2403 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2404 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2405
2406 // Directly unmerge to the destination without going through a GCD type
2407 // if possible
2408 if (PartsPerRemerge == 1) {
2409 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2410
2411 for (int I = 0; I != NumUnmerge; ++I) {
2412 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2413
2414 for (int J = 0; J != PartsPerUnmerge; ++J) {
2415 int Idx = I * PartsPerUnmerge + J;
2416 if (Idx < NumDst)
2417 MIB.addDef(MI.getOperand(Idx).getReg());
2418 else {
2419 // Create dead def for excess components.
2420 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2421 }
2422 }
2423
2424 MIB.addUse(Unmerge.getReg(I));
2425 }
2426 } else {
// Unpack every WideTy piece into GCD-sized parts, then group
// PartsPerRemerge of them per original destination.
2427 SmallVector<Register, 16> Parts;
2428 for (int J = 0; J != NumUnmerge; ++J)
2429 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2430
2431 SmallVector<Register, 8> RemergeParts;
2432 for (int I = 0; I != NumDst; ++I) {
2433 for (int J = 0; J < PartsPerRemerge; ++J) {
2434 const int Idx = I * PartsPerRemerge + J;
2435 RemergeParts.emplace_back(Parts[Idx]);
2436 }
2437
2438 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2439 RemergeParts.clear();
2440 }
2441 }
2442
2443 MI.eraseFromParent();
2444 return Legalized;
2445}
2446
// Widen G_EXTRACT. For TypeIdx 0 (the extracted result): pointer sources
// are first cast to integers (integral address spaces only), then the
// result is either a direct trunc (Offset == 0) or an lshr by the bit
// offset in the possibly any-extended source followed by a trunc. For
// TypeIdx 1 (the source): scalar sources are simply any-extended; vector
// sources are handled only when extracting a whole element at an
// element-aligned offset, with the immediate offset rescaled for the
// widened vector.
// NOTE(review): the return-type line of the signature was dropped by the
// page extraction.
2448 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2449 LLT WideTy) {
2450 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2451 unsigned Offset = MI.getOperand(2).getImm();
2452
2453 if (TypeIdx == 0) {
2454 if (SrcTy.isVector() || DstTy.isVector())
2455 return UnableToLegalize;
2456
2457 SrcOp Src(SrcReg);
2458 if (SrcTy.isPointer()) {
2459 // Extracts from pointers can be handled only if they are really just
2460 // simple integers.
2461 const DataLayout &DL = MIRBuilder.getDataLayout();
2462 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2463 return UnableToLegalize;
2464
2465 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2466 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2467 SrcTy = SrcAsIntTy;
2468 }
2469
2470 if (DstTy.isPointer())
2471 return UnableToLegalize;
2472
2473 if (Offset == 0) {
2474 // Avoid a shift in the degenerate case.
2475 MIRBuilder.buildTrunc(DstReg,
2476 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2477 MI.eraseFromParent();
2478 return Legalized;
2479 }
2480
2481 // Do a shift in the source type.
2482 LLT ShiftTy = SrcTy;
2483 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2484 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2485 ShiftTy = WideTy;
2486 }
2487
2488 auto LShr = MIRBuilder.buildLShr(
2489 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2490 MIRBuilder.buildTrunc(DstReg, LShr);
2491 MI.eraseFromParent();
2492 return Legalized;
2493 }
2494
2495 if (SrcTy.isScalar()) {
2496 Observer.changingInstr(MI);
2497 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2498 Observer.changedInstr(MI);
2499 return Legalized;
2500 }
2501
2502 if (!SrcTy.isVector())
2503 return UnableToLegalize;
2504
// Only whole-element extracts from vectors are supported here.
2505 if (DstTy != SrcTy.getElementType())
2506 return UnableToLegalize;
2507
2508 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2509 return UnableToLegalize;
2510
2511 Observer.changingInstr(MI);
2512 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2513
// Rescale the bit offset by the element-size growth of the widened vector.
2514 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2515 Offset);
2516 widenScalarDst(MI, WideTy.getScalarType(), 0);
2517 Observer.changedInstr(MI);
2518 return Legalized;
2519}
2520
// Widen G_INSERT for scalar TypeIdx 0: any-extend the big source
// (operand 1) to WideTy and widen the destination to match; the insert
// offset immediate is unchanged since the low bits keep their positions.
// NOTE(review): the return-type line of the signature was dropped by the
// page extraction.
2522 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2523 LLT WideTy) {
2524 if (TypeIdx != 0 || WideTy.isVector())
2525 return UnableToLegalize;
2526 Observer.changingInstr(MI);
2527 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2528 widenScalarDst(MI, WideTy);
2529 Observer.changedInstr(MI);
2530 return Legalized;
2531}
2532
// Widen the overflow-producing add/sub family (G_[US]ADDO, G_[US]SUBO,
// and the carry-in variants G_[US]ADDE / G_[US]SUBE). TypeIdx 1 just
// widens the boolean carry output (and carry input, if any). TypeIdx 0
// extends both operands (sext for signed variants, zext for unsigned),
// performs the operation in WideTy, and detects overflow by checking
// whether trunc + re-extend reproduces the wide result. Note that the
// signed carry ops are mapped to their unsigned forms (G_UADDE/G_USUBE)
// for the wide arithmetic.
// NOTE(review): the return-type line of the signature was dropped by the
// page extraction.
2534 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2535 LLT WideTy) {
2536 unsigned Opcode;
2537 unsigned ExtOpcode;
2538 std::optional<Register> CarryIn;
2539 switch (MI.getOpcode()) {
2540 default:
2541 llvm_unreachable("Unexpected opcode!");
2542 case TargetOpcode::G_SADDO:
2543 Opcode = TargetOpcode::G_ADD;
2544 ExtOpcode = TargetOpcode::G_SEXT;
2545 break;
2546 case TargetOpcode::G_SSUBO:
2547 Opcode = TargetOpcode::G_SUB;
2548 ExtOpcode = TargetOpcode::G_SEXT;
2549 break;
2550 case TargetOpcode::G_UADDO:
2551 Opcode = TargetOpcode::G_ADD;
2552 ExtOpcode = TargetOpcode::G_ZEXT;
2553 break;
2554 case TargetOpcode::G_USUBO:
2555 Opcode = TargetOpcode::G_SUB;
2556 ExtOpcode = TargetOpcode::G_ZEXT;
2557 break;
2558 case TargetOpcode::G_SADDE:
2559 Opcode = TargetOpcode::G_UADDE;
2560 ExtOpcode = TargetOpcode::G_SEXT;
2561 CarryIn = MI.getOperand(4).getReg();
2562 break;
2563 case TargetOpcode::G_SSUBE:
2564 Opcode = TargetOpcode::G_USUBE;
2565 ExtOpcode = TargetOpcode::G_SEXT;
2566 CarryIn = MI.getOperand(4).getReg();
2567 break;
2568 case TargetOpcode::G_UADDE:
2569 Opcode = TargetOpcode::G_UADDE;
2570 ExtOpcode = TargetOpcode::G_ZEXT;
2571 CarryIn = MI.getOperand(4).getReg();
2572 break;
2573 case TargetOpcode::G_USUBE:
2574 Opcode = TargetOpcode::G_USUBE;
2575 ExtOpcode = TargetOpcode::G_ZEXT;
2576 CarryIn = MI.getOperand(4).getReg();
2577 break;
2578 }
2579
2580 if (TypeIdx == 1) {
2581 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2582
2583 Observer.changingInstr(MI);
2584 if (CarryIn)
2585 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2586 widenScalarDst(MI, WideTy, 1);
2587
2588 Observer.changedInstr(MI);
2589 return Legalized;
2590 }
2591
2592 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2593 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2594 // Do the arithmetic in the larger type.
2595 Register NewOp;
2596 if (CarryIn) {
2597 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2598 NewOp = MIRBuilder
2599 .buildInstr(Opcode, {WideTy, CarryOutTy},
2600 {LHSExt, RHSExt, *CarryIn})
2601 .getReg(0);
2602 } else {
2603 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2604 }
2605 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2606 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2607 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2608 // There is no overflow if the ExtOp is the same as NewOp.
2609 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2610 // Now trunc the NewOp to the original result.
2611 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2612 MI.eraseFromParent();
2613 return Legalized;
2614}
2615
// Widen the saturating add/sub/shift family (G_[US]ADDSAT, G_[US]SUBSAT,
// G_[US]SHLSAT) via the shift trick described in the body: extend to
// WideTy, pre-shift the operands left by the width difference so the
// saturation boundary lines up with the wide type's, perform the wide
// saturating op, then shift back (ashr for signed, lshr for unsigned)
// and truncate.
// NOTE(review): the return-type line of the signature was dropped by the
// page extraction.
2617 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2618 LLT WideTy) {
2619 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2620 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2621 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2622 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2623 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2624 // We can convert this to:
2625 // 1. Any extend iN to iM
2626 // 2. SHL by M-N
2627 // 3. [US][ADD|SUB|SHL]SAT
2628 // 4. L/ASHR by M-N
2629 //
2630 // It may be more efficient to lower this to a min and a max operation in
2631 // the higher precision arithmetic if the promoted operation isn't legal,
2632 // but this decision is up to the target's lowering request.
2633 Register DstReg = MI.getOperand(0).getReg();
2634
2635 unsigned NewBits = WideTy.getScalarSizeInBits();
2636 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2637
2638 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2639 // must not left shift the RHS to preserve the shift amount.
2640 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2641 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2642 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2643 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2644 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2645 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2646
// The instruction flags (e.g. no-wrap hints) are preserved on the wide op.
2647 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2648 {ShiftL, ShiftR}, MI.getFlags());
2649
2650 // Use a shift that will preserve the number of sign bits when the trunc is
2651 // folded away.
2652 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2653 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2654
2655 MIRBuilder.buildTrunc(DstReg, Result);
2656 MI.eraseFromParent();
2657 return Legalized;
2658}
2659
// Widen G_UMULO / G_SMULO. TypeIdx 1 only widens the overflow bit.
// TypeIdx 0 extends the operands (sext/zext per signedness) and
// multiplies in WideTy — using the wide mulo form only when
// WideTy < 2 * the original width (otherwise the wide multiply cannot
// overflow and a plain G_MUL suffices) — then checks that the high bits
// of the wide product still sign/zero-extend the low part, OR-ing in the
// wide op's own overflow flag when one was produced.
// NOTE(review): the return-type line of the signature was dropped by the
// page extraction.
2661 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2662 LLT WideTy) {
2663 if (TypeIdx == 1) {
2664 Observer.changingInstr(MI);
2665 widenScalarDst(MI, WideTy, 1);
2666 Observer.changedInstr(MI);
2667 return Legalized;
2668 }
2669
2670 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2671 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2672 LLT SrcTy = MRI.getType(LHS);
2673 LLT OverflowTy = MRI.getType(OriginalOverflow);
2674 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2675
2676 // To determine if the result overflowed in the larger type, we extend the
2677 // input to the larger type, do the multiply (checking if it overflows),
2678 // then also check the high bits of the result to see if overflow happened
2679 // there.
2680 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2681 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2682 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2683
2684 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2685 // so we don't need to check the overflow result of larger type Mulo.
2686 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2687
2688 unsigned MulOpc =
2689 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2690
2691 MachineInstrBuilder Mulo;
2692 if (WideMulCanOverflow)
2693 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2694 {LeftOperand, RightOperand});
2695 else
2696 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2697
2698 auto Mul = Mulo->getOperand(0);
2699 MIRBuilder.buildTrunc(Result, Mul);
2700
2701 MachineInstrBuilder ExtResult;
2702 // Overflow occurred if it occurred in the larger type, or if the high part
2703 // of the result does not zero/sign-extend the low part. Check this second
2704 // possibility first.
2705 if (IsSigned) {
2706 // For signed, overflow occurred when the high part does not sign-extend
2707 // the low part.
2708 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2709 } else {
2710 // Unsigned overflow occurred when the high part does not zero-extend the
2711 // low part.
2712 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2713 }
2714
2715 if (WideMulCanOverflow) {
2716 auto Overflow =
2717 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2718 // Finally check if the multiplication in the larger type itself overflowed.
2719 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2720 } else {
2721 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2722 }
2723 MI.eraseFromParent();
2724 return Legalized;
2725}
2726
2729 unsigned Opcode = MI.getOpcode();
2730 switch (Opcode) {
2731 default:
2732 return UnableToLegalize;
2733 case TargetOpcode::G_ATOMICRMW_XCHG:
2734 case TargetOpcode::G_ATOMICRMW_ADD:
2735 case TargetOpcode::G_ATOMICRMW_SUB:
2736 case TargetOpcode::G_ATOMICRMW_AND:
2737 case TargetOpcode::G_ATOMICRMW_OR:
2738 case TargetOpcode::G_ATOMICRMW_XOR:
2739 case TargetOpcode::G_ATOMICRMW_MIN:
2740 case TargetOpcode::G_ATOMICRMW_MAX:
2741 case TargetOpcode::G_ATOMICRMW_UMIN:
2742 case TargetOpcode::G_ATOMICRMW_UMAX:
2743 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2744 Observer.changingInstr(MI);
2745 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2746 widenScalarDst(MI, WideTy, 0);
2747 Observer.changedInstr(MI);
2748 return Legalized;
2749 case TargetOpcode::G_ATOMIC_CMPXCHG:
2750 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2751 Observer.changingInstr(MI);
2752 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2753 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2754 widenScalarDst(MI, WideTy, 0);
2755 Observer.changedInstr(MI);
2756 return Legalized;
2757 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2758 if (TypeIdx == 0) {
2759 Observer.changingInstr(MI);
2760 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2761 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2762 widenScalarDst(MI, WideTy, 0);
2763 Observer.changedInstr(MI);
2764 return Legalized;
2765 }
2766 assert(TypeIdx == 1 &&
2767 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2768 Observer.changingInstr(MI);
2769 widenScalarDst(MI, WideTy, 1);
2770 Observer.changedInstr(MI);
2771 return Legalized;
2772 case TargetOpcode::G_EXTRACT:
2773 return widenScalarExtract(MI, TypeIdx, WideTy);
2774 case TargetOpcode::G_INSERT:
2775 return widenScalarInsert(MI, TypeIdx, WideTy);
2776 case TargetOpcode::G_MERGE_VALUES:
2777 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2778 case TargetOpcode::G_UNMERGE_VALUES:
2779 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2780 case TargetOpcode::G_SADDO:
2781 case TargetOpcode::G_SSUBO:
2782 case TargetOpcode::G_UADDO:
2783 case TargetOpcode::G_USUBO:
2784 case TargetOpcode::G_SADDE:
2785 case TargetOpcode::G_SSUBE:
2786 case TargetOpcode::G_UADDE:
2787 case TargetOpcode::G_USUBE:
2788 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2789 case TargetOpcode::G_UMULO:
2790 case TargetOpcode::G_SMULO:
2791 return widenScalarMulo(MI, TypeIdx, WideTy);
2792 case TargetOpcode::G_SADDSAT:
2793 case TargetOpcode::G_SSUBSAT:
2794 case TargetOpcode::G_SSHLSAT:
2795 case TargetOpcode::G_UADDSAT:
2796 case TargetOpcode::G_USUBSAT:
2797 case TargetOpcode::G_USHLSAT:
2798 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2799 case TargetOpcode::G_CTTZ:
2800 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2801 case TargetOpcode::G_CTLZ:
2802 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2803 case TargetOpcode::G_CTLS:
2804 case TargetOpcode::G_CTPOP: {
2805 if (TypeIdx == 0) {
2806 Observer.changingInstr(MI);
2807 widenScalarDst(MI, WideTy, 0);
2808 Observer.changedInstr(MI);
2809 return Legalized;
2810 }
2811
2812 Register SrcReg = MI.getOperand(1).getReg();
2813
2814 // First extend the input.
2815 unsigned ExtOpc;
2816 switch (Opcode) {
2817 case TargetOpcode::G_CTTZ:
2818 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2819 case TargetOpcode::G_CTLZ_ZERO_UNDEF: // undef bits shifted out below
2820 ExtOpc = TargetOpcode::G_ANYEXT;
2821 break;
2822 case TargetOpcode::G_CTLS:
2823 ExtOpc = TargetOpcode::G_SEXT;
2824 break;
2825 default:
2826 ExtOpc = TargetOpcode::G_ZEXT;
2827 }
2828
2829 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2830 LLT CurTy = MRI.getType(SrcReg);
2831 unsigned NewOpc = Opcode;
2832 if (NewOpc == TargetOpcode::G_CTTZ) {
2833 // The count is the same in the larger type except if the original
2834 // value was zero. This can be handled by setting the bit just off
2835 // the top of the original type.
2836 auto TopBit =
2838 MIBSrc = MIRBuilder.buildOr(
2839 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2840 // Now we know the operand is non-zero, use the more relaxed opcode.
2841 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2842 }
2843
2844 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2845
2846 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2847 // An optimization where the result is the CTLZ after the left shift by
2848 // (Difference in widety and current ty), that is,
2849 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2850 // Result = ctlz MIBSrc
2851 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2852 MIRBuilder.buildConstant(WideTy, SizeDiff));
2853 }
2854
2855 // Perform the operation at the larger size.
2856 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2857 // This is already the correct result for CTPOP and CTTZs
2858 if (Opcode == TargetOpcode::G_CTLZ || Opcode == TargetOpcode::G_CTLS) {
2859 // The correct result is NewOp - (Difference in widety and current ty).
2860 MIBNewOp = MIRBuilder.buildSub(
2861 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2862 }
2863
2864 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2865 MI.eraseFromParent();
2866 return Legalized;
2867 }
2868 case TargetOpcode::G_BSWAP: {
2869 Observer.changingInstr(MI);
2870 Register DstReg = MI.getOperand(0).getReg();
2871
2872 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2873 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2874 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2875 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2876
2877 MI.getOperand(0).setReg(DstExt);
2878
2879 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2880
2881 LLT Ty = MRI.getType(DstReg);
2882 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2883 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2884 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2885
2886 MIRBuilder.buildTrunc(DstReg, ShrReg);
2887 Observer.changedInstr(MI);
2888 return Legalized;
2889 }
2890 case TargetOpcode::G_BITREVERSE: {
2891 Observer.changingInstr(MI);
2892
2893 Register DstReg = MI.getOperand(0).getReg();
2894 LLT Ty = MRI.getType(DstReg);
2895 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2896
2897 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2898 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2899 MI.getOperand(0).setReg(DstExt);
2900 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2901
2902 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2903 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2904 MIRBuilder.buildTrunc(DstReg, Shift);
2905 Observer.changedInstr(MI);
2906 return Legalized;
2907 }
2908 case TargetOpcode::G_FREEZE:
2909 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2910 Observer.changingInstr(MI);
2911 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2912 widenScalarDst(MI, WideTy);
2913 Observer.changedInstr(MI);
2914 return Legalized;
2915
2916 case TargetOpcode::G_ABS:
2917 Observer.changingInstr(MI);
2918 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2919 widenScalarDst(MI, WideTy);
2920 Observer.changedInstr(MI);
2921 return Legalized;
2922
2923 case TargetOpcode::G_ADD:
2924 case TargetOpcode::G_AND:
2925 case TargetOpcode::G_MUL:
2926 case TargetOpcode::G_OR:
2927 case TargetOpcode::G_XOR:
2928 case TargetOpcode::G_SUB:
2929 case TargetOpcode::G_SHUFFLE_VECTOR:
2930 // Perform operation at larger width (any extension is fine here, high bits
2931 // don't affect the result) and then truncate the result back to the
2932 // original type.
2933 Observer.changingInstr(MI);
2934 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2935 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2936 widenScalarDst(MI, WideTy);
2937 Observer.changedInstr(MI);
2938 return Legalized;
2939
2940 case TargetOpcode::G_SBFX:
2941 case TargetOpcode::G_UBFX:
2942 Observer.changingInstr(MI);
2943
2944 if (TypeIdx == 0) {
2945 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2946 widenScalarDst(MI, WideTy);
2947 } else {
2948 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2949 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2950 }
2951
2952 Observer.changedInstr(MI);
2953 return Legalized;
2954
2955 case TargetOpcode::G_SHL:
2956 Observer.changingInstr(MI);
2957
2958 if (TypeIdx == 0) {
2959 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2960 widenScalarDst(MI, WideTy);
2961 } else {
2962 assert(TypeIdx == 1);
2963 // The "number of bits to shift" operand must preserve its value as an
2964 // unsigned integer:
2965 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2966 }
2967
2968 Observer.changedInstr(MI);
2969 return Legalized;
2970
2971 case TargetOpcode::G_ROTR:
2972 case TargetOpcode::G_ROTL:
2973 if (TypeIdx != 1)
2974 return UnableToLegalize;
2975
2976 Observer.changingInstr(MI);
2977 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2978 Observer.changedInstr(MI);
2979 return Legalized;
2980
2981 case TargetOpcode::G_SDIV:
2982 case TargetOpcode::G_SREM:
2983 case TargetOpcode::G_SMIN:
2984 case TargetOpcode::G_SMAX:
2985 case TargetOpcode::G_ABDS:
2986 Observer.changingInstr(MI);
2987 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2988 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2989 widenScalarDst(MI, WideTy);
2990 Observer.changedInstr(MI);
2991 return Legalized;
2992
2993 case TargetOpcode::G_SDIVREM:
2994 Observer.changingInstr(MI);
2995 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2996 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2997 widenScalarDst(MI, WideTy);
2998 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
2999 widenScalarDst(MI, WideTy, 1);
3000 Observer.changedInstr(MI);
3001 return Legalized;
3002
3003 case TargetOpcode::G_ASHR:
3004 case TargetOpcode::G_LSHR:
3005 Observer.changingInstr(MI);
3006
3007 if (TypeIdx == 0) {
3008 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
3009 : TargetOpcode::G_ZEXT;
3010
3011 widenScalarSrc(MI, WideTy, 1, CvtOp);
3012 widenScalarDst(MI, WideTy);
3013 } else {
3014 assert(TypeIdx == 1);
3015 // The "number of bits to shift" operand must preserve its value as an
3016 // unsigned integer:
3017 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3018 }
3019
3020 Observer.changedInstr(MI);
3021 return Legalized;
3022 case TargetOpcode::G_UDIV:
3023 case TargetOpcode::G_UREM:
3024 case TargetOpcode::G_ABDU:
3025 Observer.changingInstr(MI);
3026 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3027 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3028 widenScalarDst(MI, WideTy);
3029 Observer.changedInstr(MI);
3030 return Legalized;
3031 case TargetOpcode::G_UDIVREM:
3032 Observer.changingInstr(MI);
3033 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3034 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3035 widenScalarDst(MI, WideTy);
3036 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3037 widenScalarDst(MI, WideTy, 1);
3038 Observer.changedInstr(MI);
3039 return Legalized;
3040 case TargetOpcode::G_UMIN:
3041 case TargetOpcode::G_UMAX: {
3042 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3043
3044 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3045 unsigned ExtOpc =
3046 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty, Ctx),
3047 getApproximateEVTForLLT(WideTy, Ctx))
3048 ? TargetOpcode::G_SEXT
3049 : TargetOpcode::G_ZEXT;
3050
3051 Observer.changingInstr(MI);
3052 widenScalarSrc(MI, WideTy, 1, ExtOpc);
3053 widenScalarSrc(MI, WideTy, 2, ExtOpc);
3054 widenScalarDst(MI, WideTy);
3055 Observer.changedInstr(MI);
3056 return Legalized;
3057 }
3058
3059 case TargetOpcode::G_SELECT:
3060 Observer.changingInstr(MI);
3061 if (TypeIdx == 0) {
3062 // Perform operation at larger width (any extension is fine here, high
3063 // bits don't affect the result) and then truncate the result back to the
3064 // original type.
3065 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3066 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
3067 widenScalarDst(MI, WideTy);
3068 } else {
3069 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
3070 // Explicit extension is required here since high bits affect the result.
3071 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
3072 }
3073 Observer.changedInstr(MI);
3074 return Legalized;
3075
3076 case TargetOpcode::G_FPEXT:
3077 if (TypeIdx != 1)
3078 return UnableToLegalize;
3079
3080 Observer.changingInstr(MI);
3081 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3082 Observer.changedInstr(MI);
3083 return Legalized;
3084 case TargetOpcode::G_FPTOSI:
3085 case TargetOpcode::G_FPTOUI:
3086 case TargetOpcode::G_INTRINSIC_LRINT:
3087 case TargetOpcode::G_INTRINSIC_LLRINT:
3088 case TargetOpcode::G_IS_FPCLASS:
3089 Observer.changingInstr(MI);
3090
3091 if (TypeIdx == 0)
3092 widenScalarDst(MI, WideTy);
3093 else
3094 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3095
3096 Observer.changedInstr(MI);
3097 return Legalized;
3098 case TargetOpcode::G_SITOFP:
3099 Observer.changingInstr(MI);
3100
3101 if (TypeIdx == 0)
3102 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3103 else
3104 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
3105
3106 Observer.changedInstr(MI);
3107 return Legalized;
3108 case TargetOpcode::G_UITOFP:
3109 Observer.changingInstr(MI);
3110
3111 if (TypeIdx == 0)
3112 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3113 else
3114 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3115
3116 Observer.changedInstr(MI);
3117 return Legalized;
3118 case TargetOpcode::G_FPTOSI_SAT:
3119 case TargetOpcode::G_FPTOUI_SAT:
3120 Observer.changingInstr(MI);
3121
3122 if (TypeIdx == 0) {
3123 Register OldDst = MI.getOperand(0).getReg();
3124 LLT Ty = MRI.getType(OldDst);
3125 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3126 Register NewDst;
3127 MI.getOperand(0).setReg(ExtReg);
3128 uint64_t ShortBits = Ty.getScalarSizeInBits();
3129 uint64_t WideBits = WideTy.getScalarSizeInBits();
3130 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3131 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3132 // z = i16 fptosi_sat(a)
3133 // ->
3134 // x = i32 fptosi_sat(a)
3135 // y = smin(x, 32767)
3136 // z = smax(y, -32768)
3137 auto MaxVal = MIRBuilder.buildConstant(
3138 WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
3139 auto MinVal = MIRBuilder.buildConstant(
3140 WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
3141 Register MidReg =
3142 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3143 NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3144 } else {
3145 // z = i16 fptoui_sat(a)
3146 // ->
3147 // x = i32 fptoui_sat(a)
3148 // y = smin(x, 65535)
3149 auto MaxVal = MIRBuilder.buildConstant(
3150 WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
3151 NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3152 }
3153 MIRBuilder.buildTrunc(OldDst, NewDst);
3154 } else
3155 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3156
3157 Observer.changedInstr(MI);
3158 return Legalized;
3159 case TargetOpcode::G_LOAD:
3160 case TargetOpcode::G_SEXTLOAD:
3161 case TargetOpcode::G_ZEXTLOAD:
3162 Observer.changingInstr(MI);
3163 widenScalarDst(MI, WideTy);
3164 Observer.changedInstr(MI);
3165 return Legalized;
3166
3167 case TargetOpcode::G_STORE: {
3168 if (TypeIdx != 0)
3169 return UnableToLegalize;
3170
3171 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3172 assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
3173 if (!Ty.isScalar()) {
3174 // We need to widen the vector element type.
3175 Observer.changingInstr(MI);
3176 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
3177 // We also need to adjust the MMO to turn this into a truncating store.
3178 MachineMemOperand &MMO = **MI.memoperands_begin();
3179 MachineFunction &MF = MIRBuilder.getMF();
3180 auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
3181 MI.setMemRefs(MF, {NewMMO});
3182 Observer.changedInstr(MI);
3183 return Legalized;
3184 }
3185
3186 Observer.changingInstr(MI);
3187
3188 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3189 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3190 widenScalarSrc(MI, WideTy, 0, ExtType);
3191
3192 Observer.changedInstr(MI);
3193 return Legalized;
3194 }
3195 case TargetOpcode::G_CONSTANT: {
3196 MachineOperand &SrcMO = MI.getOperand(1);
3197 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3198 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3199 MRI.getType(MI.getOperand(0).getReg()));
3200 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3201 ExtOpc == TargetOpcode::G_ANYEXT) &&
3202 "Illegal Extend");
3203 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3204 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3205 ? SrcVal.sext(WideTy.getSizeInBits())
3206 : SrcVal.zext(WideTy.getSizeInBits());
3207 Observer.changingInstr(MI);
3208 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3209
3210 widenScalarDst(MI, WideTy);
3211 Observer.changedInstr(MI);
3212 return Legalized;
3213 }
3214 case TargetOpcode::G_FCONSTANT: {
3215 // To avoid changing the bits of the constant due to extension to a larger
3216 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
3217 MachineOperand &SrcMO = MI.getOperand(1);
3218 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3219 MIRBuilder.setInstrAndDebugLoc(MI);
3220 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
3221 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
3222 MI.eraseFromParent();
3223 return Legalized;
3224 }
3225 case TargetOpcode::G_IMPLICIT_DEF: {
3226 Observer.changingInstr(MI);
3227 widenScalarDst(MI, WideTy);
3228 Observer.changedInstr(MI);
3229 return Legalized;
3230 }
3231 case TargetOpcode::G_BRCOND:
3232 Observer.changingInstr(MI);
3233 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
3234 Observer.changedInstr(MI);
3235 return Legalized;
3236
3237 case TargetOpcode::G_FCMP:
3238 Observer.changingInstr(MI);
3239 if (TypeIdx == 0)
3240 widenScalarDst(MI, WideTy);
3241 else {
3242 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3243 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
3244 }
3245 Observer.changedInstr(MI);
3246 return Legalized;
3247
3248 case TargetOpcode::G_ICMP:
3249 Observer.changingInstr(MI);
3250 if (TypeIdx == 0)
3251 widenScalarDst(MI, WideTy);
3252 else {
3253 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
3254 CmpInst::Predicate Pred =
3255 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3256
3257 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3258 unsigned ExtOpcode =
3259 (CmpInst::isSigned(Pred) ||
3260 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx),
3261 getApproximateEVTForLLT(WideTy, Ctx)))
3262 ? TargetOpcode::G_SEXT
3263 : TargetOpcode::G_ZEXT;
3264 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
3265 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
3266 }
3267 Observer.changedInstr(MI);
3268 return Legalized;
3269
3270 case TargetOpcode::G_PTR_ADD:
3271 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
3272 Observer.changingInstr(MI);
3273 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3274 Observer.changedInstr(MI);
3275 return Legalized;
3276
3277 case TargetOpcode::G_PHI: {
3278 assert(TypeIdx == 0 && "Expecting only Idx 0");
3279
3280 Observer.changingInstr(MI);
3281 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3282 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3283 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
3284 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
3285 }
3286
3287 MachineBasicBlock &MBB = *MI.getParent();
3288 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3289 widenScalarDst(MI, WideTy);
3290 Observer.changedInstr(MI);
3291 return Legalized;
3292 }
3293 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3294 if (TypeIdx == 0) {
3295 Register VecReg = MI.getOperand(1).getReg();
3296 LLT VecTy = MRI.getType(VecReg);
3297 Observer.changingInstr(MI);
3298
3300 MI,
3302 TargetOpcode::G_ANYEXT);
3303
3304 widenScalarDst(MI, WideTy, 0);
3305 Observer.changedInstr(MI);
3306 return Legalized;
3307 }
3308
3309 if (TypeIdx != 2)
3310 return UnableToLegalize;
3311 Observer.changingInstr(MI);
3312 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3313 Observer.changedInstr(MI);
3314 return Legalized;
3315 }
3316 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3317 if (TypeIdx == 0) {
3318 Observer.changingInstr(MI);
3319 const LLT WideEltTy = WideTy.getElementType();
3320
3321 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3322 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3323 widenScalarDst(MI, WideTy, 0);
3324 Observer.changedInstr(MI);
3325 return Legalized;
3326 }
3327
3328 if (TypeIdx == 1) {
3329 Observer.changingInstr(MI);
3330
3331 Register VecReg = MI.getOperand(1).getReg();
3332 LLT VecTy = MRI.getType(VecReg);
3333 LLT WideVecTy = VecTy.changeVectorElementType(WideTy);
3334
3335 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3336 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3337 widenScalarDst(MI, WideVecTy, 0);
3338 Observer.changedInstr(MI);
3339 return Legalized;
3340 }
3341
3342 if (TypeIdx == 2) {
3343 Observer.changingInstr(MI);
3344 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3345 Observer.changedInstr(MI);
3346 return Legalized;
3347 }
3348
3349 return UnableToLegalize;
3350 }
3351 case TargetOpcode::G_FADD:
3352 case TargetOpcode::G_FMUL:
3353 case TargetOpcode::G_FSUB:
3354 case TargetOpcode::G_FMA:
3355 case TargetOpcode::G_FMAD:
3356 case TargetOpcode::G_FNEG:
3357 case TargetOpcode::G_FABS:
3358 case TargetOpcode::G_FCANONICALIZE:
3359 case TargetOpcode::G_FMINNUM:
3360 case TargetOpcode::G_FMAXNUM:
3361 case TargetOpcode::G_FMINNUM_IEEE:
3362 case TargetOpcode::G_FMAXNUM_IEEE:
3363 case TargetOpcode::G_FMINIMUM:
3364 case TargetOpcode::G_FMAXIMUM:
3365 case TargetOpcode::G_FMINIMUMNUM:
3366 case TargetOpcode::G_FMAXIMUMNUM:
3367 case TargetOpcode::G_FDIV:
3368 case TargetOpcode::G_FREM:
3369 case TargetOpcode::G_FCEIL:
3370 case TargetOpcode::G_FFLOOR:
3371 case TargetOpcode::G_FCOS:
3372 case TargetOpcode::G_FSIN:
3373 case TargetOpcode::G_FTAN:
3374 case TargetOpcode::G_FACOS:
3375 case TargetOpcode::G_FASIN:
3376 case TargetOpcode::G_FATAN:
3377 case TargetOpcode::G_FATAN2:
3378 case TargetOpcode::G_FCOSH:
3379 case TargetOpcode::G_FSINH:
3380 case TargetOpcode::G_FTANH:
3381 case TargetOpcode::G_FLOG10:
3382 case TargetOpcode::G_FLOG:
3383 case TargetOpcode::G_FLOG2:
3384 case TargetOpcode::G_FRINT:
3385 case TargetOpcode::G_FNEARBYINT:
3386 case TargetOpcode::G_FSQRT:
3387 case TargetOpcode::G_FEXP:
3388 case TargetOpcode::G_FEXP2:
3389 case TargetOpcode::G_FEXP10:
3390 case TargetOpcode::G_FPOW:
3391 case TargetOpcode::G_INTRINSIC_TRUNC:
3392 case TargetOpcode::G_INTRINSIC_ROUND:
3393 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3394 assert(TypeIdx == 0);
3395 Observer.changingInstr(MI);
3396
3397 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3398 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3399
3400 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3401 Observer.changedInstr(MI);
3402 return Legalized;
3403 case TargetOpcode::G_FMODF: {
3404 Observer.changingInstr(MI);
3405 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3406
3407 widenScalarDst(MI, WideTy, 1, TargetOpcode::G_FPTRUNC);
3408 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3409 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3410 Observer.changedInstr(MI);
3411 return Legalized;
3412 }
3413 case TargetOpcode::G_FPOWI:
3414 case TargetOpcode::G_FLDEXP:
3415 case TargetOpcode::G_STRICT_FLDEXP: {
3416 if (TypeIdx == 0) {
3417 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3418 return UnableToLegalize;
3419
3420 Observer.changingInstr(MI);
3421 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3422 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3423 Observer.changedInstr(MI);
3424 return Legalized;
3425 }
3426
3427 if (TypeIdx == 1) {
3428 // For some reason SelectionDAG tries to promote to a libcall without
3429 // actually changing the integer type for promotion.
3430 Observer.changingInstr(MI);
3431 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3432 Observer.changedInstr(MI);
3433 return Legalized;
3434 }
3435
3436 return UnableToLegalize;
3437 }
3438 case TargetOpcode::G_FFREXP: {
3439 Observer.changingInstr(MI);
3440
3441 if (TypeIdx == 0) {
3442 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3443 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3444 } else {
3445 widenScalarDst(MI, WideTy, 1);
3446 }
3447
3448 Observer.changedInstr(MI);
3449 return Legalized;
3450 }
3451 case TargetOpcode::G_LROUND:
3452 case TargetOpcode::G_LLROUND:
3453 Observer.changingInstr(MI);
3454
3455 if (TypeIdx == 0)
3456 widenScalarDst(MI, WideTy);
3457 else
3458 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3459
3460 Observer.changedInstr(MI);
3461 return Legalized;
3462
3463 case TargetOpcode::G_INTTOPTR:
3464 if (TypeIdx != 1)
3465 return UnableToLegalize;
3466
3467 Observer.changingInstr(MI);
3468 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3469 Observer.changedInstr(MI);
3470 return Legalized;
3471 case TargetOpcode::G_PTRTOINT:
3472 if (TypeIdx != 0)
3473 return UnableToLegalize;
3474
3475 Observer.changingInstr(MI);
3476 widenScalarDst(MI, WideTy, 0);
3477 Observer.changedInstr(MI);
3478 return Legalized;
3479 case TargetOpcode::G_BUILD_VECTOR: {
3480 Observer.changingInstr(MI);
3481
3482 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3483 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3484 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3485
3486 // Avoid changing the result vector type if the source element type was
3487 // requested.
3488 if (TypeIdx == 1) {
3489 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3490 } else {
3491 widenScalarDst(MI, WideTy, 0);
3492 }
3493
3494 Observer.changedInstr(MI);
3495 return Legalized;
3496 }
3497 case TargetOpcode::G_SEXT_INREG:
3498 if (TypeIdx != 0)
3499 return UnableToLegalize;
3500
3501 Observer.changingInstr(MI);
3502 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3503 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3504 Observer.changedInstr(MI);
3505 return Legalized;
3506 case TargetOpcode::G_PTRMASK: {
3507 if (TypeIdx != 1)
3508 return UnableToLegalize;
3509 Observer.changingInstr(MI);
3510 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3511 Observer.changedInstr(MI);
3512 return Legalized;
3513 }
3514 case TargetOpcode::G_VECREDUCE_ADD: {
3515 if (TypeIdx != 1)
3516 return UnableToLegalize;
3517 Observer.changingInstr(MI);
3518 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3519 widenScalarDst(MI, WideTy.getScalarType(), 0, TargetOpcode::G_TRUNC);
3520 Observer.changedInstr(MI);
3521 return Legalized;
3522 }
3523 case TargetOpcode::G_VECREDUCE_FADD:
3524 case TargetOpcode::G_VECREDUCE_FMUL:
3525 case TargetOpcode::G_VECREDUCE_FMIN:
3526 case TargetOpcode::G_VECREDUCE_FMAX:
3527 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3528 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3529 if (TypeIdx != 0)
3530 return UnableToLegalize;
3531 Observer.changingInstr(MI);
3532 Register VecReg = MI.getOperand(1).getReg();
3533 LLT VecTy = MRI.getType(VecReg);
3534 LLT WideVecTy = VecTy.changeElementType(WideTy);
3535 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3536 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3537 Observer.changedInstr(MI);
3538 return Legalized;
3539 }
3540 case TargetOpcode::G_VSCALE: {
3541 MachineOperand &SrcMO = MI.getOperand(1);
3542 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3543 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3544 // The CImm is always a signed value
3545 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3546 Observer.changingInstr(MI);
3547 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3548 widenScalarDst(MI, WideTy);
3549 Observer.changedInstr(MI);
3550 return Legalized;
3551 }
3552 case TargetOpcode::G_SPLAT_VECTOR: {
3553 if (TypeIdx != 1)
3554 return UnableToLegalize;
3555
3556 Observer.changingInstr(MI);
3557 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3558 Observer.changedInstr(MI);
3559 return Legalized;
3560 }
3561 case TargetOpcode::G_INSERT_SUBVECTOR: {
3562 if (TypeIdx != 0)
3563 return UnableToLegalize;
3564
3566 Register BigVec = IS.getBigVec();
3567 Register SubVec = IS.getSubVec();
3568
3569 LLT SubVecTy = MRI.getType(SubVec);
3570 LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
3571
3572 // Widen the G_INSERT_SUBVECTOR
3573 auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
3574 auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3575 auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3576 IS.getIndexImm());
3577
3578 // Truncate back down
3579 auto SplatZero = MIRBuilder.buildSplatVector(
3580 WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
3581 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, IS.getReg(0), WideInsert,
3582 SplatZero);
3583
3584 MI.eraseFromParent();
3585
3586 return Legalized;
3587 }
3588 }
3589}
3590
3592 MachineIRBuilder &B, Register Src, LLT Ty) {
  // Break \p Src into \p Ty sized pieces with a single G_UNMERGE_VALUES and
  // append every result register to \p Pieces.
3593 auto Unmerge = B.buildUnmerge(Ty, Src);
  // The unmerge instruction has N defs followed by the one source operand, so
  // the number of pieces is getNumOperands() - 1.
3594 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3595 Pieces.push_back(Unmerge.getReg(I));
3596 }
3597
/// Materialize \p ConstVal into \p DstReg by placing it in the function's
/// constant pool and emitting a G_LOAD of the pool entry.
3598 static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3599 MachineIRBuilder &MIRBuilder) {
3600 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3601 MachineFunction &MF = MIRBuilder.getMF();
3602 const DataLayout &DL = MIRBuilder.getDataLayout();
  // Address the pool entry with a pointer in the default globals address
  // space, using that space's pointer width.
3603 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3604 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3605 LLT DstLLT = MRI.getType(DstReg);
3606
  // Use the natural ABI alignment of the constant's IR type for the slot.
3607 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3608
3609 auto Addr = MIRBuilder.buildConstantPool(
3610 AddrPtrTy,
3611 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3612
  // Build a load memory operand sized by the destination type so the G_LOAD
  // reads exactly the bits being materialized.
3613 MachineMemOperand *MMO =
3615 MachineMemOperand::MOLoad, DstLLT, Alignment);
3616
3617 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3618 }
3619
  // Lower a G_CONSTANT by loading its integer immediate (operand 1's CImm)
  // from the constant pool instead of materializing it inline.
3622 const MachineOperand &ConstOperand = MI.getOperand(1);
3623 const Constant *ConstantVal = ConstOperand.getCImm();
3624
  // Replace the instruction with a constant-pool load into the original
  // destination register, then delete the now-dead G_CONSTANT.
3625 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3626 MI.eraseFromParent();
3627
3628 return Legalized;
3629 }
3630
  // Lower a G_FCONSTANT by loading its floating-point immediate (operand 1's
  // FPImm) from the constant pool instead of materializing it inline.
3633 const MachineOperand &ConstOperand = MI.getOperand(1);
3634 const Constant *ConstantVal = ConstOperand.getFPImm();
3635
  // Replace the instruction with a constant-pool load into the original
  // destination register, then delete the now-dead G_FCONSTANT.
3636 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3637 MI.eraseFromParent();
3638
3639 return Legalized;
3640 }
3641
  // Lower a G_BITCAST involving vectors by unmerging the source into pieces,
  // bitcasting each piece where the element sizes differ, and re-merging the
  // pieces into the destination type. Scalar-to-scalar bitcasts are not
  // handled here (falls through to UnableToLegalize).
3644 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3645 if (SrcTy.isVector()) {
3646 LLT SrcEltTy = SrcTy.getElementType();
3648
3649 if (DstTy.isVector()) {
3650 int NumDstElt = DstTy.getNumElements();
3651 int NumSrcElt = SrcTy.getNumElements();
3652
3653 LLT DstEltTy = DstTy.getElementType();
3654 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3655 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3656
3657 // If there's an element size mismatch, insert intermediate casts to match
3658 // the result element type.
3659 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3660 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3661 //
3662 // =>
3663 //
3664 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3665 // %3:_(<2 x s8>) = G_BITCAST %2
3666 // %4:_(<2 x s8>) = G_BITCAST %3
// %1:_(<4 x s8>) = G_CONCAT_VECTORS %3, %4
  // Each wide source element expands to NumDstElt / NumSrcElt narrow
  // destination elements, so bitcast each unmerged scalar to a short
  // subvector of that length.
3668 DstCastTy = DstTy.changeVectorElementCount(
3669 ElementCount::getFixed(NumDstElt / NumSrcElt));
3670 SrcPartTy = SrcEltTy;
3671 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3672 //
3673 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3674 //
3675 // =>
3676 //
3677 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3678 // %3:_(s16) = G_BITCAST %2
3679 // %4:_(s16) = G_BITCAST %3
3680 // %1:_(<2 x s16>) = G_BUILD_VECTOR %3, %4
  // NumSrcElt / NumDstElt narrow source elements combine into one wide
  // destination element, so unmerge in subvector-sized chunks and bitcast
  // each chunk to the wide scalar element type.
3681 SrcPartTy = SrcTy.changeVectorElementCount(
3682 ElementCount::getFixed(NumSrcElt / NumDstElt));
3683 DstCastTy = DstEltTy;
3684 }
3685
3686 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3687 for (Register &SrcReg : SrcRegs)
3688 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3689 } else
  // Vector -> scalar: unmerge by element; the merge below reassembles the
  // scalar from the pieces.
3690 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3691
3692 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3693 MI.eraseFromParent();
3694 return Legalized;
3695 }
3696
3697 if (DstTy.isVector()) {
  // Scalar -> vector: unmerge the scalar into destination-element-sized
  // pieces and merge them into the result vector.
3699 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3700 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3701 MI.eraseFromParent();
3702 return Legalized;
3703 }
3704
3705 return UnableToLegalize;
3706 }
3707
3708 /// Figure out the bit offset into a register when coercing a vector index for
3709 /// the wide element type. This is only for the case when promoting vector to
3710 /// one with larger elements.
///
3712 ///
3713 /// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3714 /// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3716 Register Idx,
3717 unsigned NewEltSize,
3718 unsigned OldEltSize) {
  // Number of old elements packed into one new element, as a power of two.
3719 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3720 LLT IdxTy = B.getMRI()->getType(Idx);
3721
3722 // Now figure out the amount we need to shift to get the target bits.
  // Mask of the low Log2EltRatio bits: the sub-index of the old element
  // within its enclosing wide element.
3723 auto OffsetMask = B.buildConstant(
3724 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3725 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
  // Scale the sub-index by the old element size (shift by log2) to convert it
  // into a bit offset within the wide element.
3726 return B.buildShl(IdxTy, OffsetIdx,
3727 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3728 }
3729
3730/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3731/// is casting to a vector with a smaller element size, perform multiple element
3732/// extracts and merge the results. If this is coercing to a vector with larger
3733/// elements, index the bitcasted vector and extract the target element with bit
3734/// operations. This is intended to force the indexing in the native register
3735/// size for architectures that can dynamically index the register file.
3738 LLT CastTy) {
3739 if (TypeIdx != 1)
3740 return UnableToLegalize;
3741
3742 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3743
3744 LLT SrcEltTy = SrcVecTy.getElementType();
3745 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3746 unsigned OldNumElts = SrcVecTy.getNumElements();
3747
3748 LLT NewEltTy = CastTy.getScalarType();
3749 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3750
3751 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3752 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3753 if (NewNumElts > OldNumElts) {
3754 // Decreasing the vector element size
3755 //
3756 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3757 // =>
3758 // v4i32:castx = bitcast x:v2i64
3759 //
3760 // i64 = bitcast
3761 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3762 // (i32 (extract_vector_elt castx, (2 * y + 1)))
3763 //
3764 if (NewNumElts % OldNumElts != 0)
3765 return UnableToLegalize;
3766
3767 // Type of the intermediate result vector.
3768 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3769 LLT MidTy =
3770 CastTy.changeElementCount(ElementCount::getFixed(NewEltsPerOldElt));
3771
3772 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3773
3774 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3775 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3776
3777 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3778 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3779 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3780 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3781 NewOps[I] = Elt.getReg(0);
3782 }
3783
3784 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3785 MIRBuilder.buildBitcast(Dst, NewVec);
3786 MI.eraseFromParent();
3787 return Legalized;
3788 }
3789
3790 if (NewNumElts < OldNumElts) {
3791 if (NewEltSize % OldEltSize != 0)
3792 return UnableToLegalize;
3793
3794 // This only depends on powers of 2 because we use bit tricks to figure out
3795 // the bit offset we need to shift to get the target element. A general
3796 // expansion could emit division/multiply.
3797 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3798 return UnableToLegalize;
3799
3800 // Increasing the vector element size.
3801 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3802 //
3803 // =>
3804 //
3805 // %cast = G_BITCAST %vec
3806 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3807 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3808 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3809 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3810 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3811 // %elt = G_TRUNC %elt_bits
3812
3813 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3814 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3815
3816 // Divide to get the index in the wider element type.
3817 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3818
3819 Register WideElt = CastVec;
3820 if (CastTy.isVector()) {
3821 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3822 ScaledIdx).getReg(0);
3823 }
3824
3825 // Compute the bit offset into the register of the target element.
3827 MIRBuilder, Idx, NewEltSize, OldEltSize);
3828
3829 // Shift the wide element to get the target element.
3830 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3831 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3832 MI.eraseFromParent();
3833 return Legalized;
3834 }
3835
3836 return UnableToLegalize;
3837}
3838
3839/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits in \p
3840/// TargetReg, while preserving other bits in \p TargetReg.
3841///
3842/// (ZExt(InsertReg) << OffsetBits) |
3843///     (TargetReg & ~(LowBitsSet(size(InsertReg)) << OffsetBits))
// NOTE(review): the first signature line (original 3843) is missing from this
// rendering; callers pass a MachineIRBuilder as the first argument \p B.
3844 Register TargetReg, Register InsertReg,
3845 Register OffsetBits) {
3846 LLT TargetTy = B.getMRI()->getType(TargetReg);
3847 LLT InsertTy = B.getMRI()->getType(InsertReg);
// Zero-extend so the shifted-in value carries no stray high bits.
3848 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3849 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3850
3851 // Produce a bitmask of the value to insert
3852 auto EltMask = B.buildConstant(
3853 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3854 InsertTy.getSizeInBits()));
3855 // Shift it into position
3856 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3857 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3858
3859 // Clear out the bits in the wide element
3860 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3861
3862 // The value to insert has all zeros already, so stick it into the masked
3863 // wide element.
3864 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3865}
3866
3867/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3868/// is increasing the element size, perform the indexing in the target element
3869/// type, and use bit operations to insert at the element position. This is
3870/// intended for architectures that can dynamically index the register file and
3871/// want to force indexing in the native register size.
// NOTE(review): signature lines (original 3872-3873) are missing from this
// rendering; from the dispatch in LegalizerHelper::bitcast this is
// LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
// LLT CastTy) — confirm against the upstream source.
3874 LLT CastTy) {
// Only the result/vector type index (0) may be bitcast by this path.
3875 if (TypeIdx != 0)
3876 return UnableToLegalize;
3877
3878 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3879 MI.getFirst4RegLLTs();
3880 LLT VecTy = DstTy;
3881
3882 LLT VecEltTy = VecTy.getElementType();
// A non-vector CastTy acts as a single wide element.
3883 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3884 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3885 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3886
3887 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3888 unsigned OldNumElts = VecTy.getNumElements();
3889
3890 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3891 if (NewNumElts < OldNumElts) {
// Only handle widening to an exact multiple of the old element size.
3892 if (NewEltSize % OldEltSize != 0)
3893 return UnableToLegalize;
3894
3895 // This only depends on powers of 2 because we use bit tricks to figure out
3896 // the bit offset we need to shift to get the target element. A general
3897 // expansion could emit division/multiply.
3898 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3899 return UnableToLegalize;
3900
3901 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3902 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3903
3904 // Divide to get the index in the wider element type.
3905 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3906
// If CastTy degenerated to a scalar, the whole cast value is the wide element.
3907 Register ExtractedElt = CastVec;
3908 if (CastTy.isVector()) {
3909 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3910 ScaledIdx).getReg(0);
3911 }
3912
3913 // Compute the bit offset into the register of the target element.
// NOTE(review): original line 3914 (the definition of OffsetBits) is missing
// from this rendering; restore it from upstream.
3915 MIRBuilder, Idx, NewEltSize, OldEltSize);
3916
// Splice the narrow value into the wide element, preserving the other bits.
3917 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3918 Val, OffsetBits);
3919 if (CastTy.isVector()) {
3920 InsertedElt = MIRBuilder.buildInsertVectorElement(
3921 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3922 }
3923
3924 MIRBuilder.buildBitcast(Dst, InsertedElt);
3925 MI.eraseFromParent();
3926 return Legalized;
3927 }
3928
// Narrowing the element size for insert is not implemented here.
3929 return UnableToLegalize;
3930}
3931
3932// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3933// those that have smaller than legal operands.
3934//
3935// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3936//
3937// ===>
3938//
3939// s32 = G_BITCAST <4 x s8>
3940// s32 = G_BITCAST <4 x s8>
3941// s32 = G_BITCAST <4 x s8>
3942// s32 = G_BITCAST <4 x s8>
3943// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3944// <16 x s8> = G_BITCAST <4 x s32>
// NOTE(review): signature lines (original 3945-3946) are missing from this
// rendering; from the dispatch in LegalizerHelper::bitcast this is
// LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
// LLT CastTy) — confirm against the upstream source.
3947 LLT CastTy) {
3948 // Convert it to CONCAT instruction
3949 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3950 if (!ConcatMI) {
3951 return UnableToLegalize;
3952 }
3953
3954 // Check if bitcast is Legal
3955 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
// Each source vector is reinterpreted as one scalar of the same total size.
3956 LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
3957
3958 // Check if the build vector is Legal
3959 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3960 return UnableToLegalize;
3961 }
3962
3963 // Bitcast the sources
3964 SmallVector<Register> BitcastRegs;
3965 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3966 BitcastRegs.push_back(
3967 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3968 .getReg(0));
3969 }
3970
3971 // Build the scalar values into a vector
3972 Register BuildReg =
3973 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
// Reinterpret the built vector back as the original concat result type.
3974 MIRBuilder.buildBitcast(DstReg, BuildReg);
3975
3976 MI.eraseFromParent();
3977 return Legalized;
3978}
3979
3980// This bitcasts a shuffle vector to a different type currently of the same
3981// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
3982// will be used instead.
3983//
3984// <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
3985// ===>
3986// <4 x s64> = G_PTRTOINT <4 x p0>
3987// <4 x s64> = G_PTRTOINT <4 x p0>
3988// <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
3989// <16 x p0> = G_INTTOPTR <16 x s64>
// NOTE(review): signature lines (original 3990-3991) are missing from this
// rendering; from the dispatch in LegalizerHelper::bitcast this is
// LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
// LLT CastTy) — confirm against the upstream source.
3992 LLT CastTy) {
3993 auto ShuffleMI = cast<GShuffleVector>(&MI);
3994 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
3995 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
3996
3997 // We currently only handle vectors of the same size.
3998 if (TypeIdx != 0 ||
3999 CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
4000 CastTy.getElementCount() != DstTy.getElementCount())
4001 return UnableToLegalize;
4002
// Sources keep their element count but adopt the cast scalar type.
4003 LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
4004
// buildCast picks the right opcode (e.g. ptrtoint/inttoptr/bitcast).
4005 auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
4006 auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
4007 auto Shuf =
4008 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
4009 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
4010
4011 MI.eraseFromParent();
4012 return Legalized;
4013}
4014
4015/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
4016///
4017/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
4018///
4019/// ===>
4020///
4021/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
4022/// <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i8>, N / 8
4023/// <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
// NOTE(review): signature lines (original 4024-4025) are missing from this
// rendering; from the dispatch in LegalizerHelper::bitcast this is
// LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
// LLT CastTy) — confirm against the upstream source.
4026 LLT CastTy) {
4027 auto ES = cast<GExtractSubvector>(&MI);
4028
4029 if (!CastTy.isVector())
4030 return UnableToLegalize;
4031
4032 if (TypeIdx != 0)
4033 return UnableToLegalize;
4034
4035 Register Dst = ES->getReg(0);
4036 Register Src = ES->getSrcVec();
4037 uint64_t Idx = ES->getIndexImm();
4038
4039 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4040
4041 LLT DstTy = MRI.getType(Dst);
4042 LLT SrcTy = MRI.getType(Src);
// Known-minimum counts also cover the scalable-vector case.
4043 ElementCount DstTyEC = DstTy.getElementCount();
4044 ElementCount SrcTyEC = SrcTy.getElementCount();
4045 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4046 auto SrcTyMinElts = SrcTyEC.getKnownMinValue();
4047
// Nothing to do if the requested cast type is already the result type.
4048 if (DstTy == CastTy)
4049 return Legalized;
4050
4051 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4052 return UnableToLegalize;
4053
4054 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4055 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
// Only widening the element size is supported.
4056 if (CastEltSize < DstEltSize)
4057 return UnableToLegalize;
4058
// All counts and the index must divide evenly by the element-size ratio.
4059 auto AdjustAmt = CastEltSize / DstEltSize;
4060 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4061 SrcTyMinElts % AdjustAmt != 0)
4062 return UnableToLegalize;
4063
4064 Idx /= AdjustAmt;
4065 SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4066 auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
4067 auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
4068 MIRBuilder.buildBitcast(Dst, PromotedES);
4069
4070 ES->eraseFromParent();
4071 return Legalized;
4072}
4073
4074/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
4075///
4076/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
4077/// <vscale x 8 x i1>,
4078/// N
4079///
4080/// ===>
4081///
4082/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
4083/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
4084/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
4085/// <vscale x 1 x i8>, N / 8
4086/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
// NOTE(review): signature lines (original 4087-4088) are missing from this
// rendering; from the dispatch in LegalizerHelper::bitcast this is
// LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
// LLT CastTy) — confirm against the upstream source.
4089 LLT CastTy) {
4090 auto ES = cast<GInsertSubvector>(&MI);
4091
4092 if (!CastTy.isVector())
4093 return UnableToLegalize;
4094
4095 if (TypeIdx != 0)
4096 return UnableToLegalize;
4097
4098 Register Dst = ES->getReg(0);
4099 Register BigVec = ES->getBigVec();
4100 Register SubVec = ES->getSubVec();
4101 uint64_t Idx = ES->getIndexImm();
4102
4103 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4104
4105 LLT DstTy = MRI.getType(Dst);
4106 LLT BigVecTy = MRI.getType(BigVec);
4107 LLT SubVecTy = MRI.getType(SubVec);
4108
// Nothing to do if the requested cast type is already the result type.
4109 if (DstTy == CastTy)
4110 return Legalized;
4111
4112 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4113 return UnableToLegalize;
4114
// Known-minimum counts also cover the scalable-vector case.
4115 ElementCount DstTyEC = DstTy.getElementCount();
4116 ElementCount BigVecTyEC = BigVecTy.getElementCount();
4117 ElementCount SubVecTyEC = SubVecTy.getElementCount();
4118 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4119 auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
4120 auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();
4121
4122 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4123 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
// Only widening the element size is supported.
4124 if (CastEltSize < DstEltSize)
4125 return UnableToLegalize;
4126
// The insert index and every element count must divide evenly by the ratio.
4127 auto AdjustAmt = CastEltSize / DstEltSize;
4128 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4129 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4130 return UnableToLegalize;
4131
4132 Idx /= AdjustAmt;
4133 BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4134 SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4135 auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
4136 auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
4137 auto PromotedIS =
4138 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4139 MIRBuilder.buildBitcast(Dst, PromotedIS);
4140
4141 ES->eraseFromParent();
4142 return Legalized;
4143}
4144
// NOTE(review): the signature line (original 4145) is missing from this
// rendering; the dispatch in LegalizerHelper::lower calls this as
// lowerLoad(cast<GAnyLoad>(MI)) — confirm the exact signature upstream.
4146 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
4147 Register DstReg = LoadMI.getDstReg();
4148 Register PtrReg = LoadMI.getPointerReg();
4149 LLT DstTy = MRI.getType(DstReg);
4150 MachineMemOperand &MMO = LoadMI.getMMO();
4151 LLT MemTy = MMO.getMemoryType();
4152 MachineFunction &MF = MIRBuilder.getMF();
4153
4154 unsigned MemSizeInBits = MemTy.getSizeInBits();
4155 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
4156
// Case 1: the memory type is not a whole number of bytes.
4157 if (MemSizeInBits != MemStoreSizeInBits) {
4158 if (MemTy.isVector())
4159 return UnableToLegalize;
4160
4161 // Promote to a byte-sized load if not loading an integral number of
4162 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
4163 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
4164 MachineMemOperand *NewMMO =
4165 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
4166
4167 Register LoadReg = DstReg;
4168 LLT LoadTy = DstTy;
4169
4170 // If this wasn't already an extending load, we need to widen the result
4171 // register to avoid creating a load with a narrower result than the source.
4172 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
4173 LoadTy = WideMemTy;
4174 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4175 }
4176
4177 if (isa<GSExtLoad>(LoadMI)) {
4178 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4179 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4180 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
4181 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4182 // The extra bits are guaranteed to be zero, since we stored them that
4183 // way. A zext load from Wide thus automatically gives zext from MemVT.
4184 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4185 } else {
4186 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4187 }
4188
4189 if (DstTy != LoadTy)
4190 MIRBuilder.buildTrunc(DstReg, LoadReg);
4191
4192 LoadMI.eraseFromParent();
4193 return Legalized;
4194 }
4195
4196 // Big endian lowering not implemented.
4197 if (MIRBuilder.getDataLayout().isBigEndian())
4198 return UnableToLegalize;
4199
4200 // This load needs splitting into power of 2 sized loads.
4201 //
4202 // Our strategy here is to generate anyextending loads for the smaller
4203 // types up to next power-2 result type, and then combine the two larger
4204 // result values together, before truncating back down to the non-pow-2
4205 // type.
4206 // E.g. v1 = i24 load =>
4207 // v2 = i32 zextload (2 byte)
4208 // v3 = i32 load (1 byte)
4209 // v4 = i32 shl v3, 16
4210 // v5 = i32 or v4, v2
4211 // v1 = i24 trunc v5
4212 // By doing this we generate the correct truncate which should get
4213 // combined away as an artifact with a matching extend.
4214
4215 uint64_t LargeSplitSize, SmallSplitSize;
4216
4217 if (!isPowerOf2_32(MemSizeInBits)) {
4218 // This load needs splitting into power of 2 sized loads.
4219 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
4220 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4221 } else {
4222 // This is already a power of 2, but we still need to split this in half.
4223 //
4224 // Assume we're being asked to decompose an unaligned load.
4225 // TODO: If this requires multiple splits, handle them all at once.
4226 auto &Ctx = MF.getFunction().getContext();
// If the access is actually allowed as-is, there is nothing for us to do.
4227 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4228 return UnableToLegalize;
4229
4230 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4231 }
4232
4233 if (MemTy.isVector()) {
4234 // TODO: Handle vector extloads
4235 if (MemTy != DstTy)
4236 return UnableToLegalize;
4237
4238 Align Alignment = LoadMI.getAlign();
4239 // Given an alignment larger than the size of the memory, we can increase
4240 // the size of the load without needing to scalarize it.
// NOTE(review): original lines 4242 and 4244 (the rest of this condition and
// of the changeVectorElementCount() argument list) are missing from this
// rendering; restore them from upstream before relying on this branch.
4241 if (Alignment.value() * 8 > MemSizeInBits &&
4243 LLT MoreTy = DstTy.changeVectorElementCount(
4245 MachineMemOperand *NewMMO = MF.getMachineMemOperand(&MMO, 0, MoreTy);
4246 auto NewLoad = MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
4247 MIRBuilder.buildDeleteTrailingVectorElements(LoadMI.getReg(0),
4248 NewLoad.getReg(0));
4249 LoadMI.eraseFromParent();
4250 return Legalized;
4251 }
4252
4253 // TODO: We can do better than scalarizing the vector and at least split it
4254 // in half.
4255 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
4256 }
4257
4258 MachineMemOperand *LargeMMO =
4259 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4260 MachineMemOperand *SmallMMO =
4261 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4262
4263 LLT PtrTy = MRI.getType(PtrReg);
4264 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
4265 LLT AnyExtTy = LLT::scalar(AnyExtSize);
// Low part must be zero-extended so the OR below combines cleanly.
4266 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4267 PtrReg, *LargeMMO);
4268
4269 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
4270 LargeSplitSize / 8);
4271 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4272 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
// The high part keeps the original load's extension kind.
4273 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
4274 SmallPtr, *SmallMMO);
4275
4276 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4277 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4278
4279 if (AnyExtTy == DstTy)
4280 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4281 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
4282 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4283 MIRBuilder.buildTrunc(DstReg, {Or});
4284 } else {
4285 assert(DstTy.isPointer() && "expected pointer");
4286 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4287
4288 // FIXME: We currently consider this to be illegal for non-integral address
4289 // spaces, but we need still need a way to reinterpret the bits.
4290 MIRBuilder.buildIntToPtr(DstReg, Or);
4291 }
4292
4293 LoadMI.eraseFromParent();
4294 return Legalized;
4295}
4296
// NOTE(review): the signature line (original 4297) is missing from this
// rendering; the dispatch in LegalizerHelper::lower calls this as
// lowerStore(cast<GStore>(MI)) — confirm the exact signature upstream.
4298 // Lower a non-power of 2 store into multiple pow-2 stores.
4299 // E.g. split an i24 store into an i16 store + i8 store.
4300 // We do this by first extending the stored value to the next largest power
4301 // of 2 type, and then using truncating stores to store the components.
4302 // By doing this, likewise with G_LOAD, generate an extend that can be
4303 // artifact-combined away instead of leaving behind extracts.
4304 Register SrcReg = StoreMI.getValueReg();
4305 Register PtrReg = StoreMI.getPointerReg();
4306 LLT SrcTy = MRI.getType(SrcReg);
4307 MachineFunction &MF = MIRBuilder.getMF();
4308 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4309 LLT MemTy = MMO.getMemoryType();
4310
4311 unsigned StoreWidth = MemTy.getSizeInBits();
4312 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
4313
// Case 1: scalar store whose memory type is not a whole number of bytes.
4314 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4315 // Promote to a byte-sized store with upper bits zero if not
4316 // storing an integral number of bytes. For example, promote
4317 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
4318 LLT WideTy = LLT::scalar(StoreSizeInBits);
4319
4320 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4321 // Avoid creating a store with a narrower source than result.
4322 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4323 SrcTy = WideTy;
4324 }
4325
// Zero the padding bits so the widened store has defined contents.
4326 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4327
4328 MachineMemOperand *NewMMO =
4329 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
4330 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4331 StoreMI.eraseFromParent();
4332 return Legalized;
4333 }
4334
4335 if (MemTy.isVector()) {
// Truncating vector stores (e.g. of i1 elements) get a bit-packing lowering.
4336 if (MemTy != SrcTy)
4337 return scalarizeVectorBooleanStore(StoreMI);
4338
4339 // TODO: We can do better than scalarizing the vector and at least split it
4340 // in half.
4341 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
4342 }
4343
4344 unsigned MemSizeInBits = MemTy.getSizeInBits();
4345 uint64_t LargeSplitSize, SmallSplitSize;
4346
4347 if (!isPowerOf2_32(MemSizeInBits)) {
4348 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
4349 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
4350 } else {
4351 auto &Ctx = MF.getFunction().getContext();
// If the access is actually allowed as-is, there is nothing for us to do.
4352 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4353 return UnableToLegalize; // Don't know what we're being asked to do.
4354
4355 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4356 }
4357
4358 // Extend to the next pow-2. If this store was itself the result of lowering,
4359 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
4360 // that's wider than the stored size.
4361 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
4362 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
4363
// Pointers must go through an integer type before the shift below.
4364 if (SrcTy.isPointer()) {
4365 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
4366 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4367 }
4368
4369 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4370
4371 // Obtain the smaller value by shifting away the larger value.
4372 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4373 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4374
4375 // Generate the PtrAdd and truncating stores.
4376 LLT PtrTy = MRI.getType(PtrReg);
4377 auto OffsetCst = MIRBuilder.buildConstant(
4378 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
4379 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4380
4381 MachineMemOperand *LargeMMO =
4382 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4383 MachineMemOperand *SmallMMO =
4384 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4385 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4386 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4387 StoreMI.eraseFromParent();
4388 return Legalized;
4389}
4390
// NOTE(review): the signature lines (original 4391-4392) are missing from this
// rendering; lowerStore calls this as scalarizeVectorBooleanStore(StoreMI) for
// truncating vector stores — confirm the exact signature upstream.
// Lowers a vector store whose element memory type is sub-byte (e.g. <N x i1>)
// by packing the truncated elements into one integer and storing that.
4393 Register SrcReg = StoreMI.getValueReg();
4394 Register PtrReg = StoreMI.getPointerReg();
4395 LLT SrcTy = MRI.getType(SrcReg);
4396 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4397 LLT MemTy = MMO.getMemoryType();
4398 LLT MemScalarTy = MemTy.getElementType();
4399 MachineFunction &MF = MIRBuilder.getMF();
4400
4401 assert(SrcTy.isVector() && "Expect a vector store type");
4402
4403 if (!MemScalarTy.isByteSized()) {
4404 // We need to build an integer scalar of the vector bit pattern.
4405 // It's not legal for us to add padding when storing a vector.
4406 unsigned NumBits = MemTy.getSizeInBits();
4407 LLT IntTy = LLT::scalar(NumBits);
// Accumulator starts at zero; each element is OR'd into place below.
4408 auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
4409 LLT IdxTy = TLI.getVectorIdxLLT(MF.getDataLayout());
4410
4411 for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
4412 auto Elt = MIRBuilder.buildExtractVectorElement(
4413 SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
4414 auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
4415 auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
// Big-endian lays element 0 in the most-significant position.
4416 unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
4417 ? (MemTy.getNumElements() - 1) - I
4418 : I;
4419 auto ShiftAmt = MIRBuilder.buildConstant(
4420 IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
4421 auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4422 CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4423 }
4424 auto PtrInfo = MMO.getPointerInfo();
4425 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
4426 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4427 StoreMI.eraseFromParent();
4428 return Legalized;
4429 }
4430
4431 // TODO: implement simple scalarization.
4432 return UnableToLegalize;
4433}
4434
// Top-level Bitcast legalization action: dispatches per opcode, either
// rewriting operands/results in place (load/store/select/logic ops) or
// delegating to an opcode-specific bitcast helper.
// NOTE(review): the return-type line (original 4435, presumably
// LegalizerHelper::LegalizeResult) is missing from this rendering.
4436LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
4437 switch (MI.getOpcode()) {
4438 case TargetOpcode::G_LOAD: {
4439 if (TypeIdx != 0)
4440 return UnableToLegalize;
4441 MachineMemOperand &MMO = **MI.memoperands_begin();
4442
4443 // Not sure how to interpret a bitcast of an extending load.
4444 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4445 return UnableToLegalize;
4446
4447 Observer.changingInstr(MI);
4448 bitcastDst(MI, CastTy, 0);
4449 MMO.setType(CastTy);
4450 // The range metadata is no longer valid when reinterpreted as a different
4451 // type.
4452 MMO.clearRanges();
4453 Observer.changedInstr(MI);
4454 return Legalized;
4455 }
4456 case TargetOpcode::G_STORE: {
4457 if (TypeIdx != 0)
4458 return UnableToLegalize;
4459
4460 MachineMemOperand &MMO = **MI.memoperands_begin();
4461
4462 // Not sure how to interpret a bitcast of a truncating store.
4463 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4464 return UnableToLegalize;
4465
4466 Observer.changingInstr(MI);
4467 bitcastSrc(MI, CastTy, 0);
4468 MMO.setType(CastTy);
4469 Observer.changedInstr(MI);
4470 return Legalized;
4471 }
4472 case TargetOpcode::G_SELECT: {
4473 if (TypeIdx != 0)
4474 return UnableToLegalize;
4475
// A vector select's condition element count is tied to the value type, so a
// plain bitcast of the values would break the instruction.
4476 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
4477 LLVM_DEBUG(
4478 dbgs() << "bitcast action not implemented for vector select\n");
4479 return UnableToLegalize;
4480 }
4481
4482 Observer.changingInstr(MI);
4483 bitcastSrc(MI, CastTy, 2);
4484 bitcastSrc(MI, CastTy, 3);
4485 bitcastDst(MI, CastTy, 0);
4486 Observer.changedInstr(MI);
4487 return Legalized;
4488 }
// Bitwise ops are insensitive to the value interpretation; cast everything.
4489 case TargetOpcode::G_AND:
4490 case TargetOpcode::G_OR:
4491 case TargetOpcode::G_XOR: {
4492 Observer.changingInstr(MI);
4493 bitcastSrc(MI, CastTy, 1);
4494 bitcastSrc(MI, CastTy, 2);
4495 bitcastDst(MI, CastTy, 0);
4496 Observer.changedInstr(MI);
4497 return Legalized;
4498 }
4499 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4500 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
4501 case TargetOpcode::G_INSERT_VECTOR_ELT:
4502 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
4503 case TargetOpcode::G_CONCAT_VECTORS:
4504 return bitcastConcatVector(MI, TypeIdx, CastTy);
4505 case TargetOpcode::G_SHUFFLE_VECTOR:
4506 return bitcastShuffleVector(MI, TypeIdx, CastTy);
4507 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4508 return bitcastExtractSubvector(MI, TypeIdx, CastTy);
4509 case TargetOpcode::G_INSERT_SUBVECTOR:
4510 return bitcastInsertSubvector(MI, TypeIdx, CastTy);
4511 default:
4512 return UnableToLegalize;
4513 }
4514}
4515
4516// Legalize an instruction by changing the opcode in place.
4517void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
// NOTE(review): original lines 4518 and 4520 are missing from this rendering —
// presumably the Observer.changingInstr(MI)/Observer.changedInstr(MI)
// bracketing around the mutation; restore from upstream.
4519 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
4521}
4522
4524LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4525 using namespace TargetOpcode;
4526
4527 switch(MI.getOpcode()) {
4528 default:
4529 return UnableToLegalize;
4530 case TargetOpcode::G_FCONSTANT:
4531 return lowerFConstant(MI);
4532 case TargetOpcode::G_BITCAST:
4533 return lowerBitcast(MI);
4534 case TargetOpcode::G_SREM:
4535 case TargetOpcode::G_UREM: {
4536 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4537 auto Quot =
4538 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4539 {MI.getOperand(1), MI.getOperand(2)});
4540
4541 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
4542 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
4543 MI.eraseFromParent();
4544 return Legalized;
4545 }
4546 case TargetOpcode::G_SADDO:
4547 case TargetOpcode::G_SSUBO:
4548 return lowerSADDO_SSUBO(MI);
4549 case TargetOpcode::G_SADDE:
4550 return lowerSADDE(MI);
4551 case TargetOpcode::G_SSUBE:
4552 return lowerSSUBE(MI);
4553 case TargetOpcode::G_UMULH:
4554 case TargetOpcode::G_SMULH:
4555 return lowerSMULH_UMULH(MI);
4556 case TargetOpcode::G_SMULO:
4557 case TargetOpcode::G_UMULO: {
4558 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4559 // result.
4560 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
4561 LLT Ty = MRI.getType(Res);
4562
4563 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4564 ? TargetOpcode::G_SMULH
4565 : TargetOpcode::G_UMULH;
4566
4567 Observer.changingInstr(MI);
4568 const auto &TII = MIRBuilder.getTII();
4569 MI.setDesc(TII.get(TargetOpcode::G_MUL));
4570 MI.removeOperand(1);
4571 Observer.changedInstr(MI);
4572
4573 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4574 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4575
4576 // Move insert point forward so we can use the Res register if needed.
4577 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
4578
4579 // For *signed* multiply, overflow is detected by checking:
4580 // (hi != (lo >> bitwidth-1))
4581 if (Opcode == TargetOpcode::G_SMULH) {
4582 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4583 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4584 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
4585 } else {
4586 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
4587 }
4588 return Legalized;
4589 }
4590 case TargetOpcode::G_FNEG: {
4591 auto [Res, SubByReg] = MI.getFirst2Regs();
4592 LLT Ty = MRI.getType(Res);
4593
4594 auto SignMask = MIRBuilder.buildConstant(
4595 Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
4596 MIRBuilder.buildXor(Res, SubByReg, SignMask);
4597 MI.eraseFromParent();
4598 return Legalized;
4599 }
4600 case TargetOpcode::G_FSUB:
4601 case TargetOpcode::G_STRICT_FSUB: {
4602 auto [Res, LHS, RHS] = MI.getFirst3Regs();
4603 LLT Ty = MRI.getType(Res);
4604
4605 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4606 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4607
4608 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4609 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4610 else
4611 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4612
4613 MI.eraseFromParent();
4614 return Legalized;
4615 }
4616 case TargetOpcode::G_FMAD:
4617 return lowerFMad(MI);
4618 case TargetOpcode::G_FFLOOR:
4619 return lowerFFloor(MI);
4620 case TargetOpcode::G_LROUND:
4621 case TargetOpcode::G_LLROUND: {
4622 Register DstReg = MI.getOperand(0).getReg();
4623 Register SrcReg = MI.getOperand(1).getReg();
4624 LLT SrcTy = MRI.getType(SrcReg);
4625 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4626 {SrcReg});
4627 MIRBuilder.buildFPTOSI(DstReg, Round);
4628 MI.eraseFromParent();
4629 return Legalized;
4630 }
4631 case TargetOpcode::G_INTRINSIC_ROUND:
4632 return lowerIntrinsicRound(MI);
4633 case TargetOpcode::G_FRINT: {
4634 // Since round even is the assumed rounding mode for unconstrained FP
4635 // operations, rint and roundeven are the same operation.
4636 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4637 return Legalized;
4638 }
4639 case TargetOpcode::G_INTRINSIC_LRINT:
4640 case TargetOpcode::G_INTRINSIC_LLRINT: {
4641 Register DstReg = MI.getOperand(0).getReg();
4642 Register SrcReg = MI.getOperand(1).getReg();
4643 LLT SrcTy = MRI.getType(SrcReg);
4644 auto Round =
4645 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4646 MIRBuilder.buildFPTOSI(DstReg, Round);
4647 MI.eraseFromParent();
4648 return Legalized;
4649 }
4650 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4651 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
4652 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4653 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4654 **MI.memoperands_begin());
4655 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4656 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4657 MI.eraseFromParent();
4658 return Legalized;
4659 }
4660 case TargetOpcode::G_LOAD:
4661 case TargetOpcode::G_SEXTLOAD:
4662 case TargetOpcode::G_ZEXTLOAD:
4663 return lowerLoad(cast<GAnyLoad>(MI));
4664 case TargetOpcode::G_STORE:
4665 return lowerStore(cast<GStore>(MI));
4666 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4667 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4668 case TargetOpcode::G_CTLZ:
4669 case TargetOpcode::G_CTTZ:
4670 case TargetOpcode::G_CTPOP:
4671 case TargetOpcode::G_CTLS:
4672 return lowerBitCount(MI);
4673 case G_UADDO: {
4674 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
4675
4676 Register NewRes = MRI.cloneVirtualRegister(Res);
4677
4678 MIRBuilder.buildAdd(NewRes, LHS, RHS);
4679 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4680
4681 MIRBuilder.buildCopy(Res, NewRes);
4682
4683 MI.eraseFromParent();
4684 return Legalized;
4685 }
4686 case G_UADDE: {
4687 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
4688 const LLT CondTy = MRI.getType(CarryOut);
4689 const LLT Ty = MRI.getType(Res);
4690
4691 Register NewRes = MRI.cloneVirtualRegister(Res);
4692
4693 // Initial add of the two operands.
4694 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
4695
4696 // Initial check for carry.
4697 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4698
4699 // Add the sum and the carry.
4700 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
4701 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4702
4703 // Second check for carry. We can only carry if the initial sum is all 1s
4704 // and the carry is set, resulting in a new sum of 0.
4705 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4706 auto ResEqZero =
4707 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
4708 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4709 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
4710
4711 MIRBuilder.buildCopy(Res, NewRes);
4712
4713 MI.eraseFromParent();
4714 return Legalized;
4715 }
4716 case G_USUBO: {
4717 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
4718
4719 MIRBuilder.buildSub(Res, LHS, RHS);
4720 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
4721
4722 MI.eraseFromParent();
4723 return Legalized;
4724 }
4725 case G_USUBE: {
4726 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
4727 const LLT CondTy = MRI.getType(BorrowOut);
4728 const LLT Ty = MRI.getType(Res);
4729
4730 // Initial subtract of the two operands.
4731 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
4732
4733 // Initial check for borrow.
4734 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4735
4736 // Subtract the borrow from the first subtract.
4737 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
4738 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4739
4740 // Second check for borrow. We can only borrow if the initial difference is
4741 // 0 and the borrow is set, resulting in a new difference of all 1s.
4742 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4743 auto TmpResEqZero =
4744 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4745 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4746 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4747
4748 MI.eraseFromParent();
4749 return Legalized;
4750 }
4751 case G_UITOFP:
4752 return lowerUITOFP(MI);
4753 case G_SITOFP:
4754 return lowerSITOFP(MI);
4755 case G_FPTOUI:
4756 return lowerFPTOUI(MI);
4757 case G_FPTOSI:
4758 return lowerFPTOSI(MI);
4759 case G_FPTOUI_SAT:
4760 case G_FPTOSI_SAT:
4761 return lowerFPTOINT_SAT(MI);
4762 case G_FPTRUNC:
4763 return lowerFPTRUNC(MI);
4764 case G_FPOWI:
4765 return lowerFPOWI(MI);
4766 case G_FMODF:
4767 return lowerFMODF(MI);
4768 case G_SMIN:
4769 case G_SMAX:
4770 case G_UMIN:
4771 case G_UMAX:
4772 return lowerMinMax(MI);
4773 case G_SCMP:
4774 case G_UCMP:
4775 return lowerThreewayCompare(MI);
4776 case G_FCOPYSIGN:
4777 return lowerFCopySign(MI);
4778 case G_FMINNUM:
4779 case G_FMAXNUM:
4780 case G_FMINIMUMNUM:
4781 case G_FMAXIMUMNUM:
4782 return lowerFMinNumMaxNum(MI);
4783 case G_FMINIMUM:
4784 case G_FMAXIMUM:
4785 return lowerFMinimumMaximum(MI);
4786 case G_MERGE_VALUES:
4787 return lowerMergeValues(MI);
4788 case G_UNMERGE_VALUES:
4789 return lowerUnmergeValues(MI);
4790 case TargetOpcode::G_SEXT_INREG: {
4791 assert(MI.getOperand(2).isImm() && "Expected immediate");
4792 int64_t SizeInBits = MI.getOperand(2).getImm();
4793
4794 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4795 LLT DstTy = MRI.getType(DstReg);
4796 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4797
4798 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4799 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4800 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4801 MI.eraseFromParent();
4802 return Legalized;
4803 }
4804 case G_EXTRACT_VECTOR_ELT:
4805 case G_INSERT_VECTOR_ELT:
4807 case G_SHUFFLE_VECTOR:
4808 return lowerShuffleVector(MI);
4809 case G_VECTOR_COMPRESS:
4810 return lowerVECTOR_COMPRESS(MI);
4811 case G_DYN_STACKALLOC:
4812 return lowerDynStackAlloc(MI);
4813 case G_STACKSAVE:
4814 return lowerStackSave(MI);
4815 case G_STACKRESTORE:
4816 return lowerStackRestore(MI);
4817 case G_EXTRACT:
4818 return lowerExtract(MI);
4819 case G_INSERT:
4820 return lowerInsert(MI);
4821 case G_BSWAP:
4822 return lowerBswap(MI);
4823 case G_BITREVERSE:
4824 return lowerBitreverse(MI);
4825 case G_READ_REGISTER:
4826 case G_WRITE_REGISTER:
4827 return lowerReadWriteRegister(MI);
4828 case G_UADDSAT:
4829 case G_USUBSAT: {
4830 // Try to make a reasonable guess about which lowering strategy to use. The
4831 // target can override this with custom lowering and calling the
4832 // implementation functions.
4833 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4834 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4835 return lowerAddSubSatToMinMax(MI);
4837 }
4838 case G_SADDSAT:
4839 case G_SSUBSAT: {
4840 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4841
4842 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4843 // since it's a shorter expansion. However, we would need to figure out the
4844 // preferred boolean type for the carry out for the query.
4845 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4846 return lowerAddSubSatToMinMax(MI);
4848 }
4849 case G_SSHLSAT:
4850 case G_USHLSAT:
4851 return lowerShlSat(MI);
4852 case G_ABS:
4853 return lowerAbsToAddXor(MI);
4854 case G_ABDS:
4855 case G_ABDU: {
4856 bool IsSigned = MI.getOpcode() == G_ABDS;
4857 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4858 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4859 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4860 return lowerAbsDiffToMinMax(MI);
4861 }
4862 return lowerAbsDiffToSelect(MI);
4863 }
4864 case G_FABS:
4865 return lowerFAbs(MI);
4866 case G_SELECT:
4867 return lowerSelect(MI);
4868 case G_IS_FPCLASS:
4869 return lowerISFPCLASS(MI);
4870 case G_SDIVREM:
4871 case G_UDIVREM:
4872 return lowerDIVREM(MI);
4873 case G_FSHL:
4874 case G_FSHR:
4875 return lowerFunnelShift(MI);
4876 case G_ROTL:
4877 case G_ROTR:
4878 return lowerRotate(MI);
4879 case G_MEMSET:
4880 case G_MEMCPY:
4881 case G_MEMMOVE:
4882 return lowerMemCpyFamily(MI);
4883 case G_MEMCPY_INLINE:
4884 return lowerMemcpyInline(MI);
4885 case G_ZEXT:
4886 case G_SEXT:
4887 case G_ANYEXT:
4888 return lowerEXT(MI);
4889 case G_TRUNC:
4890 return lowerTRUNC(MI);
4892 return lowerVectorReduction(MI);
4893 case G_VAARG:
4894 return lowerVAArg(MI);
4895 case G_ATOMICRMW_SUB: {
4896 auto [Ret, Mem, Val] = MI.getFirst3Regs();
4897 const LLT ValTy = MRI.getType(Val);
4898 MachineMemOperand *MMO = *MI.memoperands_begin();
4899
4900 auto VNeg = MIRBuilder.buildNeg(ValTy, Val);
4901 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4902 MI.eraseFromParent();
4903 return Legalized;
4904 }
4905 }
4906}
4907
/// Pick an alignment for a stack temporary holding a value of type \p Ty:
/// the type's byte size rounded up to a power of two, but never less than
/// \p MinAlign.
4909 Align MinAlign) const {
4910 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4911 // datalayout for the preferred alignment. Also there should be a target hook
4912 // for this to allow targets to reduce the alignment and ignore the
4913 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4914 // the type.
4915 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4916}
4917
/// Create a fixed stack object of \p Bytes with \p Alignment and return a
/// G_FRAME_INDEX pointing at it. \p PtrInfo is set (out) to the fixed-stack
/// pointer info for the new slot.
4920 MachinePointerInfo &PtrInfo) {
4921 MachineFunction &MF = MIRBuilder.getMF();
4922 const DataLayout &DL = MIRBuilder.getDataLayout();
// Not a spill slot (isSpillSlot = false): this is a generic temporary.
4923 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4924
// The frame-index pointer lives in the module's alloca address space.
4925 unsigned AddrSpace = DL.getAllocaAddrSpace();
4926 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4927
4928 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4929 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4931
/// Copy \p Val into \p Res by round-tripping it through a stack temporary:
/// store \p Val to a fresh slot, then load it back as \p Res.
4933 const SrcOp &Val) {
4934 LLT SrcTy = Val.getLLTTy(MRI);
4935 Align StackTypeAlign =
4936 std::max(getStackTemporaryAlignment(SrcTy),
4938 MachinePointerInfo PtrInfo;
4939 auto StackTemp =
4940 createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo);
4941
// Store then reload through the same slot with identical pointer info and
// alignment.
4942 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4943 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4944}
4945
/// Clamp the vector index in \p IdxReg to [0, NElts - 1] so that a
/// stack-based element access stays inside the object. A constant index
/// already in bounds is returned unchanged; otherwise the index is masked
/// (power-of-two element counts) or umin-clamped.
4947 LLT VecTy) {
4948 LLT IdxTy = B.getMRI()->getType(IdxReg);
4949 unsigned NElts = VecTy.getNumElements();
4950
4951 int64_t IdxVal;
4952 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4953 if (IdxVal < VecTy.getNumElements())
4954 return IdxReg;
4955 // If a constant index would be out of bounds, clamp it as well.
4956 }
4957
// Power-of-two element count: an AND with (NElts - 1) is a cheap clamp.
4958 if (isPowerOf2_32(NElts)) {
4959 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4960 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4961 }
4962
// General case: unsigned min against the last valid index.
4963 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4964 .getReg(0);
4965}
4966
/// Compute the address of element \p Index of the in-memory vector at
/// \p VecPtr: clamp the index, resize it to the address space's index width,
/// scale by the element byte size, and G_PTR_ADD it onto the base pointer.
4968 Register Index) {
4969 LLT EltTy = VecTy.getElementType();
4970
4971 // Calculate the element offset and add it to the pointer.
4972 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4973 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4974 "Converting bits to bytes lost precision");
4975
// Keep the access inside the vector object (see clampVectorIndex above).
4976 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4977
4978 // Convert index to the correct size for the address space.
4979 const DataLayout &DL = MIRBuilder.getDataLayout();
4980 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4981 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4982 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4983 if (IdxTy != MRI.getType(Index))
4984 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4985
// Byte offset = Index * EltSize.
4986 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4987 MIRBuilder.buildConstant(IdxTy, EltSize));
4988
4989 LLT PtrTy = MRI.getType(VecPtr);
4990 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4991}
4992
4993#ifndef NDEBUG
4994/// Check that all vector operands have same number of elements. Other operands
4995/// should be listed in NonVecOp.
/// Returns false for instructions with memory operands, a non-vector def 0,
/// any mismatched vector element count, or any non-vector/non-register
/// operand that is not listed in \p NonVecOpIndices.
4998 std::initializer_list<unsigned> NonVecOpIndices) {
// Memory-accessing instructions are not handled by this splitting scheme.
4999 if (MI.getNumMemOperands() != 0)
5000 return false;
5001
5002 LLT VecTy = MRI.getType(MI.getReg(0));
5003 if (!VecTy.isVector())
5004 return false;
// All other vector operands must match this element count.
5005 unsigned NumElts = VecTy.getNumElements();
5006
5007 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
5008 MachineOperand &Op = MI.getOperand(OpIdx);
// Non-register operands (e.g. immediates) are fine only if whitelisted.
5009 if (!Op.isReg()) {
5010 if (!is_contained(NonVecOpIndices, OpIdx))
5011 return false;
5012 continue;
5013 }
5014
// Scalar register operands likewise must be explicitly whitelisted.
5015 LLT Ty = MRI.getType(Op.getReg());
5016 if (!Ty.isVector()) {
5017 if (!is_contained(NonVecOpIndices, OpIdx))
5018 return false;
5019 continue;
5020 }
5021
5022 if (Ty.getNumElements() != NumElts)
5023 return false;
5024 }
5025
5026 return true;
5027}
5028#endif
5029
5030/// Fill \p DstOps with DstOps that have same number of elements combined as
5031/// the Ty. These DstOps have either scalar type when \p NumElts = 1 or are
5032/// vectors with \p NumElts elements. When Ty.getNumElements() is not multiple
5033/// of \p NumElts last DstOp (leftover) has fewer then \p NumElts elements.
5034static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
5035 unsigned NumElts) {
5036 LLT LeftoverTy;
5037 assert(Ty.isVector() && "Expected vector type");
5038 LLT NarrowTy = Ty.changeElementCount(ElementCount::getFixed(NumElts));
5039 int NumParts, NumLeftover;
5040 std::tie(NumParts, NumLeftover) =
5041 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
5042
5043 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
5044 for (int i = 0; i < NumParts; ++i) {
5045 DstOps.push_back(NarrowTy);
5046 }
5047
5048 if (LeftoverTy.isValid()) {
5049 assert(NumLeftover == 1 && "expected exactly one leftover");
5050 DstOps.push_back(LeftoverTy);
5051 }
5052}
5053
5054/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
5055/// made from \p Op depending on operand type.
/// Only register, immediate, and predicate operands are supported; anything
/// else is a programming error (llvm_unreachable).
5057 MachineOperand &Op) {
// Push the same value once per sub-instruction that will consume it.
5058 for (unsigned i = 0; i < N; ++i) {
5059 if (Op.isReg())
5060 Ops.push_back(Op.getReg());
5061 else if (Op.isImm())
5062 Ops.push_back(Op.getImm());
5063 else if (Op.isPredicate())
5064 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
5065 else
5066 llvm_unreachable("Unsupported type");
5067 }
5068}
5069
5070// Handle splitting vector operations which need to have the same number of
5071// elements in each type index, but each type index may have a different element
5072// type.
5073//
5074// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
5075// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5076// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5077//
5078// Also handles some irregular breakdown cases, e.g.
5079// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
5080// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5081// s64 = G_SHL s64, s32
/// Split \p MI into sub-instructions of \p NumElts elements (plus one smaller
/// leftover piece when NumElts does not divide the original element count),
/// then re-merge the small results into MI's original defs. Operand indices
/// in \p NonVecOpIndices are broadcast unchanged to every sub-instruction.
5084 GenericMachineInstr &MI, unsigned NumElts,
5085 std::initializer_list<unsigned> NonVecOpIndices) {
5086 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
5087 "Non-compatible opcode or not specified non-vector operands");
5088 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5089
5090 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5091 unsigned NumDefs = MI.getNumDefs();
5092
5093 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
5094 // Build instructions with DstOps to use instruction found by CSE directly.
5095 // CSE copies found instruction into given vreg when building with vreg dest.
5096 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
5097 // Output registers will be taken from created instructions.
5098 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
5099 for (unsigned i = 0; i < NumDefs; ++i) {
5100 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
5101 }
5102
5103 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
5104 // Operands listed in NonVecOpIndices will be used as is without splitting;
5105 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
5106 // scalar condition (op 1), immediate in sext_inreg (op 2).
5107 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
5108 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5109 ++UseIdx, ++UseNo) {
5110 if (is_contained(NonVecOpIndices, UseIdx)) {
// Replicate the unsplittable operand once per output piece.
5111 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
5112 MI.getOperand(UseIdx));
5113 } else {
5114 SmallVector<Register, 8> SplitPieces;
5115 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
5116 MRI);
5117 llvm::append_range(InputOpsPieces[UseNo], SplitPieces);
5118 }
5119 }
5120
// One extra, smaller piece when NumElts does not evenly divide OrigNumElts.
5121 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5122
5123 // Take i-th piece of each input operand split and build sub-vector/scalar
5124 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
5125 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5127 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5128 Defs.push_back(OutputOpsPieces[DstNo][i]);
5129
5131 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5132 Uses.push_back(InputOpsPieces[InputNo][i]);
5133
5134 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
5135 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5136 OutputRegs[DstNo].push_back(I.getReg(DstNo));
5137 }
5138
5139 // Merge small outputs into MI's output for each def operand.
5140 if (NumLeftovers) {
5141 for (unsigned i = 0; i < NumDefs; ++i)
5142 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
5143 } else {
5144 for (unsigned i = 0; i < NumDefs; ++i)
5145 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
5146 }
5147
5148 MI.eraseFromParent();
5149 return Legalized;
5150}
5151
/// Split a G_PHI over vectors into several G_PHIs of \p NumElts elements
/// (plus a smaller leftover PHI when NumElts does not divide the original
/// element count), then re-merge the small PHI results into MI's def.
5154 unsigned NumElts) {
5155 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5156
5157 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5158 unsigned NumDefs = MI.getNumDefs();
5159
5160 SmallVector<DstOp, 8> OutputOpsPieces;
5161 SmallVector<Register, 8> OutputRegs;
5162 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
5163
5164 // Instructions that perform register split will be inserted in basic block
5165 // where register is defined (basic block is in the next operand).
// PHI uses come in (value, MBB) pairs, hence NumInputs / 2 incoming values
// and the UseIdx += 2 stride below.
5166 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
5167 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5168 UseIdx += 2, ++UseNo) {
5169 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
// Split each incoming value at the end of its predecessor block.
5170 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5171 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
5172 MIRBuilder, MRI);
5173 }
5174
5175 // Build PHIs with fewer elements.
5176 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5177 MIRBuilder.setInsertPt(*MI.getParent(), MI);
5178 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5179 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5180 Phi.addDef(
5181 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5182 OutputRegs.push_back(Phi.getReg(0));
5183
// Each small PHI gets the i-th piece of every incoming value, paired with
// the original incoming basic block operand.
5184 for (unsigned j = 0; j < NumInputs / 2; ++j) {
5185 Phi.addUse(InputOpsPieces[j][i]);
5186 Phi.add(MI.getOperand(1 + j * 2 + 1));
5187 }
5188 }
5189
5190 // Set the insert point after the existing PHIs
5191 MachineBasicBlock &MBB = *MI.getParent();
5192 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
5193
5194 // Merge small outputs into MI's def.
5195 if (NumLeftovers) {
5196 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
5197 } else {
5198 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
5199 }
5200
5201 MI.eraseFromParent();
5202 return Legalized;
5203}
5204
/// Narrow a G_UNMERGE_VALUES on type index 1 by first unmerging the source
/// into NarrowTy pieces, then unmerging each piece into the original
/// destination registers.
5207 unsigned TypeIdx,
5208 LLT NarrowTy) {
// The source register is the last operand; everything before it is a def.
5209 const int NumDst = MI.getNumOperands() - 1;
5210 const Register SrcReg = MI.getOperand(NumDst).getReg();
5211 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5212 LLT SrcTy = MRI.getType(SrcReg);
5213
5214 if (TypeIdx != 1 || NarrowTy == DstTy)
5215 return UnableToLegalize;
5216
5217 // Requires compatible types. Otherwise SrcReg should have been defined by
5218 // merge-like instruction that would get artifact combined. Most likely
5219 // instruction that defines SrcReg has to perform more/fewer elements
5220 // legalization compatible with NarrowTy.
5221 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5222 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5223
5224 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5225 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
5226 return UnableToLegalize;
5227
5228 // This is most likely DstTy (smaller then register size) packed in SrcTy
5229 // (larger then register size) and since unmerge was not combined it will be
5230 // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
5231 // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.
5232
5233 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
5234 //
5235 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5236 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5237 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5238 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5239 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5240 const int PartsPerUnmerge = NumDst / NumUnmerge;
5241
// Second-level unmerges: each NarrowTy piece feeds PartsPerUnmerge of the
// original destination registers, in order.
5242 for (int I = 0; I != NumUnmerge; ++I) {
5243 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5244
5245 for (int J = 0; J != PartsPerUnmerge; ++J)
5246 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
5247 MIB.addUse(Unmerge.getReg(I));
5248 }
5249
5250 MI.eraseFromParent();
5251 return Legalized;
5252}
5253
/// Narrow a merge-like instruction (G_MERGE_VALUES / G_CONCAT_VECTORS /
/// build-vector forms) by first building NarrowTy-sized pieces from the
/// sources and then merging those pieces into the destination.
5256 LLT NarrowTy) {
5257 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5258 // Requires compatible types. Otherwise user of DstReg did not perform unmerge
5259 // that should have been artifact combined. Most likely instruction that uses
5260 // DstReg has to do more/fewer elements legalization compatible with NarrowTy.
5261 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5262 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5263 if (NarrowTy == SrcTy)
5264 return UnableToLegalize;
5265
5266 // This attempts to lower part of LCMTy merge/unmerge sequence. Intended use
5267 // is for old mir tests. Since the changes to more/fewer elements it should no
5268 // longer be possible to generate MIR like this when starting from llvm-ir
5269 // because LCMTy approach was replaced with merge/unmerge to vector elements.
5270 if (TypeIdx == 1) {
5271 assert(SrcTy.isVector() && "Expected vector types");
5272 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5273 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5274 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
5275 return UnableToLegalize;
5276 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
5277 //
5278 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
5279 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
5280 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
5281 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
5282 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
5283 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
5284
// Unmerge every source operand down to scalar elements.
5286 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
5287 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
5288 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
5289 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5290 Elts.push_back(Unmerge.getReg(j));
5291 }
5292
// Group the scalar elements back into NarrowTy-sized vectors.
5293 SmallVector<Register, 8> NarrowTyElts;
5294 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5295 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5296 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
5297 ++i, Offset += NumNarrowTyElts) {
5298 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
5299 NarrowTyElts.push_back(
5300 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5301 }
5302
5303 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5304 MI.eraseFromParent();
5305 return Legalized;
5306 }
5307
5308 assert(TypeIdx == 0 && "Bad type index");
5309 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5310 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5311 return UnableToLegalize;
5312
5313 // This is most likely SrcTy (smaller then register size) packed in DstTy
5314 // (larger then register size) and since merge was not combined it will be
5315 // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy
5316 // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy.
5317
5318 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
5319 //
5320 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5321 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5322 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
5323 SmallVector<Register, 8> NarrowTyElts;
5324 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5325 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5326 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
// Merge consecutive runs of NumElts source operands into NarrowTy pieces.
5327 for (unsigned i = 0; i < NumParts; ++i) {
5329 for (unsigned j = 0; j < NumElts; ++j)
5330 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
5331 NarrowTyElts.push_back(
5332 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5333 }
5334
5335 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5336 MI.eraseFromParent();
5337 return Legalized;
5338}
5339
/// Narrow G_EXTRACT_VECTOR_ELT / G_INSERT_VECTOR_ELT to operate on
/// NarrowVecTy pieces. Only handles constant indices; a variable index
/// cannot be mapped to a single piece here.
5342 unsigned TypeIdx,
5343 LLT NarrowVecTy) {
5344 auto [DstReg, SrcVec] = MI.getFirst2Regs();
5345 Register InsertVal;
5346 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5347
5348 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
5349 if (IsInsert)
5350 InsertVal = MI.getOperand(2).getReg();
5351
// The index is always the last operand for both opcodes.
5352 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
5353 LLT VecTy = MRI.getType(SrcVec);
5354
5355 // If the index is a constant, we can really break this down as you would
5356 // expect, and index into the target size pieces.
5357 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
5358 if (MaybeCst) {
5359 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5360 // Avoid out of bounds indexing the pieces.
// Out-of-bounds constant index: the result is simply undef.
5361 if (IdxVal >= VecTy.getNumElements()) {
5362 MIRBuilder.buildUndef(DstReg);
5363 MI.eraseFromParent();
5364 return Legalized;
5365 }
5366
// Scalar pieces: split to individual elements and pick/replace one.
5367 if (!NarrowVecTy.isVector()) {
5368 SmallVector<Register, 8> SplitPieces;
5369 extractParts(MI.getOperand(1).getReg(), NarrowVecTy,
5370 VecTy.getNumElements(), SplitPieces, MIRBuilder, MRI);
5371 if (IsInsert) {
5372 SplitPieces[IdxVal] = InsertVal;
5373 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), SplitPieces);
5374 } else {
5375 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5376 }
5377 } else {
// Vector pieces: find the sub-vector holding the element and rewrite the
// index relative to that piece.
5378 SmallVector<Register, 8> VecParts;
5379 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5380
5381 // Build a sequence of NarrowTy pieces in VecParts for this operand.
5382 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5383 TargetOpcode::G_ANYEXT);
5384
5385 unsigned NewNumElts = NarrowVecTy.getNumElements();
5386
5387 LLT IdxTy = MRI.getType(Idx);
5388 int64_t PartIdx = IdxVal / NewNumElts;
5389 auto NewIdx =
5390 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5391
5392 if (IsInsert) {
5393 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5394
5395 // Use the adjusted index to insert into one of the subvectors.
5396 auto InsertPart = MIRBuilder.buildInsertVectorElement(
5397 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5398 VecParts[PartIdx] = InsertPart.getReg(0);
5399
5400 // Recombine the inserted subvector with the others to reform the result
5401 // vector.
5402 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5403 } else {
5404 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5405 }
5406 }
5407
5408 MI.eraseFromParent();
5409 return Legalized;
5410 }
5411
5412 // With a variable index, we can't perform the operation in a smaller type, so
5413 // we're forced to expand this.
5414 //
5415 // TODO: We could emit a chain of compare/select to figure out which piece to
5416 // index.
5418}
5419
/// Break a non-atomic, non-extending load/store into NarrowTy-sized memory
/// accesses (plus one leftover access for an uneven breakdown), offsetting
/// the pointer and machine memory operand for each piece.
5422 LLT NarrowTy) {
5423 // FIXME: Don't know how to handle secondary types yet.
5424 if (TypeIdx != 0)
5425 return UnableToLegalize;
5426
// Pieces are addressed by byte offsets, so they must be byte sized.
5427 if (!NarrowTy.isByteSized()) {
5428 LLVM_DEBUG(dbgs() << "Can't narrow load/store to non-byte-sized type\n");
5429 return UnableToLegalize;
5430 }
5431
5432 // This implementation doesn't work for atomics. Give up instead of doing
5433 // something invalid.
5434 if (LdStMI.isAtomic())
5435 return UnableToLegalize;
5436
5437 bool IsLoad = isa<GLoad>(LdStMI);
5438 Register ValReg = LdStMI.getReg(0);
5439 Register AddrReg = LdStMI.getPointerReg();
5440 LLT ValTy = MRI.getType(ValReg);
5441
5442 // FIXME: Do we need a distinct NarrowMemory legalize action?
// Reject extending loads / truncating stores: value and memory sizes differ.
5443 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
5444 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
5445 return UnableToLegalize;
5446 }
5447
5448 int NumParts = -1;
5449 int NumLeftover = -1;
5450 LLT LeftoverTy;
5451 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
// Loads only need the type breakdown; stores additionally split the value
// register into the pieces that will be stored.
5452 if (IsLoad) {
5453 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
5454 } else {
5455 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5456 NarrowLeftoverRegs, MIRBuilder, MRI)) {
5457 NumParts = NarrowRegs.size();
5458 NumLeftover = NarrowLeftoverRegs.size();
5459 }
5460 }
5461
// -1 marks an unsatisfiable breakdown (see getNarrowTypeBreakDown).
5462 if (NumParts == -1)
5463 return UnableToLegalize;
5464
5465 LLT PtrTy = MRI.getType(AddrReg);
5466 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
5467
5468 unsigned TotalSize = ValTy.getSizeInBits();
5469
5470 // Split the load/store into PartTy sized pieces starting at Offset. If this
5471 // is a load, return the new registers in ValRegs. For a store, each elements
5472 // of ValRegs should be PartTy. Returns the next offset that needs to be
5473 // handled.
5474 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
5475 auto MMO = LdStMI.getMMO();
5476 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
5477 unsigned NumParts, unsigned Offset) -> unsigned {
5478 MachineFunction &MF = MIRBuilder.getMF();
5479 unsigned PartSize = PartTy.getSizeInBits();
5480 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
5481 ++Idx) {
5482 unsigned ByteOffset = Offset / 8;
5483 Register NewAddrReg;
5484
// Address of this piece: base pointer plus the piece's byte offset.
5485 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5486 ByteOffset);
5487
// Derive a narrowed memory operand at the same offset.
5488 MachineMemOperand *NewMMO =
5489 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
5490
5491 if (IsLoad) {
5492 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5493 ValRegs.push_back(Dst);
5494 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5495 } else {
5496 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5497 }
// Big-endian walks offsets downward; little-endian upward.
5498 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
5499 }
5500
5501 return Offset;
5502 };
5503
// Big-endian starts at the highest piece so register pieces still appear in
// the same order as for little-endian.
5504 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
5505 unsigned HandledOffset =
5506 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
5507
5508 // Handle the rest of the register if this isn't an even type breakdown.
5509 if (LeftoverTy.isValid())
5510 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5511
// For loads, reassemble the loaded pieces into the original wide value.
5512 if (IsLoad) {
5513 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5514 LeftoverTy, NarrowLeftoverRegs);
5515 }
5516
5517 LdStMI.eraseFromParent();
5518 return Legalized;
5519}
5520
// Fewer-elements vector legalization dispatcher.
// NOTE(review): extraction artifact — the function header (source lines
// 5521-5522, the return type and function name) is missing from this dump;
// from the body this is the LegalizerHelper fewer-elements vector hook
// taking (MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) — TODO confirm
// against upstream. Line 5525, which presumably defined GMI (the generic
// instruction view of MI used below), is also missing — TODO confirm.
// Routes each supported vector opcode to a splitting helper that rewrites
// the operation as multiple operations on NarrowTy-sized pieces; anything
// unhandled returns UnableToLegalize.
5523 LLT NarrowTy) {
5524 using namespace TargetOpcode;
// NumElts is 1 when asked to scalarize (NarrowTy not a vector).
5526 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5527
5528 switch (MI.getOpcode()) {
// This long run of cases covers purely element-wise operations: each
// result element depends only on the corresponding source element(s), so
// they can all share the generic multi-element splitting helper below.
5529 case G_IMPLICIT_DEF:
5530 case G_TRUNC:
5531 case G_AND:
5532 case G_OR:
5533 case G_XOR:
5534 case G_ADD:
5535 case G_SUB:
5536 case G_MUL:
5537 case G_PTR_ADD:
5538 case G_SMULH:
5539 case G_UMULH:
5540 case G_FADD:
5541 case G_FMUL:
5542 case G_FSUB:
5543 case G_FNEG:
5544 case G_FABS:
5545 case G_FCANONICALIZE:
5546 case G_FDIV:
5547 case G_FREM:
5548 case G_FMA:
5549 case G_FMAD:
5550 case G_FPOW:
5551 case G_FEXP:
5552 case G_FEXP2:
5553 case G_FEXP10:
5554 case G_FLOG:
5555 case G_FLOG2:
5556 case G_FLOG10:
5557 case G_FLDEXP:
5558 case G_FNEARBYINT:
5559 case G_FCEIL:
5560 case G_FFLOOR:
5561 case G_FRINT:
5562 case G_INTRINSIC_LRINT:
5563 case G_INTRINSIC_LLRINT:
5564 case G_INTRINSIC_ROUND:
5565 case G_INTRINSIC_ROUNDEVEN:
5566 case G_LROUND:
5567 case G_LLROUND:
5568 case G_INTRINSIC_TRUNC:
5569 case G_FMODF:
5570 case G_FCOS:
5571 case G_FSIN:
5572 case G_FTAN:
5573 case G_FACOS:
5574 case G_FASIN:
5575 case G_FATAN:
5576 case G_FATAN2:
5577 case G_FCOSH:
5578 case G_FSINH:
5579 case G_FTANH:
5580 case G_FSQRT:
5581 case G_BSWAP:
5582 case G_BITREVERSE:
5583 case G_SDIV:
5584 case G_UDIV:
5585 case G_SREM:
5586 case G_UREM:
5587 case G_SDIVREM:
5588 case G_UDIVREM:
5589 case G_SMIN:
5590 case G_SMAX:
5591 case G_UMIN:
5592 case G_UMAX:
5593 case G_ABS:
5594 case G_FMINNUM:
5595 case G_FMAXNUM:
5596 case G_FMINNUM_IEEE:
5597 case G_FMAXNUM_IEEE:
5598 case G_FMINIMUM:
5599 case G_FMAXIMUM:
5600 case G_FMINIMUMNUM:
5601 case G_FMAXIMUMNUM:
5602 case G_FSHL:
5603 case G_FSHR:
5604 case G_ROTL:
5605 case G_ROTR:
5606 case G_FREEZE:
5607 case G_SADDSAT:
5608 case G_SSUBSAT:
5609 case G_UADDSAT:
5610 case G_USUBSAT:
5611 case G_UMULO:
5612 case G_SMULO:
5613 case G_SHL:
5614 case G_LSHR:
5615 case G_ASHR:
5616 case G_SSHLSAT:
5617 case G_USHLSAT:
5618 case G_CTLZ:
5619 case G_CTLZ_ZERO_UNDEF:
5620 case G_CTTZ:
5621 case G_CTTZ_ZERO_UNDEF:
5622 case G_CTPOP:
5623 case G_FCOPYSIGN:
5624 case G_ZEXT:
5625 case G_SEXT:
5626 case G_ANYEXT:
5627 case G_FPEXT:
5628 case G_FPTRUNC:
5629 case G_SITOFP:
5630 case G_UITOFP:
5631 case G_FPTOSI:
5632 case G_FPTOUI:
5633 case G_FPTOSI_SAT:
5634 case G_FPTOUI_SAT:
5635 case G_INTTOPTR:
5636 case G_PTRTOINT:
5637 case G_ADDRSPACE_CAST:
5638 case G_UADDO:
5639 case G_USUBO:
5640 case G_UADDE:
5641 case G_USUBE:
5642 case G_SADDO:
5643 case G_SSUBO:
5644 case G_SADDE:
5645 case G_SSUBE:
5646 case G_STRICT_FADD:
5647 case G_STRICT_FSUB:
5648 case G_STRICT_FMUL:
5649 case G_STRICT_FMA:
5650 case G_STRICT_FLDEXP:
5651 case G_FFREXP:
5652 case G_TRUNC_SSAT_S:
5653 case G_TRUNC_SSAT_U:
5654 case G_TRUNC_USAT_U:
5655 return fewerElementsVectorMultiEltType(GMI, NumElts);
// Compares carry a non-register predicate operand (index 1) that must be
// copied, not split.
5656 case G_ICMP:
5657 case G_FCMP:
5658 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cpm predicate*/});
5659 case G_IS_FPCLASS:
5660 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
5661 case G_SELECT:
// A vector condition splits element-wise; a scalar condition is shared
// across all the narrowed selects.
5662 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
5663 return fewerElementsVectorMultiEltType(GMI, NumElts);
5664 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
5665 case G_PHI:
5666 return fewerElementsVectorPhi(GMI, NumElts);
5667 case G_UNMERGE_VALUES:
5668 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
5669 case G_BUILD_VECTOR:
5670 assert(TypeIdx == 0 && "not a vector type index");
5671 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5672 case G_CONCAT_VECTORS:
5673 if (TypeIdx != 1) // TODO: This probably does work as expected already.
5674 return UnableToLegalize;
5675 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5676 case G_EXTRACT_VECTOR_ELT:
5677 case G_INSERT_VECTOR_ELT:
5678 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
5679 case G_LOAD:
5680 case G_STORE:
5681 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
5682 case G_SEXT_INREG:
5683 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
// NOTE(review): extraction artifact — source line 5684, presumably the
// G_VECREDUCE_* case label(s) guarding this return, is missing from this
// dump; verify against upstream.
5685 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
// Sequential (ordered) FP reductions cannot be re-associated, so they use
// a dedicated helper.
5686 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5687 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5688 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
5689 case G_SHUFFLE_VECTOR:
5690 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
5691 case G_FPOWI:
5692 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
5693 case G_BITCAST:
5694 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
5695 case G_INTRINSIC_FPTRUNC_ROUND:
5696 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
5697 default:
5698 return UnableToLegalize;
5699 }
5700}
5701
// Split a G_BITCAST of a wide vector into bitcasts on NarrowTy-sized
// pieces, then re-merge the pieces into the original destination.
// NOTE(review): extraction artifact — the function header (source lines
// 5702-5703, return type and name, presumably
// LegalizerHelper::fewerElementsBitcast) is missing from this dump; TODO
// confirm against upstream.
5704 LLT NarrowTy) {
5705 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5706 "Not a bitcast operation");
5707
// Only the destination type (index 0) is narrowed here.
5708 if (TypeIdx != 0)
5709 return UnableToLegalize;
5710
5711 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5712
// How many source scalar elements fit in one NarrowTy piece.
5713 unsigned NewElemCount =
5714 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5715 SmallVector<Register> SrcVRegs, BitcastVRegs;
5716 if (NewElemCount == 1) {
// One source element per piece: a plain unmerge yields the pieces.
5717 LLT SrcNarrowTy = SrcTy.getElementType();
5718
5719 auto Unmerge = MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5720 getUnmergeResults(SrcVRegs, *Unmerge);
5721 } else {
// NOTE(review): extraction artifact — source line 5723, the initializer
// of SrcNarrowTy (presumably a fixed vector of NewElemCount source
// elements), is missing from this dump; TODO confirm against upstream.
5722 LLT SrcNarrowTy =
5724
5725 // Split the Src and Dst Reg into smaller registers
5726 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5727 return UnableToLegalize;
5728 }
5729
5730 // Build new smaller bitcast instructions
5731 // Not supporting Leftover types for now but will have to
5732 for (Register Reg : SrcVRegs)
5733 BitcastVRegs.push_back(MIRBuilder.buildBitcast(NarrowTy, Reg).getReg(0));
5734
5735 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5736 MI.eraseFromParent();
5737 return Legalized;
5738}
5739
// Split a G_SHUFFLE_VECTOR into two half-width shuffles (Lo/Hi), falling
// back to per-element extraction + G_BUILD_VECTOR when a half would need
// elements from more than two of the four split inputs.
// NOTE(review): extraction artifact — source line 5740 (return type and
// qualified function name, presumably
// LegalizerHelper::fewerElementsVectorShuffle) is missing from this dump;
// TODO confirm against upstream.
5741 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5742 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5743 if (TypeIdx != 0)
5744 return UnableToLegalize;
5745
5746 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5747 MI.getFirst3RegLLTs();
5748 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5749 // The shuffle should be canonicalized by now.
5750 if (DstTy != Src1Ty)
5751 return UnableToLegalize;
5752 if (DstTy != Src2Ty)
5753 return UnableToLegalize;
5754
5755 if (!isPowerOf2_32(DstTy.getNumElements()))
5756 return UnableToLegalize;
5757
5758 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5759 // Further legalization attempts will be needed to do split further.
5760 NarrowTy =
5761 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
5762 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5763
// Split each source into its low and high half; the four halves are the
// candidate operands for the two output shuffles.
5764 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
5765 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
5766 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
5767 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5768 SplitSrc2Regs[1]};
5769
5770 Register Hi, Lo;
5771
5772 // If Lo or Hi uses elements from at most two of the four input vectors, then
5773 // express it as a vector shuffle of those two inputs. Otherwise extract the
5774 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
// NOTE(review): extraction artifact — source line 5775, the declaration of
// Ops (the per-half shuffle-mask accumulator used below), is missing from
// this dump; TODO confirm against upstream.
5776 for (unsigned High = 0; High < 2; ++High) {
5777 Register &Output = High ? Hi : Lo;
5778
5779 // Build a shuffle mask for the output, discovering on the fly which
5780 // input vectors to use as shuffle operands (recorded in InputUsed).
5781 // If building a suitable shuffle vector proves too hard, then bail
5782 // out with useBuildVector set.
5783 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
5784 unsigned FirstMaskIdx = High * NewElts;
5785 bool UseBuildVector = false;
5786 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5787 // The mask element. This indexes into the input.
5788 int Idx = Mask[FirstMaskIdx + MaskOffset];
5789
5790 // The input vector this mask element indexes into.
5791 unsigned Input = (unsigned)Idx / NewElts;
5792
5793 if (Input >= std::size(Inputs)) {
5794 // The mask element does not index into any input vector.
5795 Ops.push_back(-1);
5796 continue;
5797 }
5798
5799 // Turn the index into an offset from the start of the input vector.
5800 Idx -= Input * NewElts;
5801
5802 // Find or create a shuffle vector operand to hold this input.
5803 unsigned OpNo;
5804 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5805 if (InputUsed[OpNo] == Input) {
5806 // This input vector is already an operand.
5807 break;
5808 } else if (InputUsed[OpNo] == -1U) {
5809 // Create a new operand for this input vector.
5810 InputUsed[OpNo] = Input;
5811 break;
5812 }
5813 }
5814
5815 if (OpNo >= std::size(InputUsed)) {
5816 // More than two input vectors used! Give up on trying to create a
5817 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5818 UseBuildVector = true;
5819 break;
5820 }
5821
5822 // Add the mask index for the new shuffle vector.
5823 Ops.push_back(Idx + OpNo * NewElts);
5824 }
5825
5826 if (UseBuildVector) {
5827 LLT EltTy = NarrowTy.getElementType();
// NOTE(review): extraction artifact — source line 5828, the declaration
// of SVOps (the build-vector element accumulator used below), is missing
// from this dump; TODO confirm against upstream.
5829
5830 // Extract the input elements by hand.
5831 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5832 // The mask element. This indexes into the input.
5833 int Idx = Mask[FirstMaskIdx + MaskOffset];
5834
5835 // The input vector this mask element indexes into.
5836 unsigned Input = (unsigned)Idx / NewElts;
5837
5838 if (Input >= std::size(Inputs)) {
5839 // The mask element is "undef" or indexes off the end of the input.
5840 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5841 continue;
5842 }
5843
5844 // Turn the index into an offset from the start of the input vector.
5845 Idx -= Input * NewElts;
5846
5847 // Extract the vector element by hand.
5848 SVOps.push_back(MIRBuilder
5849 .buildExtractVectorElement(
5850 EltTy, Inputs[Input],
5851 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5852 .getReg(0));
5853 }
5854
5855 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5856 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5857 } else if (InputUsed[0] == -1U) {
5858 // No input vectors were used! The result is undefined.
5859 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5860 } else if (NewElts == 1) {
// Single-element "shuffle" degenerates to a copy of the selected input.
5861 Output = MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);
5862 } else {
5863 Register Op0 = Inputs[InputUsed[0]];
5864 // If only one input was used, use an undefined vector for the other.
5865 Register Op1 = InputUsed[1] == -1U
5866 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5867 : Inputs[InputUsed[1]];
5868 // At least one input vector was used. Create a new shuffle vector.
5869 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5870 }
5871
// Reset the mask accumulator for the Hi-half iteration.
5872 Ops.clear();
5873 }
5874
5875 MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
5876 MI.eraseFromParent();
5877 return Legalized;
5878}
5879
// Narrow a (re-associable) vector reduction: either scalarize it and
// combine the scalars, or reduce NarrowTy-sized sub-vectors and then
// combine the partial results with the scalar opcode.
// NOTE(review): extraction artifact — source line 5880 (return type and
// qualified function name, presumably
// LegalizerHelper::fewerElementsVectorReductions) is missing from this
// dump; TODO confirm against upstream.
5881 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5882 auto &RdxMI = cast<GVecReduce>(MI);
5883
5884 if (TypeIdx != 1)
5885 return UnableToLegalize;
5886
5887 // The semantics of the normal non-sequential reductions allow us to freely
5888 // re-associate the operation.
5889 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5890
// The source element count must split evenly into NarrowTy vectors.
5891 if (NarrowTy.isVector() &&
5892 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5893 return UnableToLegalize;
5894
5895 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5896 SmallVector<Register> SplitSrcs;
5897 // If NarrowTy is a scalar then we're being asked to scalarize.
5898 const unsigned NumParts =
5899 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5900 : SrcTy.getNumElements();
5901
5902 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5903 if (NarrowTy.isScalar()) {
5904 if (DstTy != NarrowTy)
5905 return UnableToLegalize; // FIXME: handle implicit extensions.
5906
5907 if (isPowerOf2_32(NumParts)) {
5908 // Generate a tree of scalar operations to reduce the critical path.
5909 SmallVector<Register> PartialResults;
5910 unsigned NumPartsLeft = NumParts;
5911 while (NumPartsLeft > 1) {
// Pairwise-combine neighbors; each pass halves the number of values.
5912 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5913 PartialResults.emplace_back(
// NOTE(review): extraction artifact — source line 5914 (presumably
// the MIRBuilder receiver of the chained buildInstr call below) is
// missing from this dump; TODO confirm against upstream.
5915 .buildInstr(ScalarOpc, {NarrowTy},
5916 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5917 .getReg(0));
5918 }
5919 SplitSrcs = PartialResults;
5920 PartialResults.clear();
5921 NumPartsLeft = SplitSrcs.size();
5922 }
5923 assert(SplitSrcs.size() == 1);
5924 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5925 MI.eraseFromParent();
5926 return Legalized;
5927 }
5928 // If we can't generate a tree, then just do sequential operations.
5929 Register Acc = SplitSrcs[0];
5930 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5931 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5932 .getReg(0);
5933 MIRBuilder.buildCopy(DstReg, Acc);
5934 MI.eraseFromParent();
5935 return Legalized;
5936 }
// NarrowTy is a vector: reduce each sub-vector separately first.
5937 SmallVector<Register> PartialReductions;
5938 for (unsigned Part = 0; Part < NumParts; ++Part) {
5939 PartialReductions.push_back(
5940 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5941 .getReg(0));
5942 }
5943
5944 // If the types involved are powers of 2, we can generate intermediate vector
5945 // ops, before generating a final reduction operation.
5946 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5947 isPowerOf2_32(NarrowTy.getNumElements())) {
5948 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5949 }
5950
// Otherwise fold the partial scalar results left-to-right; the final
// combine writes directly into DstReg instead of a fresh register.
5951 Register Acc = PartialReductions[0];
5952 for (unsigned Part = 1; Part < NumParts; ++Part) {
5953 if (Part == NumParts - 1) {
5954 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5955 {Acc, PartialReductions[Part]});
5956 } else {
5957 Acc = MIRBuilder
5958 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5959 .getReg(0);
5960 }
5961 }
5962 MI.eraseFromParent();
5963 return Legalized;
5964}
5965
// Scalarize a sequential (ordered) FP reduction. Ordered reductions must
// not be re-associated, so the elements are folded strictly in order into
// the incoming scalar accumulator.
// NOTE(review): extraction artifact — source lines 5966-5967 (return type
// and the start of the signature, presumably
// LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI, ...))
// are missing from this dump; TODO confirm against upstream.
5968 unsigned int TypeIdx,
5969 LLT NarrowTy) {
5970 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5971 MI.getFirst3RegLLTs();
// Only full scalarization of operand 2 with matching scalar types is
// supported.
5972 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5973 DstTy != NarrowTy)
5974 return UnableToLegalize;
5975
5976 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5977 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5978 "Unexpected vecreduce opcode");
5979 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5980 ? TargetOpcode::G_FADD
5981 : TargetOpcode::G_FMUL;
5982
// One part per source element; fold each into the accumulator in order.
5983 SmallVector<Register> SplitSrcs;
5984 unsigned NumParts = SrcTy.getNumElements();
5985 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5986 Register Acc = ScalarReg;
5987 for (unsigned i = 0; i < NumParts; i++)
5988 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5989 .getReg(0);
5990
5991 MIRBuilder.buildCopy(DstReg, Acc);
5992 MI.eraseFromParent();
5993 return Legalized;
5994}
5995
// Tree-reduce a power-of-2 vector reduction: combine NarrowTy-sized pieces
// pairwise with vector ops until one NarrowTy value remains, then rewrite
// the original reduction to consume that value.
// NOTE(review): extraction artifact — source line 5996 (the return type,
// presumably LegalizerHelper::LegalizeResult) is missing from this dump;
// TODO confirm against upstream.
5997LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5998 LLT SrcTy, LLT NarrowTy,
5999 unsigned ScalarOpc) {
6000 SmallVector<Register> SplitSrcs;
6001 // Split the sources into NarrowTy size pieces.
6002 extractParts(SrcReg, NarrowTy,
6003 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
6004 MIRBuilder, MRI);
6005 // We're going to do a tree reduction using vector operations until we have
6006 // one NarrowTy size value left.
6007 while (SplitSrcs.size() > 1) {
6008 SmallVector<Register> PartialRdxs;
6009 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
6010 Register LHS = SplitSrcs[Idx];
6011 Register RHS = SplitSrcs[Idx + 1];
6012 // Create the intermediate vector op.
6013 Register Res =
6014 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
6015 PartialRdxs.push_back(Res);
6016 }
6017 SplitSrcs = std::move(PartialRdxs);
6018 }
6019 // Finally generate the requested NarrowTy based reduction.
// The original reduction instruction is kept; only its source operand is
// retargeted to the single remaining NarrowTy value, under observer
// notifications so the legalizer worklist stays consistent.
6020 Observer.changingInstr(MI);
6021 MI.getOperand(1).setReg(SplitSrcs[0]);
6022 Observer.changedInstr(MI);
6023 return Legalized;
6024}
6025
// Narrow a wide scalar shift with a compile-time-constant amount into
// operations on the two HalfTy halves (InL/InH) of the input. Each shift
// kind distinguishes: amount > full width, amount > half width, amount ==
// half width, and the general sub-half-width case that combines a shifted
// half with carry bits from the other half.
// NOTE(review): extraction artifact — source lines 6026-6027 (return type
// and the start of the signature, presumably
// LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI,
// const APInt &Amt, ...)) are missing from this dump; TODO confirm against
// upstream.
6028 const LLT HalfTy, const LLT AmtTy) {
6029
6030 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6031 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6032 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6033
// Shift by zero: re-merge the halves unchanged.
6034 if (Amt.isZero()) {
6035 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
6036 MI.eraseFromParent();
6037 return Legalized;
6038 }
6039
6040 LLT NVT = HalfTy;
6041 unsigned NVTBits = HalfTy.getSizeInBits();
6042 unsigned VTBits = 2 * NVTBits;
6043
6044 SrcOp Lo(Register(0)), Hi(Register(0));
6045 if (MI.getOpcode() == TargetOpcode::G_SHL) {
6046 if (Amt.ugt(VTBits)) {
6047 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6048 } else if (Amt.ugt(NVTBits)) {
6049 Lo = MIRBuilder.buildConstant(NVT, 0);
6050 Hi = MIRBuilder.buildShl(NVT, InL,
6051 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6052 } else if (Amt == NVTBits) {
6053 Lo = MIRBuilder.buildConstant(NVT, 0);
6054 Hi = InL;
6055 } else {
6056 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
// Hi = (InH << Amt) | (InL >> (NVTBits - Amt)): the high half takes the
// bits shifted out of the low half as its carry-in.
6057 auto OrLHS =
6058 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
6059 auto OrRHS = MIRBuilder.buildLShr(
6060 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6061 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6062 }
6063 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6064 if (Amt.ugt(VTBits)) {
6065 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6066 } else if (Amt.ugt(NVTBits)) {
6067 Lo = MIRBuilder.buildLShr(NVT, InH,
6068 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6069 Hi = MIRBuilder.buildConstant(NVT, 0);
6070 } else if (Amt == NVTBits) {
6071 Lo = InH;
6072 Hi = MIRBuilder.buildConstant(NVT, 0);
6073 } else {
6074 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6075
// Lo = (InL >> Amt) | (InH << (NVTBits - Amt)); Hi = InH >> Amt.
6076 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6077 auto OrRHS = MIRBuilder.buildShl(
6078 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6079
6080 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6081 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
6082 }
6083 } else {
// G_ASHR: same structure as G_LSHR but positions vacated at the top are
// filled with the sign of the high half (InH >> (NVTBits - 1)).
6084 if (Amt.ugt(VTBits)) {
6085 Hi = Lo = MIRBuilder.buildAShr(
6086 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6087 } else if (Amt.ugt(NVTBits)) {
6088 Lo = MIRBuilder.buildAShr(NVT, InH,
6089 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6090 Hi = MIRBuilder.buildAShr(NVT, InH,
6091 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6092 } else if (Amt == NVTBits) {
6093 Lo = InH;
6094 Hi = MIRBuilder.buildAShr(NVT, InH,
6095 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6096 } else {
6097 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6098
6099 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6100 auto OrRHS = MIRBuilder.buildShl(
6101 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6102
6103 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6104 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
6105 }
6106 }
6107
6108 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
6109 MI.eraseFromParent();
6110
6111 return Legalized;
6112}
6113
// Narrow a scalar G_SHL/G_LSHR/G_ASHR. TypeIdx 1 narrows only the shift
// amount; TypeIdx 0 splits the value itself, choosing between multi-way
// decomposition (many parts at once), the constant-amount fast path, and a
// fully general binary (half/half) expansion with select chains.
// NOTE(review): extraction artifact — source lines 6114-6115 (return type
// and the start of the signature, presumably
// LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
// ...)) are missing from this dump; TODO confirm against upstream.
6116 LLT RequestedTy) {
6117 if (TypeIdx == 1) {
// Only the amount operand (index 2) needs narrowing.
6118 Observer.changingInstr(MI);
6119 narrowScalarSrc(MI, RequestedTy, 2);
6120 Observer.changedInstr(MI);
6121 return Legalized;
6122 }
6123
6124 Register DstReg = MI.getOperand(0).getReg();
6125 LLT DstTy = MRI.getType(DstReg);
6126 if (DstTy.isVector())
6127 return UnableToLegalize;
6128
6129 Register Amt = MI.getOperand(2).getReg();
6130 LLT ShiftAmtTy = MRI.getType(Amt);
6131 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
// The binary expansion below halves the width, so it must be even.
6132 if (DstEltSize % 2 != 0)
6133 return UnableToLegalize;
6134
6135 // Check if we should use multi-way splitting instead of recursive binary
6136 // splitting.
6137 //
6138 // Multi-way splitting directly decomposes wide shifts (e.g., 128-bit ->
6139 // 4×32-bit) in a single legalization step, avoiding the recursive overhead
6140 // and dependency chains created by usual binary splitting approach
6141 // (128->64->32).
6142 //
6143 // The >= 8 parts threshold ensures we only use this optimization when binary
6144 // splitting would require multiple recursive passes, avoiding overhead for
6145 // simple 2-way splits where binary approach is sufficient.
6146 if (RequestedTy.isValid() && RequestedTy.isScalar() &&
6147 DstEltSize % RequestedTy.getSizeInBits() == 0) {
6148 const unsigned NumParts = DstEltSize / RequestedTy.getSizeInBits();
6149 // Use multiway if we have 8 or more parts (i.e., would need 3+ recursive
6150 // steps).
6151 if (NumParts >= 8)
6152 return narrowScalarShiftMultiway(MI, RequestedTy);
6153 }
6154
6155 // Fall back to binary splitting:
6156 // Ignore the input type. We can only go to exactly half the size of the
6157 // input. If that isn't small enough, the resulting pieces will be further
6158 // legalized.
6159 const unsigned NewBitSize = DstEltSize / 2;
6160 const LLT HalfTy = LLT::scalar(NewBitSize);
6161 const LLT CondTy = LLT::scalar(1);
6162
6163 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
6164 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
6165 ShiftAmtTy);
6166 }
6167
6168 // TODO: Expand with known bits.
6169
6170 // Handle the fully general expansion by an unknown amount.
6171 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6172
6173 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6174 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6175 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6176
// AmtExcess = Amt - NewBitSize (used when Amt >= half width);
// AmtLack = NewBitSize - Amt (carry amount when Amt < half width).
6177 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6178 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6179
6180 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6181 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
6182 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
6183
6184 Register ResultRegs[2];
6185 switch (MI.getOpcode()) {
6186 case TargetOpcode::G_SHL: {
6187 // Short: ShAmt < NewBitSize
6188 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
6189
6190 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6191 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
6192 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6193
6194 // Long: ShAmt >= NewBitSize
6195 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
6196 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
6197
// The IsZero select guards the Amt == 0 case, where AmtLack == NewBitSize
// would make the carry shift out of range.
6198 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6199 auto Hi = MIRBuilder.buildSelect(
6200 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6201
6202 ResultRegs[0] = Lo.getReg(0);
6203 ResultRegs[1] = Hi.getReg(0);
6204 break;
6205 }
6206 case TargetOpcode::G_LSHR:
6207 case TargetOpcode::G_ASHR: {
6208 // Short: ShAmt < NewBitSize
6209 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
6210
6211 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
6212 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6213 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6214
6215 // Long: ShAmt >= NewBitSize
// NOTE(review): extraction artifact — source line 6216, the declaration
// of HiL (assigned in both branches below), is missing from this dump;
// TODO confirm against upstream.
6217 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6218 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
6219 } else {
6220 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6221 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
6222 }
6223 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
6224 {InH, AmtExcess}); // Lo from Hi part.
6225
6226 auto Lo = MIRBuilder.buildSelect(
6227 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6228
6229 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6230
6231 ResultRegs[0] = Lo.getReg(0);
6232 ResultRegs[1] = Hi.getReg(0);
6233 break;
6234 }
6235 default:
6236 llvm_unreachable("not a shift");
6237 }
6238
6239 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6240 MI.eraseFromParent();
6241 return Legalized;
6242}
6243
// Build one output word of a multi-way-decomposed shift when the shift
// amount is a known constant. The amount is pre-decomposed into
// Params.WordShift (whole-word moves) and Params.BitShift (intra-word
// bits); this selects the contributing source word(s) by index and ORs in
// carry bits from the neighbor when BitShift != 0.
// NOTE(review): extraction artifact — source line 6244 (return type and
// qualified function name, presumably Register
// LegalizerHelper::buildConstantShiftPart(unsigned Opcode, ...)) is
// missing from this dump; TODO confirm against upstream.
6245 unsigned PartIdx,
6246 unsigned NumParts,
6247 ArrayRef<Register> SrcParts,
6248 const ShiftParams &Params,
6249 LLT TargetTy, LLT ShiftAmtTy) {
// Params.WordShift/BitShift were built as constants by the caller;
// recover their integer values for the index arithmetic below.
6250 auto WordShiftConst = getIConstantVRegVal(Params.WordShift, MRI);
6251 auto BitShiftConst = getIConstantVRegVal(Params.BitShift, MRI);
6252 assert(WordShiftConst && BitShiftConst && "Expected constants");
6253
6254 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6255 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6256 const bool NeedsInterWordShift = ShiftBits != 0;
6257
6258 switch (Opcode) {
6259 case TargetOpcode::G_SHL: {
6260 // Data moves from lower indices to higher indices
6261 // If this part would come from a source beyond our range, it's zero
6262 if (PartIdx < ShiftWords)
6263 return Params.Zero;
6264
6265 unsigned SrcIdx = PartIdx - ShiftWords;
// Word-aligned shift: the source word moves over unchanged.
6266 if (!NeedsInterWordShift)
6267 return SrcParts[SrcIdx];
6268
6269 // Combine shifted main part with carry from previous part
6270 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6271 if (SrcIdx > 0) {
6272 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6273 Params.InvBitShift);
6274 return MIRBuilder.buildOr(TargetTy, Hi, Lo).getReg(0);
6275 }
// Lowest contributing word has no lower neighbor to carry from.
6276 return Hi.getReg(0);
6277 }
6278
6279 case TargetOpcode::G_LSHR: {
// Data moves from higher indices to lower indices; out-of-range reads
// are zero-filled.
6280 unsigned SrcIdx = PartIdx + ShiftWords;
6281 if (SrcIdx >= NumParts)
6282 return Params.Zero;
6283 if (!NeedsInterWordShift)
6284 return SrcParts[SrcIdx];
6285
6286 // Combine shifted main part with carry from next part
6287 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6288 if (SrcIdx + 1 < NumParts) {
6289 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6290 Params.InvBitShift);
6291 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6292 }
6293 return Lo.getReg(0);
6294 }
6295
6296 case TargetOpcode::G_ASHR: {
6297 // Like LSHR but preserves sign bit
// Out-of-range reads are filled with Params.SignBit (the sign-extended
// top word) instead of zero.
6298 unsigned SrcIdx = PartIdx + ShiftWords;
6299 if (SrcIdx >= NumParts)
6300 return Params.SignBit;
6301 if (!NeedsInterWordShift)
6302 return SrcParts[SrcIdx];
6303
6304 // Only the original MSB part uses arithmetic shift to preserve sign. All
6305 // other parts use logical shift since they're just moving data bits.
6306 auto Lo =
6307 (SrcIdx == NumParts - 1)
6308 ? MIRBuilder.buildAShr(TargetTy, SrcParts[SrcIdx], Params.BitShift)
6309 : MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6310 Register HiSrc =
6311 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.SignBit;
6312 auto Hi = MIRBuilder.buildShl(TargetTy, HiSrc, Params.InvBitShift);
6313 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6314 }
6315
6316 default:
6317 llvm_unreachable("not a shift");
6318 }
6319}
6320
// Build one output word of a multi-way-decomposed shift with a
// runtime-variable bit amount: (MainOperand shifted by ShiftAmt) OR'd with
// carry bits taken from the adjacent word, with a select guarding the
// ShiftAmt == 0 case where the inverse carry shift would be out of range.
// NOTE(review): extraction artifact — source line 6321 (return type and
// qualified function name, presumably Register
// LegalizerHelper::buildVariableShiftPart(unsigned Opcode, ...)) is
// missing from this dump; TODO confirm against upstream.
6322 Register MainOperand,
6323 Register ShiftAmt,
6324 LLT TargetTy,
6325 Register CarryOperand) {
6326 // This helper generates a single output part for variable shifts by combining
6327 // the main operand (shifted by BitShift) with carry bits from an adjacent
6328 // part.
6329
6330 // For G_ASHR, individual parts don't have their own sign bit, only the
6331 // complete value does. So we use LSHR for the main operand shift in ASHR
6332 // context.
6333 unsigned MainOpcode = (Opcode == TargetOpcode::G_ASHR)
6334 ? static_cast<unsigned>(TargetOpcode::G_LSHR)
6335 : Opcode;
6336
6337 // Perform the primary shift on the main operand
6338 Register MainShifted =
6339 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6340 .getReg(0);
6341
6342 // No carry operand available
6343 if (!CarryOperand.isValid())
6344 return MainShifted;
6345
6346 // If BitShift is 0 (word-aligned shift), no inter-word bit movement occurs,
6347 // so carry bits aren't needed.
6348 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6349 auto ZeroConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6350 LLT BoolTy = LLT::scalar(1);
6351 auto IsZeroBitShift =
6352 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, ShiftAmt, ZeroConst);
6353
6354 // Extract bits from the adjacent part that will "carry over" into this part.
6355 // The carry direction is opposite to the main shift direction, so we can
6356 // align the two shifted values before combining them with OR.
6357
6358 // Determine the carry shift opcode (opposite direction)
6359 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6360 : TargetOpcode::G_SHL;
6361
6362 // Calculate inverse shift amount: BitWidth - ShiftAmt
6363 auto TargetBitsConst =
6364 MIRBuilder.buildConstant(ShiftAmtTy, TargetTy.getScalarSizeInBits());
6365 auto InvShiftAmt = MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6366
6367 // Shift the carry operand
6368 Register CarryBits =
// NOTE(review): extraction artifact — source line 6369 (presumably the
// MIRBuilder receiver of the chained buildInstr call below) is missing
// from this dump; TODO confirm against upstream.
6370 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6371 .getReg(0);
6372
6373 // If BitShift is 0, don't include carry bits (InvShiftAmt would equal
6374 // TargetBits which would be poison for the individual carry shift operation).
6375 auto ZeroReg = MIRBuilder.buildConstant(TargetTy, 0);
6376 Register SafeCarryBits =
6377 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6378 .getReg(0);
6379
6380 // Combine the main shifted part with the carry bits
6381 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6382}
6383
// Multi-way narrowing of a wide shift whose amount is a known constant:
// decompose the amount into whole-word and intra-word components, then
// compute every output word directly via buildConstantShiftPart — no
// select chains needed.
// NOTE(review): extraction artifact — source lines 6384-6385 (return type
// and the start of the signature, presumably
// LegalizerHelper::narrowScalarShiftByConstantMultiway(MachineInstr &MI,
// ...)) are missing from this dump; TODO confirm against upstream.
6386 const APInt &Amt,
6387 LLT TargetTy,
6388 LLT ShiftAmtTy) {
6389 // Any wide shift can be decomposed into WordShift + BitShift components.
6390 // When shift amount is known constant, directly compute the decomposition
6391 // values and generate constant registers.
6392 Register DstReg = MI.getOperand(0).getReg();
6393 Register SrcReg = MI.getOperand(1).getReg();
6394 LLT DstTy = MRI.getType(DstReg);
6395
6396 const unsigned DstBits = DstTy.getScalarSizeInBits();
6397 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6398 const unsigned NumParts = DstBits / TargetBits;
6399
6400 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6401
6402 // When the shift amount is known at compile time, we just calculate which
6403 // source parts contribute to each output part.
6404
6405 SmallVector<Register, 8> SrcParts;
6406 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6407
6408 if (Amt.isZero()) {
6409 // No shift needed, just copy
6410 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6411 MI.eraseFromParent();
6412 return Legalized;
6413 }
6414
6415 ShiftParams Params;
// e.g. 128-bit >> 50 with 32-bit words: ShiftWords = 1, ShiftBits = 18.
6416 const unsigned ShiftWords = Amt.getZExtValue() / TargetBits;
6417 const unsigned ShiftBits = Amt.getZExtValue() % TargetBits;
6418
6419 // Generate constants and values needed by all shift types
6420 Params.WordShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftWords).getReg(0);
6421 Params.BitShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftBits).getReg(0);
6422 Params.InvBitShift =
6423 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6424 Params.Zero = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6425
6426 // For ASHR, we need the sign-extended value to fill shifted-out positions
6427 if (MI.getOpcode() == TargetOpcode::G_ASHR)
6428 Params.SignBit =
// NOTE(review): extraction artifact — source line 6429 (presumably the
// MIRBuilder receiver of the chained buildAShr call below) is missing
// from this dump; TODO confirm against upstream.
6430 .buildAShr(TargetTy, SrcParts[SrcParts.size() - 1],
6431 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6432 .getReg(0)
6433
6434 SmallVector<Register, 8> DstParts(NumParts);
6435 for (unsigned I = 0; I < NumParts; ++I)
6436 DstParts[I] = buildConstantShiftPart(MI.getOpcode(), I, NumParts, SrcParts,
6437 Params, TargetTy, ShiftAmtTy);
6438
6439 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6440 MI.eraseFromParent();
6441 return Legalized;
6442}
6443
6446 Register DstReg = MI.getOperand(0).getReg();
6447 Register SrcReg = MI.getOperand(1).getReg();
6448 Register AmtReg = MI.getOperand(2).getReg();
6449 LLT DstTy = MRI.getType(DstReg);
6450 LLT ShiftAmtTy = MRI.getType(AmtReg);
6451
6452 const unsigned DstBits = DstTy.getScalarSizeInBits();
6453 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6454 const unsigned NumParts = DstBits / TargetBits;
6455
6456 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6457 assert(isPowerOf2_32(TargetBits) && "Target bit width must be power of 2");
6458
6459 // If the shift amount is known at compile time, we can use direct indexing
6460 // instead of generating select chains in the general case.
6461 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI))
6462 return narrowScalarShiftByConstantMultiway(MI, VRegAndVal->Value, TargetTy,
6463 ShiftAmtTy);
6464
6465 // For runtime-variable shift amounts, we must generate a more complex
6466 // sequence that handles all possible shift values using select chains.
6467
6468 // Split the input into target-sized pieces
6469 SmallVector<Register, 8> SrcParts;
6470 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6471
6472 // Shifting by zero should be a no-op.
6473 auto ZeroAmtConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6474 LLT BoolTy = LLT::scalar(1);
6475 auto IsZeroShift =
6476 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, AmtReg, ZeroAmtConst);
6477
6478 // Any wide shift can be decomposed into two components:
6479 // 1. WordShift: number of complete target-sized words to shift
6480 // 2. BitShift: number of bits to shift within each word
6481 //
6482 // Example: 128-bit >> 50 with 32-bit target:
6483 // WordShift = 50 / 32 = 1 (shift right by 1 complete word)
6484 // BitShift = 50 % 32 = 18 (shift each word right by 18 bits)
6485 unsigned TargetBitsLog2 = Log2_32(TargetBits);
6486 auto TargetBitsLog2Const =
6487 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6488 auto TargetBitsMask = MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6489
6490 Register WordShift =
6491 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6492 Register BitShift =
6493 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6494
6495 // Fill values:
6496 // - SHL/LSHR: fill with zeros
6497 // - ASHR: fill with sign-extended MSB
6498 Register ZeroReg = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6499
6500 Register FillValue;
6501 if (MI.getOpcode() == TargetOpcode::G_ASHR) {
6502 auto TargetBitsMinusOneConst =
6503 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6504 FillValue = MIRBuilder
6505 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6506 TargetBitsMinusOneConst)
6507 .getReg(0);
6508 } else {
6509 FillValue = ZeroReg;
6510 }
6511
6512 SmallVector<Register, 8> DstParts(NumParts);
6513
6514 // For each output part, generate a select chain that chooses the correct
6515 // result based on the runtime WordShift value. This handles all possible
6516 // word shift amounts by pre-calculating what each would produce.
6517 for (unsigned I = 0; I < NumParts; ++I) {
6518 // Initialize with appropriate default value for this shift type
6519 Register InBoundsResult = FillValue;
6520
6521 // clang-format off
6522 // Build a branchless select chain by pre-computing results for all possible
6523 // WordShift values (0 to NumParts-1). Each iteration nests a new select:
6524 //
6525 // K=0: select(WordShift==0, result0, FillValue)
6526 // K=1: select(WordShift==1, result1, select(WordShift==0, result0, FillValue))
6527 // K=2: select(WordShift==2, result2, select(WordShift==1, result1, select(...)))
6528 // clang-format on
6529 for (unsigned K = 0; K < NumParts; ++K) {
6530 auto WordShiftKConst = MIRBuilder.buildConstant(ShiftAmtTy, K);
6531 auto IsWordShiftK = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy,
6532 WordShift, WordShiftKConst);
6533
6534 // Calculate source indices for this word shift
6535 //
6536 // For 4-part 128-bit value with K=1 word shift:
6537 // SHL: [3][2][1][0] << K => [2][1][0][Z]
6538 // -> (MainIdx = I-K, CarryIdx = I-K-1)
6539 // LSHR: [3][2][1][0] >> K => [Z][3][2][1]
6540 // -> (MainIdx = I+K, CarryIdx = I+K+1)
6541 int MainSrcIdx;
6542 int CarrySrcIdx; // Index for the word that provides the carried-in bits.
6543
6544 switch (MI.getOpcode()) {
6545 case TargetOpcode::G_SHL:
6546 MainSrcIdx = (int)I - (int)K;
6547 CarrySrcIdx = MainSrcIdx - 1;
6548 break;
6549 case TargetOpcode::G_LSHR:
6550 case TargetOpcode::G_ASHR:
6551 MainSrcIdx = (int)I + (int)K;
6552 CarrySrcIdx = MainSrcIdx + 1;
6553 break;
6554 default:
6555 llvm_unreachable("Not a shift");
6556 }
6557
6558 // Check bounds and build the result for this word shift
6559 Register ResultForK;
6560 if (MainSrcIdx >= 0 && MainSrcIdx < (int)NumParts) {
6561 Register MainOp = SrcParts[MainSrcIdx];
6562 Register CarryOp;
6563
6564 // Determine carry operand with bounds checking
6565 if (CarrySrcIdx >= 0 && CarrySrcIdx < (int)NumParts)
6566 CarryOp = SrcParts[CarrySrcIdx];
6567 else if (MI.getOpcode() == TargetOpcode::G_ASHR &&
6568 CarrySrcIdx >= (int)NumParts)
6569 CarryOp = FillValue; // Use sign extension
6570
6571 ResultForK = buildVariableShiftPart(MI.getOpcode(), MainOp, BitShift,
6572 TargetTy, CarryOp);
6573 } else {
6574 // Out of bounds - use fill value for this k
6575 ResultForK = FillValue;
6576 }
6577
6578 // Select this result if WordShift equals k
6579 InBoundsResult =
6581 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6582 .getReg(0);
6583 }
6584
6585 // Handle zero-shift special case: if shift is 0, use original input
6586 DstParts[I] =
6588 .buildSelect(TargetTy, IsZeroShift, SrcParts[I], InBoundsResult)
6589 .getReg(0);
6590 }
6591
6592 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6593 MI.eraseFromParent();
6594 return Legalized;
6595}
6596
6599 LLT MoreTy) {
6600 assert(TypeIdx == 0 && "Expecting only Idx 0");
6601
6602 Observer.changingInstr(MI);
6603 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
6604 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
6605 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
6606 moreElementsVectorSrc(MI, MoreTy, I);
6607 }
6608
6609 MachineBasicBlock &MBB = *MI.getParent();
6610 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
6611 moreElementsVectorDst(MI, MoreTy, 0);
6612 Observer.changedInstr(MI);
6613 return Legalized;
6614}
6615
6616MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
6617 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
6618 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
6619
6620 switch (Opcode) {
6621 default:
6623 "getNeutralElementForVecReduce called with invalid opcode!");
6624 case TargetOpcode::G_VECREDUCE_ADD:
6625 case TargetOpcode::G_VECREDUCE_OR:
6626 case TargetOpcode::G_VECREDUCE_XOR:
6627 case TargetOpcode::G_VECREDUCE_UMAX:
6628 return MIRBuilder.buildConstant(Ty, 0);
6629 case TargetOpcode::G_VECREDUCE_MUL:
6630 return MIRBuilder.buildConstant(Ty, 1);
6631 case TargetOpcode::G_VECREDUCE_AND:
6632 case TargetOpcode::G_VECREDUCE_UMIN:
6634 Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
6635 case TargetOpcode::G_VECREDUCE_SMAX:
6637 Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
6638 case TargetOpcode::G_VECREDUCE_SMIN:
6640 Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
6641 case TargetOpcode::G_VECREDUCE_FADD:
6642 return MIRBuilder.buildFConstant(Ty, -0.0);
6643 case TargetOpcode::G_VECREDUCE_FMUL:
6644 return MIRBuilder.buildFConstant(Ty, 1.0);
6645 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6646 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6647 assert(false && "getNeutralElementForVecReduce unimplemented for "
6648 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6649 }
6650 llvm_unreachable("switch expected to return!");
6651}
6652
6655 LLT MoreTy) {
6656 unsigned Opc = MI.getOpcode();
6657 switch (Opc) {
6658 case TargetOpcode::G_IMPLICIT_DEF:
6659 case TargetOpcode::G_LOAD: {
6660 if (TypeIdx != 0)
6661 return UnableToLegalize;
6662 Observer.changingInstr(MI);
6663 moreElementsVectorDst(MI, MoreTy, 0);
6664 Observer.changedInstr(MI);
6665 return Legalized;
6666 }
6667 case TargetOpcode::G_STORE:
6668 if (TypeIdx != 0)
6669 return UnableToLegalize;
6670 Observer.changingInstr(MI);
6671 moreElementsVectorSrc(MI, MoreTy, 0);
6672 Observer.changedInstr(MI);
6673 return Legalized;
6674 case TargetOpcode::G_AND:
6675 case TargetOpcode::G_OR:
6676 case TargetOpcode::G_XOR:
6677 case TargetOpcode::G_ADD:
6678 case TargetOpcode::G_SUB:
6679 case TargetOpcode::G_MUL:
6680 case TargetOpcode::G_FADD:
6681 case TargetOpcode::G_FSUB:
6682 case TargetOpcode::G_FMUL:
6683 case TargetOpcode::G_FDIV:
6684 case TargetOpcode::G_FCOPYSIGN:
6685 case TargetOpcode::G_UADDSAT:
6686 case TargetOpcode::G_USUBSAT:
6687 case TargetOpcode::G_SADDSAT:
6688 case TargetOpcode::G_SSUBSAT:
6689 case TargetOpcode::G_SMIN:
6690 case TargetOpcode::G_SMAX:
6691 case TargetOpcode::G_UMIN:
6692 case TargetOpcode::G_UMAX:
6693 case TargetOpcode::G_FMINNUM:
6694 case TargetOpcode::G_FMAXNUM:
6695 case TargetOpcode::G_FMINNUM_IEEE:
6696 case TargetOpcode::G_FMAXNUM_IEEE:
6697 case TargetOpcode::G_FMINIMUM:
6698 case TargetOpcode::G_FMAXIMUM:
6699 case TargetOpcode::G_FMINIMUMNUM:
6700 case TargetOpcode::G_FMAXIMUMNUM:
6701 case TargetOpcode::G_STRICT_FADD:
6702 case TargetOpcode::G_STRICT_FSUB:
6703 case TargetOpcode::G_STRICT_FMUL: {
6704 Observer.changingInstr(MI);
6705 moreElementsVectorSrc(MI, MoreTy, 1);
6706 moreElementsVectorSrc(MI, MoreTy, 2);
6707 moreElementsVectorDst(MI, MoreTy, 0);
6708 Observer.changedInstr(MI);
6709 return Legalized;
6710 }
6711 case TargetOpcode::G_SHL:
6712 case TargetOpcode::G_ASHR:
6713 case TargetOpcode::G_LSHR: {
6714 Observer.changingInstr(MI);
6715 moreElementsVectorSrc(MI, MoreTy, 1);
6716 // The shift operand may have a different scalar type from the source and
6717 // destination operands.
6718 LLT ShiftMoreTy = MoreTy.changeElementType(
6719 MRI.getType(MI.getOperand(2).getReg()).getElementType());
6720 moreElementsVectorSrc(MI, ShiftMoreTy, 2);
6721 moreElementsVectorDst(MI, MoreTy, 0);
6722 Observer.changedInstr(MI);
6723 return Legalized;
6724 }
6725 case TargetOpcode::G_FMA:
6726 case TargetOpcode::G_STRICT_FMA:
6727 case TargetOpcode::G_FSHR:
6728 case TargetOpcode::G_FSHL: {
6729 Observer.changingInstr(MI);
6730 moreElementsVectorSrc(MI, MoreTy, 1);
6731 moreElementsVectorSrc(MI, MoreTy, 2);
6732 moreElementsVectorSrc(MI, MoreTy, 3);
6733 moreElementsVectorDst(MI, MoreTy, 0);
6734 Observer.changedInstr(MI);
6735 return Legalized;
6736 }
6737 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6738 case TargetOpcode::G_EXTRACT:
6739 if (TypeIdx != 1)
6740 return UnableToLegalize;
6741 Observer.changingInstr(MI);
6742 moreElementsVectorSrc(MI, MoreTy, 1);
6743 Observer.changedInstr(MI);
6744 return Legalized;
6745 case TargetOpcode::G_INSERT:
6746 case TargetOpcode::G_INSERT_VECTOR_ELT:
6747 case TargetOpcode::G_FREEZE:
6748 case TargetOpcode::G_FNEG:
6749 case TargetOpcode::G_FABS:
6750 case TargetOpcode::G_FSQRT:
6751 case TargetOpcode::G_FCEIL:
6752 case TargetOpcode::G_FFLOOR:
6753 case TargetOpcode::G_FNEARBYINT:
6754 case TargetOpcode::G_FRINT:
6755 case TargetOpcode::G_INTRINSIC_ROUND:
6756 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6757 case TargetOpcode::G_INTRINSIC_TRUNC:
6758 case TargetOpcode::G_BITREVERSE:
6759 case TargetOpcode::G_BSWAP:
6760 case TargetOpcode::G_FCANONICALIZE:
6761 case TargetOpcode::G_SEXT_INREG:
6762 case TargetOpcode::G_ABS:
6763 case TargetOpcode::G_CTLZ:
6764 case TargetOpcode::G_CTPOP:
6765 if (TypeIdx != 0)
6766 return UnableToLegalize;
6767 Observer.changingInstr(MI);
6768 moreElementsVectorSrc(MI, MoreTy, 1);
6769 moreElementsVectorDst(MI, MoreTy, 0);
6770 Observer.changedInstr(MI);
6771 return Legalized;
6772 case TargetOpcode::G_SELECT: {
6773 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
6774 if (TypeIdx == 1) {
6775 if (!CondTy.isScalar() ||
6776 DstTy.getElementCount() != MoreTy.getElementCount())
6777 return UnableToLegalize;
6778
6779 // This is turning a scalar select of vectors into a vector
6780 // select. Broadcast the select condition.
6781 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6782 Observer.changingInstr(MI);
6783 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6784 Observer.changedInstr(MI);
6785 return Legalized;
6786 }
6787
6788 if (CondTy.isVector())
6789 return UnableToLegalize;
6790
6791 Observer.changingInstr(MI);
6792 moreElementsVectorSrc(MI, MoreTy, 2);
6793 moreElementsVectorSrc(MI, MoreTy, 3);
6794 moreElementsVectorDst(MI, MoreTy, 0);
6795 Observer.changedInstr(MI);
6796 return Legalized;
6797 }
6798 case TargetOpcode::G_UNMERGE_VALUES:
6799 return UnableToLegalize;
6800 case TargetOpcode::G_PHI:
6801 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
6802 case TargetOpcode::G_SHUFFLE_VECTOR:
6803 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
6804 case TargetOpcode::G_BUILD_VECTOR: {
6806 for (auto Op : MI.uses()) {
6807 Elts.push_back(Op.getReg());
6808 }
6809
6810 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
6811 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
6812 }
6813
6814 MIRBuilder.buildDeleteTrailingVectorElements(
6815 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
6816 MI.eraseFromParent();
6817 return Legalized;
6818 }
6819 case TargetOpcode::G_SEXT:
6820 case TargetOpcode::G_ZEXT:
6821 case TargetOpcode::G_ANYEXT:
6822 case TargetOpcode::G_TRUNC:
6823 case TargetOpcode::G_FPTRUNC:
6824 case TargetOpcode::G_FPEXT:
6825 case TargetOpcode::G_FPTOSI:
6826 case TargetOpcode::G_FPTOUI:
6827 case TargetOpcode::G_FPTOSI_SAT:
6828 case TargetOpcode::G_FPTOUI_SAT:
6829 case TargetOpcode::G_SITOFP:
6830 case TargetOpcode::G_UITOFP: {
6831 Observer.changingInstr(MI);
6832 LLT SrcExtTy;
6833 LLT DstExtTy;
6834 if (TypeIdx == 0) {
6835 DstExtTy = MoreTy;
6836 SrcExtTy = MoreTy.changeElementType(
6837 MRI.getType(MI.getOperand(1).getReg()).getElementType());
6838 } else {
6839 DstExtTy = MoreTy.changeElementType(
6840 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6841 SrcExtTy = MoreTy;
6842 }
6843 moreElementsVectorSrc(MI, SrcExtTy, 1);
6844 moreElementsVectorDst(MI, DstExtTy, 0);
6845 Observer.changedInstr(MI);
6846 return Legalized;
6847 }
6848 case TargetOpcode::G_ICMP:
6849 case TargetOpcode::G_FCMP: {
6850 if (TypeIdx != 1)
6851 return UnableToLegalize;
6852
6853 Observer.changingInstr(MI);
6854 moreElementsVectorSrc(MI, MoreTy, 2);
6855 moreElementsVectorSrc(MI, MoreTy, 3);
6856 LLT CondTy = MoreTy.changeVectorElementType(
6857 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6858 moreElementsVectorDst(MI, CondTy, 0);
6859 Observer.changedInstr(MI);
6860 return Legalized;
6861 }
6862 case TargetOpcode::G_BITCAST: {
6863 if (TypeIdx != 0)
6864 return UnableToLegalize;
6865
6866 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
6867 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6868
6869 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
6870 if (coefficient % DstTy.getNumElements() != 0)
6871 return UnableToLegalize;
6872
6873 coefficient = coefficient / DstTy.getNumElements();
6874
6875 LLT NewTy = SrcTy.changeElementCount(
6876 ElementCount::get(coefficient, MoreTy.isScalable()));
6877 Observer.changingInstr(MI);
6878 moreElementsVectorSrc(MI, NewTy, 1);
6879 moreElementsVectorDst(MI, MoreTy, 0);
6880 Observer.changedInstr(MI);
6881 return Legalized;
6882 }
6883 case TargetOpcode::G_VECREDUCE_FADD:
6884 case TargetOpcode::G_VECREDUCE_FMUL:
6885 case TargetOpcode::G_VECREDUCE_ADD:
6886 case TargetOpcode::G_VECREDUCE_MUL:
6887 case TargetOpcode::G_VECREDUCE_AND:
6888 case TargetOpcode::G_VECREDUCE_OR:
6889 case TargetOpcode::G_VECREDUCE_XOR:
6890 case TargetOpcode::G_VECREDUCE_SMAX:
6891 case TargetOpcode::G_VECREDUCE_SMIN:
6892 case TargetOpcode::G_VECREDUCE_UMAX:
6893 case TargetOpcode::G_VECREDUCE_UMIN: {
6894 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
6895 MachineOperand &MO = MI.getOperand(1);
6896 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6897 auto NeutralElement = getNeutralElementForVecReduce(
6898 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
6899
6900 LLT IdxTy(TLI.getVectorIdxLLT(MIRBuilder.getDataLayout()));
6901 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
6902 i != e; i++) {
6903 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
6904 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6905 NeutralElement, Idx);
6906 }
6907
6908 Observer.changingInstr(MI);
6909 MO.setReg(NewVec.getReg(0));
6910 Observer.changedInstr(MI);
6911 return Legalized;
6912 }
6913
6914 default:
6915 return UnableToLegalize;
6916 }
6917}
6918
6921 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6922 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6923 unsigned MaskNumElts = Mask.size();
6924 unsigned SrcNumElts = SrcTy.getNumElements();
6925 LLT DestEltTy = DstTy.getElementType();
6926
6927 if (MaskNumElts == SrcNumElts)
6928 return Legalized;
6929
6930 if (MaskNumElts < SrcNumElts) {
6931 // Extend mask to match new destination vector size with
6932 // undef values.
6933 SmallVector<int, 16> NewMask(SrcNumElts, -1);
6934 llvm::copy(Mask, NewMask.begin());
6935
6936 moreElementsVectorDst(MI, SrcTy, 0);
6937 MIRBuilder.setInstrAndDebugLoc(MI);
6938 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6939 MI.getOperand(1).getReg(),
6940 MI.getOperand(2).getReg(), NewMask);
6941 MI.eraseFromParent();
6942
6943 return Legalized;
6944 }
6945
6946 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
6947 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6948 LLT PaddedTy =
6949 DstTy.changeVectorElementCount(ElementCount::getFixed(PaddedMaskNumElts));
6950
6951 // Create new source vectors by concatenating the initial
6952 // source vectors with undefined vectors of the same size.
6953 auto Undef = MIRBuilder.buildUndef(SrcTy);
6954 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
6955 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
6956 MOps1[0] = MI.getOperand(1).getReg();
6957 MOps2[0] = MI.getOperand(2).getReg();
6958
6959 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
6960 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
6961
6962 // Readjust mask for new input vector length.
6963 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
6964 for (unsigned I = 0; I != MaskNumElts; ++I) {
6965 int Idx = Mask[I];
6966 if (Idx >= static_cast<int>(SrcNumElts))
6967 Idx += PaddedMaskNumElts - SrcNumElts;
6968 MappedOps[I] = Idx;
6969 }
6970
6971 // If we got more elements than required, extract subvector.
6972 if (MaskNumElts != PaddedMaskNumElts) {
6973 auto Shuffle =
6974 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
6975
6976 SmallVector<Register, 16> Elts(MaskNumElts);
6977 for (unsigned I = 0; I < MaskNumElts; ++I) {
6978 Elts[I] =
6979 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
6980 .getReg(0);
6981 }
6982 MIRBuilder.buildBuildVector(DstReg, Elts);
6983 } else {
6984 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
6985 }
6986
6987 MI.eraseFromParent();
6989}
6990
6993 unsigned int TypeIdx, LLT MoreTy) {
6994 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
6995 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6996 unsigned NumElts = DstTy.getNumElements();
6997 unsigned WidenNumElts = MoreTy.getNumElements();
6998
6999 if (DstTy.isVector() && Src1Ty.isVector() &&
7000 DstTy.getNumElements() != Src1Ty.getNumElements()) {
7002 }
7003
7004 if (TypeIdx != 0)
7005 return UnableToLegalize;
7006
7007 // Expect a canonicalized shuffle.
7008 if (DstTy != Src1Ty || DstTy != Src2Ty)
7009 return UnableToLegalize;
7010
7011 moreElementsVectorSrc(MI, MoreTy, 1);
7012 moreElementsVectorSrc(MI, MoreTy, 2);
7013
7014 // Adjust mask based on new input vector length.
7015 SmallVector<int, 16> NewMask(WidenNumElts, -1);
7016 for (unsigned I = 0; I != NumElts; ++I) {
7017 int Idx = Mask[I];
7018 if (Idx < static_cast<int>(NumElts))
7019 NewMask[I] = Idx;
7020 else
7021 NewMask[I] = Idx - NumElts + WidenNumElts;
7022 }
7023 moreElementsVectorDst(MI, MoreTy, 0);
7024 MIRBuilder.setInstrAndDebugLoc(MI);
7025 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
7026 MI.getOperand(1).getReg(),
7027 MI.getOperand(2).getReg(), NewMask);
7028 MI.eraseFromParent();
7029 return Legalized;
7030}
7031
7032void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
7033 ArrayRef<Register> Src1Regs,
7034 ArrayRef<Register> Src2Regs,
7035 LLT NarrowTy) {
7037 unsigned SrcParts = Src1Regs.size();
7038 unsigned DstParts = DstRegs.size();
7039
7040 unsigned DstIdx = 0; // Low bits of the result.
7041 Register FactorSum =
7042 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
7043 DstRegs[DstIdx] = FactorSum;
7044
7045 Register CarrySumPrevDstIdx;
7047
7048 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
7049 // Collect low parts of muls for DstIdx.
7050 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
7051 i <= std::min(DstIdx, SrcParts - 1); ++i) {
7053 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
7054 Factors.push_back(Mul.getReg(0));
7055 }
7056 // Collect high parts of muls from previous DstIdx.
7057 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
7058 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
7059 MachineInstrBuilder Umulh =
7060 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
7061 Factors.push_back(Umulh.getReg(0));
7062 }
7063 // Add CarrySum from additions calculated for previous DstIdx.
7064 if (DstIdx != 1) {
7065 Factors.push_back(CarrySumPrevDstIdx);
7066 }
7067
7068 Register CarrySum;
7069 // Add all factors and accumulate all carries into CarrySum.
7070 if (DstIdx != DstParts - 1) {
7071 MachineInstrBuilder Uaddo =
7072 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
7073 FactorSum = Uaddo.getReg(0);
7074 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
7075 for (unsigned i = 2; i < Factors.size(); ++i) {
7076 MachineInstrBuilder Uaddo =
7077 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
7078 FactorSum = Uaddo.getReg(0);
7079 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
7080 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
7081 }
7082 } else {
7083 // Since value for the next index is not calculated, neither is CarrySum.
7084 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
7085 for (unsigned i = 2; i < Factors.size(); ++i)
7086 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
7087 }
7088
7089 CarrySumPrevDstIdx = CarrySum;
7090 DstRegs[DstIdx] = FactorSum;
7091 Factors.clear();
7092 }
7093}
7094
7097 LLT NarrowTy) {
7098 if (TypeIdx != 0)
7099 return UnableToLegalize;
7100
7101 Register DstReg = MI.getOperand(0).getReg();
7102 LLT DstType = MRI.getType(DstReg);
7103 // FIXME: add support for vector types
7104 if (DstType.isVector())
7105 return UnableToLegalize;
7106
7107 unsigned Opcode = MI.getOpcode();
7108 unsigned OpO, OpE, OpF;
7109 switch (Opcode) {
7110 case TargetOpcode::G_SADDO:
7111 case TargetOpcode::G_SADDE:
7112 case TargetOpcode::G_UADDO:
7113 case TargetOpcode::G_UADDE:
7114 case TargetOpcode::G_ADD:
7115 OpO = TargetOpcode::G_UADDO;
7116 OpE = TargetOpcode::G_UADDE;
7117 OpF = TargetOpcode::G_UADDE;
7118 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7119 OpF = TargetOpcode::G_SADDE;
7120 break;
7121 case TargetOpcode::G_SSUBO:
7122 case TargetOpcode::G_SSUBE:
7123 case TargetOpcode::G_USUBO:
7124 case TargetOpcode::G_USUBE:
7125 case TargetOpcode::G_SUB:
7126 OpO = TargetOpcode::G_USUBO;
7127 OpE = TargetOpcode::G_USUBE;
7128 OpF = TargetOpcode::G_USUBE;
7129 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7130 OpF = TargetOpcode::G_SSUBE;
7131 break;
7132 default:
7133 llvm_unreachable("Unexpected add/sub opcode!");
7134 }
7135
7136 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
7137 unsigned NumDefs = MI.getNumExplicitDefs();
7138 Register Src1 = MI.getOperand(NumDefs).getReg();
7139 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
7140 Register CarryDst, CarryIn;
7141 if (NumDefs == 2)
7142 CarryDst = MI.getOperand(1).getReg();
7143 if (MI.getNumOperands() == NumDefs + 3)
7144 CarryIn = MI.getOperand(NumDefs + 2).getReg();
7145
7146 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7147 LLT LeftoverTy, DummyTy;
7148 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
7149 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7150 MIRBuilder, MRI);
7151 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
7152 MRI);
7153
7154 int NarrowParts = Src1Regs.size();
7155 Src1Regs.append(Src1Left);
7156 Src2Regs.append(Src2Left);
7157 DstRegs.reserve(Src1Regs.size());
7158
7159 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
7160 Register DstReg =
7161 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7162 Register CarryOut;
7163 // Forward the final carry-out to the destination register
7164 if (i == e - 1 && CarryDst)
7165 CarryOut = CarryDst;
7166 else
7167 CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
7168
7169 if (!CarryIn) {
7170 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7171 {Src1Regs[i], Src2Regs[i]});
7172 } else if (i == e - 1) {
7173 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7174 {Src1Regs[i], Src2Regs[i], CarryIn});
7175 } else {
7176 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7177 {Src1Regs[i], Src2Regs[i], CarryIn});
7178 }
7179
7180 DstRegs.push_back(DstReg);
7181 CarryIn = CarryOut;
7182 }
7183 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
7184 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7185 ArrayRef(DstRegs).drop_front(NarrowParts));
7186
7187 MI.eraseFromParent();
7188 return Legalized;
7189}
7190
7193 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
7194
7195 LLT Ty = MRI.getType(DstReg);
7196 if (Ty.isVector())
7197 return UnableToLegalize;
7198
7199 unsigned Size = Ty.getSizeInBits();
7200 unsigned NarrowSize = NarrowTy.getSizeInBits();
7201 if (Size % NarrowSize != 0)
7202 return UnableToLegalize;
7203
7204 unsigned NumParts = Size / NarrowSize;
7205 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
7206 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7207
7208 SmallVector<Register, 2> Src1Parts, Src2Parts;
7209 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
7210 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
7211 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
7212 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7213
7214 // Take only high half of registers if this is high mul.
7215 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
7216 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7217 MI.eraseFromParent();
7218 return Legalized;
7219}
7220
7223 LLT NarrowTy) {
7224 if (TypeIdx != 0)
7225 return UnableToLegalize;
7226
7227 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
7228
7229 Register Src = MI.getOperand(1).getReg();
7230 LLT SrcTy = MRI.getType(Src);
7231
7232 // If all finite floats fit into the narrowed integer type, we can just swap
7233 // out the result type. This is practically only useful for conversions from
7234 // half to at least 16-bits, so just handle the one case.
7235 if (SrcTy.getScalarType() != LLT::scalar(16) ||
7236 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
7237 return UnableToLegalize;
7238
7239 Observer.changingInstr(MI);
7240 narrowScalarDst(MI, NarrowTy, 0,
7241 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7242 Observer.changedInstr(MI);
7243 return Legalized;
7244}
7245
7248 LLT NarrowTy) {
7249 if (TypeIdx != 1)
7250 return UnableToLegalize;
7251
7252 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7253
7254 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7255 // FIXME: add support for when SizeOp1 isn't an exact multiple of
7256 // NarrowSize.
7257 if (SizeOp1 % NarrowSize != 0)
7258 return UnableToLegalize;
7259 int NumParts = SizeOp1 / NarrowSize;
7260
7261 SmallVector<Register, 2> SrcRegs, DstRegs;
7262 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7263 MIRBuilder, MRI);
7264
7265 Register OpReg = MI.getOperand(0).getReg();
7266 uint64_t OpStart = MI.getOperand(2).getImm();
7267 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7268 for (int i = 0; i < NumParts; ++i) {
7269 unsigned SrcStart = i * NarrowSize;
7270
7271 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7272 // No part of the extract uses this subregister, ignore it.
7273 continue;
7274 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7275 // The entire subregister is extracted, forward the value.
7276 DstRegs.push_back(SrcRegs[i]);
7277 continue;
7278 }
7279
7280 // OpSegStart is where this destination segment would start in OpReg if it
7281 // extended infinitely in both directions.
7282 int64_t ExtractOffset;
7283 uint64_t SegSize;
7284 if (OpStart < SrcStart) {
7285 ExtractOffset = 0;
7286 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7287 } else {
7288 ExtractOffset = OpStart - SrcStart;
7289 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7290 }
7291
7292 Register SegReg = SrcRegs[i];
7293 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7294 // A genuine extract is needed.
7295 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7296 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7297 }
7298
7299 DstRegs.push_back(SegReg);
7300 }
7301
7302 Register DstReg = MI.getOperand(0).getReg();
7303 if (MRI.getType(DstReg).isVector())
7304 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7305 else if (DstRegs.size() > 1)
7306 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7307 else
7308 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
7309 MI.eraseFromParent();
7310 return Legalized;
7311}
7312
// NOTE(review): doxygen-extracted listing; the declaration lines (7313-7314,
// return type and function name) were stripped. From the body this is the
// G_INSERT-style narrowing: operand 0 = dst, operand 1 = big source, operand
// 2 = inserted value, operand 3 = immediate bit offset. Presumably
// LegalizerHelper::narrowScalarInsert -- confirm against upstream source.
//
// Strategy: split the big source into NarrowTy pieces (plus a leftover piece),
// then for each piece either forward the inserted value (piece fully covered),
// forward the original piece (not covered at all), or extract the overlapping
// segment of the inserted value and G_INSERT it into the piece. Finally the
// pieces are remerged into the destination, truncating if the leftover piece
// was any-extended to NarrowTy width.
7315                                  LLT NarrowTy) {
7316  // FIXME: Don't know how to handle secondary types yet.
7317  if (TypeIdx != 0)
7318    return UnableToLegalize;
7319
7320  SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
7321  LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7322  LLT LeftoverTy;
7323  extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7324               LeftoverRegs, MIRBuilder, MRI);
7325
7326  SrcRegs.append(LeftoverRegs);
7327
7328  uint64_t NarrowSize = NarrowTy.getSizeInBits();
7329  Register OpReg = MI.getOperand(2).getReg();
7330  uint64_t OpStart = MI.getOperand(3).getImm();
7331  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7332  for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
7333    unsigned DstStart = I * NarrowSize;
7334
7335    if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7336      // The entire subregister is defined by this insert, forward the new
7337      // value.
7338      DstRegs.push_back(OpReg);
7339      continue;
7340    }
7341
7342    Register SrcReg = SrcRegs[I];
7343    if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
7344      // The leftover reg is smaller than NarrowTy, so we need to extend it.
7345      SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7346      MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
7347    }
7348
7349    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7350      // No part of the insert affects this subregister, forward the original.
7351      DstRegs.push_back(SrcReg);
7352      continue;
7353    }
7354
7355    // OpSegStart is where this destination segment would start in OpReg if it
7356    // extended infinitely in both directions.
7357    int64_t ExtractOffset, InsertOffset;
7358    uint64_t SegSize;
7359    if (OpStart < DstStart) {
7360      InsertOffset = 0;
7361      ExtractOffset = DstStart - OpStart;
7362      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7363    } else {
7364      InsertOffset = OpStart - DstStart;
7365      ExtractOffset = 0;
7366      SegSize =
7367        std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7368    }
7369
7370    Register SegReg = OpReg;
7371    if (ExtractOffset != 0 || SegSize != OpSize) {
7372      // A genuine extract is needed.
7373      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7374      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7375    }
7376
7377    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7378    MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7379    DstRegs.push_back(DstReg);
7380  }
7381
7382  // If the leftover piece was widened to NarrowTy above, the merged value is
7383  // wider than the original register and must be truncated back down.
7382  uint64_t WideSize = DstRegs.size() * NarrowSize;
7383  Register DstReg = MI.getOperand(0).getReg();
7384  if (WideSize > RegTy.getSizeInBits()) {
7385    Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
7386    MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7387    MIRBuilder.buildTrunc(DstReg, MergeReg);
7388  } else
7389    MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7390
7391  MI.eraseFromParent();
7392  return Legalized;
7393}
7394
// NOTE(review): declaration lines stripped by the doxygen extraction; body
// matches LegalizerHelper::narrowScalarBasic -- confirm against upstream.
//
// Narrows a simple two-source, one-destination operation (asserted to have
// exactly 3 operands) by splitting both sources into NarrowTy pieces plus a
// leftover piece, re-emitting the same opcode piecewise, and remerging the
// results into the destination.
7397                                    LLT NarrowTy) {
7398  Register DstReg = MI.getOperand(0).getReg();
7399  LLT DstTy = MRI.getType(DstReg);
7400
7401  assert(MI.getNumOperands() == 3 && TypeIdx == 0);
7402
7403  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7404  SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
7405  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7406  LLT LeftoverTy;
7407  if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7408                    Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
7409    return UnableToLegalize;
7410
7411  LLT Unused;
  // Both sources have the same type, so the second split must succeed in the
  // same way the first did.
7412  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7413                    Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7414    llvm_unreachable("inconsistent extractParts result");
7415
7416  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7417    auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
7418                                      {Src0Regs[I], Src1Regs[I]});
7419    DstRegs.push_back(Inst.getReg(0));
7420  }
7421
7422  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7423    auto Inst = MIRBuilder.buildInstr(
7424      MI.getOpcode(),
7425      {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7426    DstLeftoverRegs.push_back(Inst.getReg(0));
7427  }
7428
7429  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7430              LeftoverTy, DstLeftoverRegs);
7431
7432  MI.eraseFromParent();
7433  return Legalized;
7434}
7435
// NOTE(review): declaration lines stripped by the doxygen extraction; body
// matches LegalizerHelper::narrowScalarExt -- confirm against upstream. The
// line declaring Parts (orig. 7447) was also stripped; only its uses remain.
//
// Narrows a scalar extension by extracting GCD-typed pieces of the source,
// building an LCM-sized merge with the extension opcode, and remerging the
// widened pieces into the (scalar-only) destination.
7438                                   LLT NarrowTy) {
7439  if (TypeIdx != 0)
7440    return UnableToLegalize;
7441
7442  auto [DstReg, SrcReg] = MI.getFirst2Regs();
7443
7444  LLT DstTy = MRI.getType(DstReg);
7445  if (DstTy.isVector())
7446    return UnableToLegalize;
7448
7449  LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7450  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
7451  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7452
7453  MI.eraseFromParent();
7454  return Legalized;
7455}
7456
// NOTE(review): declaration lines stripped by the doxygen extraction; body
// matches LegalizerHelper::narrowScalarSelect -- confirm against upstream.
//
// Narrows a G_SELECT with a scalar condition by splitting both value operands
// into NarrowTy pieces (plus leftover), emitting one select per piece with the
// shared condition, and remerging the results. Vector conditions bail out.
7459                                     LLT NarrowTy) {
7460  if (TypeIdx != 0)
7461    return UnableToLegalize;
7462
7463  Register CondReg = MI.getOperand(1).getReg();
7464  LLT CondTy = MRI.getType(CondReg);
7465  if (CondTy.isVector()) // TODO: Handle vselect
7466    return UnableToLegalize;
7467
7468  Register DstReg = MI.getOperand(0).getReg();
7469  LLT DstTy = MRI.getType(DstReg);
7470
7471  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7472  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7473  SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
7474  LLT LeftoverTy;
7475  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7476                    Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7477    return UnableToLegalize;
7478
7479  LLT Unused;
  // The false operand has the same type as the true operand, so splitting it
  // identically cannot fail.
7480  if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7481                    Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
7482    llvm_unreachable("inconsistent extractParts result");
7483
7484  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7485    auto Select = MIRBuilder.buildSelect(NarrowTy,
7486                                         CondReg, Src1Regs[I], Src2Regs[I]);
7487    DstRegs.push_back(Select.getReg(0));
7488  }
7489
7490  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7491    auto Select = MIRBuilder.buildSelect(
7492      LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
7493    DstLeftoverRegs.push_back(Select.getReg(0));
7494  }
7495
7496  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7497              LeftoverTy, DstLeftoverRegs);
7498
7499  MI.eraseFromParent();
7500  return Legalized;
7501}
7502
// NOTE(review): declaration lines stripped by the doxygen extraction; body
// matches LegalizerHelper::narrowScalarCTLZ -- confirm against upstream. The
// line declaring B (orig. 7515, presumably `MachineIRBuilder &B = MIRBuilder;`)
// was also stripped; only its uses remain.
//
// Narrows G_CTLZ / G_CTLZ_ZERO_UNDEF on a scalar that is exactly twice
// NarrowTy wide by unmerging into Lo/Hi halves and selecting per the comment
// below. The Hi-half count may use the ZERO_UNDEF form unconditionally since
// it is only selected when Hi is known non-zero.
7505                                   LLT NarrowTy) {
7506  if (TypeIdx != 1)
7507    return UnableToLegalize;
7508
7509  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7510  unsigned NarrowSize = NarrowTy.getSizeInBits();
7511
7512  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7513    const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
7515
7516    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7517    // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
7518    auto C_0 = B.buildConstant(NarrowTy, 0);
7519    auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7520                                UnmergeSrc.getReg(1), C_0);
7521    auto LoCTLZ = IsUndef ?
7522      B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
7523      B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7524    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7525    auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7526    auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
7527    B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7528
7529    MI.eraseFromParent();
7530    return Legalized;
7531  }
7532
7533  return UnableToLegalize;
7534}
7535
// NOTE(review): declaration lines stripped by the doxygen extraction; body
// matches LegalizerHelper::narrowScalarCTTZ -- confirm against upstream. The
// line declaring B (orig. 7548, presumably `MachineIRBuilder &B = MIRBuilder;`)
// was also stripped; only its uses remain.
//
// Mirror image of the CTLZ narrowing: count trailing zeros of a double-width
// scalar via its Lo/Hi halves. The Lo-half count may use the ZERO_UNDEF form
// unconditionally since it is only selected when Lo is known non-zero.
7538                                   LLT NarrowTy) {
7539  if (TypeIdx != 1)
7540    return UnableToLegalize;
7541
7542  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7543  unsigned NarrowSize = NarrowTy.getSizeInBits();
7544
7545  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7546    const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
7548
7549    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7550    // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
7551    auto C_0 = B.buildConstant(NarrowTy, 0);
7552    auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7553                                UnmergeSrc.getReg(0), C_0);
7554    auto HiCTTZ = IsUndef ?
7555      B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
7556      B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7557    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7558    auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7559    auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
7560    B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7561
7562    MI.eraseFromParent();
7563    return Legalized;
7564  }
7565
7566  return UnableToLegalize;
7567}
7568
// NOTE(review): declaration lines stripped by the doxygen extraction; from
// the buildCTLS call this appears to be the count-leading-sign-bits (G_CTLS)
// narrowing -- confirm name against upstream. The line declaring B (orig.
// 7581, presumably `MachineIRBuilder &B = MIRBuilder;`) was also stripped.
//
// Narrows a count-leading-sign-bits of a double-width scalar: if Lo behaves
// like an extension of Hi's sign, the answer is ctlz(Lo ^ sign) plus the
// (NarrowSize - 1) sign bits contributed by Hi; otherwise it is ctls(Hi).
7571                                   LLT NarrowTy) {
7572  if (TypeIdx != 1)
7573    return UnableToLegalize;
7574
7575  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7576  unsigned NarrowSize = NarrowTy.getSizeInBits();
7577
7578  if (!SrcTy.isScalar() || SrcTy.getSizeInBits() != 2 * NarrowSize)
7579    return UnableToLegalize;
7582
7583  auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7584  Register Lo = UnmergeSrc.getReg(0);
7585  Register Hi = UnmergeSrc.getReg(1);
7586
  // Sign is Hi arithmetically shifted so every bit equals Hi's sign bit.
7587  auto ShAmt = B.buildConstant(NarrowTy, NarrowSize - 1);
7588  auto Sign = B.buildAShr(NarrowTy, Hi, ShAmt);
7589
7590  auto HiIsSign = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), Hi, Sign);
7591
7592  // Invert Lo if Hi is negative. Then count the leading zeros. If there are no
7593  // leading zeros, then the MSB of Lo is different than the MSB of Hi.
7594  // Otherwise the leading zeros represent additional sign bits of the original
7595  // value.
7596  auto LoInv = B.buildXor(DstTy, Lo, Sign);
7597  auto LoCTLZ = B.buildCTLZ(DstTy, LoInv);
7598
7599  // Add NarrowSize-1 to LoCTLZ. This is the full CTLS if Hi is all sign bits.
7600  auto C_NarrowSizeM1 = B.buildConstant(DstTy, NarrowSize - 1);
7601  auto HiIsSignCTLS = B.buildAdd(DstTy, LoCTLZ, C_NarrowSizeM1);
7602
7603  auto HiCTLS = B.buildCTLS(DstTy, Hi);
7604
7605  B.buildSelect(DstReg, HiIsSign, HiIsSignCTLS, HiCTLS);
7606
7607  MI.eraseFromParent();
7608  return Legalized;
7609}
7610
// NOTE(review): declaration lines stripped by the doxygen extraction; body
// matches LegalizerHelper::narrowScalarCTPOP -- confirm against upstream.
//
// Narrows G_CTPOP of a scalar exactly twice NarrowTy wide: the population
// count of the whole is simply the sum of the counts of the two halves.
7613                                    LLT NarrowTy) {
7614  if (TypeIdx != 1)
7615    return UnableToLegalize;
7616
7617  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7618  unsigned NarrowSize = NarrowTy.getSizeInBits();
7619
7620  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7621    auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
7622
7623    auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7624    auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7625    MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7626
7627    MI.eraseFromParent();
7628    return Legalized;
7629  }
7630
7631  return UnableToLegalize;
7632}
7633
// NOTE(review): declaration lines stripped by the doxygen extraction; body
// matches LegalizerHelper::narrowScalarFLDEXP -- confirm against upstream.
// The line declaring B (orig. 7640, presumably
// `MachineIRBuilder &B = MIRBuilder;`) was also stripped.
//
// Narrows the exponent operand (operand 2) of G_FLDEXP: clamp the wide signed
// exponent into NarrowTy's signed range, truncate it, and rewrite the
// instruction in place to use the narrow exponent. Clamping is behavior-
// preserving because out-of-range exponents saturate the fldexp result anyway.
7636                                     LLT NarrowTy) {
7637  if (TypeIdx != 1)
7638    return UnableToLegalize;
7639
7641  Register ExpReg = MI.getOperand(2).getReg();
7642  LLT ExpTy = MRI.getType(ExpReg);
7643
7644  unsigned ClampSize = NarrowTy.getScalarSizeInBits();
7645
7646  // Clamp the exponent to the range of the target type.
7647  auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
7648  auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
7649  auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
7650  auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
7651
7652  auto Trunc = B.buildTrunc(NarrowTy, Clamp);
7653  Observer.changingInstr(MI);
7654  MI.getOperand(2).setReg(Trunc.getReg(0));
7655  Observer.changedInstr(MI);
7656  return Legalized;
7657}
7658
// NOTE(review): declaration lines stripped by the doxygen extraction; body
// matches LegalizerHelper::lowerBitCount(MachineInstr &MI) -- confirm against
// upstream. Inside the G_CTPOP case the line declaring B (orig. 7766,
// presumably `MachineIRBuilder &B = MIRBuilder;`) was also stripped.
//
// Lowers the bit-counting opcodes (G_CTLZ[_ZERO_UNDEF], G_CTTZ[_ZERO_UNDEF],
// G_CTPOP, G_CTLS) to sequences of simpler operations, preferring the
// *_ZERO_UNDEF form plus a select whenever the target supports it.
7661  unsigned Opc = MI.getOpcode();
7662  const auto &TII = MIRBuilder.getTII();
  // "Supported" here includes Libcall and Custom, not just Legal, since any
  // of those means the target can take the query opcode from here.
7663  auto isSupported = [this](const LegalityQuery &Q) {
7664    auto QAction = LI.getAction(Q).Action;
7665    return QAction == Legal || QAction == Libcall || QAction == Custom;
7666  };
7667  switch (Opc) {
7668  default:
7669    return UnableToLegalize;
7670  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7671    // This trivially expands to CTLZ.
7672    Observer.changingInstr(MI);
7673    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
7674    Observer.changedInstr(MI);
7675    return Legalized;
7676  }
7677  case TargetOpcode::G_CTLZ: {
7678    auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7679    unsigned Len = SrcTy.getScalarSizeInBits();
7680
7681    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7682      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
7683      auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7684      auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
7685      auto ICmp = MIRBuilder.buildICmp(
7686          CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
7687      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7688      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7689      MI.eraseFromParent();
7690      return Legalized;
7691    }
7692    // for now, we do this:
7693    // NewLen = NextPowerOf2(Len);
7694    // x = x | (x >> 1);
7695    // x = x | (x >> 2);
7696    // ...
7697    // x = x | (x >>16);
7698    // x = x | (x >>32); // for 64-bit input
7699    // Upto NewLen/2
7700    // return Len - popcount(x);
7701    //
7702    // Ref: "Hacker's Delight" by Henry Warren
7703    Register Op = SrcReg;
7704    unsigned NewLen = PowerOf2Ceil(Len);
7705    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7706      auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7707      auto MIBOp = MIRBuilder.buildOr(
7708          SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
7709      Op = MIBOp.getReg(0);
7710    }
7711    auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
7712    MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
7713                        MIBPop);
7714    MI.eraseFromParent();
7715    return Legalized;
7716  }
7717  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7718    // This trivially expands to CTTZ.
7719    Observer.changingInstr(MI);
7720    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
7721    Observer.changedInstr(MI);
7722    return Legalized;
7723  }
7724  case TargetOpcode::G_CTTZ: {
7725    auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7726
7727    unsigned Len = SrcTy.getScalarSizeInBits();
7728    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7729      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
7730      // zero.
7731      auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7732      auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
      // NOTE(review): the compare type is derived from DstTy here but from
      // SrcTy in the G_CTLZ case above -- looks inconsistent; confirm which
      // is intended upstream before relying on it.
7733      auto ICmp = MIRBuilder.buildICmp(
7734          CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
7735      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7736      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7737      MI.eraseFromParent();
7738      return Legalized;
7739    }
7740    // for now, we use: { return popcount(~x & (x - 1)); }
7741    // unless the target has ctlz but not ctpop, in which case we use:
7742    // { return 32 - nlz(~x & (x-1)); }
7743    // Ref: "Hacker's Delight" by Henry Warren
7744    auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
7745    auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7746    auto MIBTmp = MIRBuilder.buildAnd(
7747        SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7748    if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7749        isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7750      auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
7751      MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
7752                          MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
7753      MI.eraseFromParent();
7754      return Legalized;
7755    }
    // Otherwise rewrite this instruction in place into a CTPOP of the mask.
7756    Observer.changingInstr(MI);
7757    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
7758    MI.getOperand(1).setReg(MIBTmp.getReg(0));
7759    Observer.changedInstr(MI);
7760    return Legalized;
7761  }
7762  case TargetOpcode::G_CTPOP: {
7763    Register SrcReg = MI.getOperand(1).getReg();
7764    LLT Ty = MRI.getType(SrcReg);
7765    unsigned Size = Ty.getScalarSizeInBits();
7767
7768    // Bail out on irregular type lengths.
7769    if (Size > 128 || Size % 8 != 0)
7770      return UnableToLegalize;
7771
7772    // Count set bits in blocks of 2 bits. Default approach would be
7773    // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
7774    // We use following formula instead:
7775    // B2Count = val - { (val >> 1) & 0x55555555 }
7776    // since it gives same result in blocks of 2 with one instruction less.
7777    auto C_1 = B.buildConstant(Ty, 1);
7778    auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
7779    APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
7780    auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
7781    auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7782    auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
7783
7784    // In order to get count in blocks of 4 add values from adjacent block of 2.
7785    // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
7786    auto C_2 = B.buildConstant(Ty, 2);
7787    auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
7788    APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
7789    auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
7790    auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7791    auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7792    auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7793
7794    // For count in blocks of 8 bits we don't have to mask high 4 bits before
7795    // addition since count value sits in range {0,...,8} and 4 bits are enough
7796    // to hold such binary values. After addition high 4 bits still hold count
7797    // of set bits in high 4 bit block, set them to zero and get 8 bit result.
7798    // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
7799    auto C_4 = B.buildConstant(Ty, 4);
7800    auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
7801    auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
7802    APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
7803    auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
7804    auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7805
7806    assert(Size <= 128 && "Scalar size is too large for CTPOP lower algorithm");
7807
7808    // Avoid the multiply when shift-add is cheaper.
7809    if (Size == 16 && !Ty.isVector()) {
7810      // v = (v + (v >> 8)) & 0xFF;
7811      auto C_8 = B.buildConstant(Ty, 8);
7812      auto HighSum = B.buildLShr(Ty, B8Count, C_8);
7813      auto Res = B.buildAdd(Ty, B8Count, HighSum);
7814      B.buildAnd(MI.getOperand(0).getReg(), Res, B.buildConstant(Ty, 0xFF));
7815      MI.eraseFromParent();
7816      return Legalized;
7817    }
7818
7819    // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
7820    // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
7821    auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
7822
7823    // Shift count result from 8 high bits to low bits.
7824    auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
7825
    // WidenScalar also counts as "the mul is usable" since the widened mul
    // will itself be legalized.
7826    auto IsMulSupported = [this](const LLT Ty) {
7827      auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7828      return Action == Legal || Action == WidenScalar || Action == Custom;
7829    };
7830    if (IsMulSupported(Ty)) {
7831      auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
7832      B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7833    } else {
      // No usable multiply: accumulate the byte sums into the top byte with a
      // log2(Size/8) shift-add ladder instead.
7834      auto ResTmp = B8Count;
7835      for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7836        auto ShiftC = B.buildConstant(Ty, Shift);
7837        auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
7838        ResTmp = B.buildAdd(Ty, ResTmp, Shl);
7839      }
7840      B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7841    }
7842    MI.eraseFromParent();
7843    return Legalized;
7844  }
7845  case TargetOpcode::G_CTLS: {
7846    auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7847
7848    // ctls(x) -> ctlz(x ^ (x >> (N - 1))) - 1
7849    auto SignIdxC =
7850        MIRBuilder.buildConstant(SrcTy, SrcTy.getScalarSizeInBits() - 1);
7851    auto OneC = MIRBuilder.buildConstant(DstTy, 1);
7852
7853    auto Shr = MIRBuilder.buildAShr(SrcTy, SrcReg, SignIdxC);
7854
7855    auto Xor = MIRBuilder.buildXor(SrcTy, SrcReg, Shr);
7856    auto Ctlz = MIRBuilder.buildCTLZ(DstTy, Xor);
7857
7858    MIRBuilder.buildSub(DstReg, Ctlz, OneC);
7859    MI.eraseFromParent();
7860    return Legalized;
7861  }
7862  }
7863}
7864
7865// Check that (every element of) Reg is undef or not an exact multiple of BW.
// NOTE(review): the declaration line (orig. 7866, presumably
// `static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,`)
// and the line materializing CI from C (orig. 7872) were stripped by the
// doxygen extraction; confirm against upstream. Used by the funnel-shift
// lowerings below to decide whether a shift amount is provably non-zero mod
// the bit width, which licenses the simpler expansions.
7867                                        Register Reg, unsigned BW) {
7868  return matchUnaryPredicate(
7869      MRI, Reg,
7870      [=](const Constant *C) {
7871        // Null constant here means an undef.
7873        return !CI || CI->getValue().urem(BW) != 0;
7874      },
7875      /*AllowUndefs*/ true);
7876}
7877
// NOTE(review): declaration lines stripped by the doxygen extraction; by
// elimination against the callers in lowerFunnelShift this is
// LegalizerHelper::lowerFunnelShiftWithInverse -- confirm against upstream.
//
// Lowers G_FSHL/G_FSHR by emitting the opposite funnel shift. Power-of-2 bit
// width is required so that negating / inverting the shift amount is correct
// modulo BW.
7880  auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7881  LLT Ty = MRI.getType(Dst);
7882  LLT ShTy = MRI.getType(Z);
7883
7884  unsigned BW = Ty.getScalarSizeInBits();
7885
7886  if (!isPowerOf2_32(BW))
7887    return UnableToLegalize;
7888
7889  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7890  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7891
7892  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7893    // fshl X, Y, Z -> fshr X, Y, -Z
7894    // fshr X, Y, Z -> fshl X, Y, -Z
    // NOTE(review): the sub's result type is Ty while Zero/Z are ShTy -- this
    // presumably relies on Ty == ShTy here; confirm against upstream.
7895    auto Zero = MIRBuilder.buildConstant(ShTy, 0);
7896    Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7897  } else {
7898    // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7899    // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7900    auto One = MIRBuilder.buildConstant(ShTy, 1);
7901    if (IsFSHL) {
7902      Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7903      X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
7904    } else {
7905      X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7906      Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
7907    }
7908
7909    Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
7910  }
7911
7912  MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
7913  MI.eraseFromParent();
7914  return Legalized;
7915}
7916
// NOTE(review): declaration lines stripped by the doxygen extraction; by
// elimination against the callers in lowerFunnelShift this is
// LegalizerHelper::lowerFunnelShiftAsShifts -- confirm against upstream.
//
// Lowers G_FSHL/G_FSHR into plain shifts and an OR. When the shift amount is
// provably non-zero mod BW the direct two-shift form is used; otherwise an
// extra shift by 1 keeps every emitted shift amount strictly less than BW so
// no emitted shift is undefined.
7919  auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7920  LLT Ty = MRI.getType(Dst);
7921  LLT ShTy = MRI.getType(Z);
7922
7923  const unsigned BW = Ty.getScalarSizeInBits();
7924  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7925
7926  Register ShX, ShY;
7927  Register ShAmt, InvShAmt;
7928
7929  // FIXME: Emit optimized urem by constant instead of letting it expand later.
7930  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7931    // fshl: X << C | Y >> (BW - C)
7932    // fshr: X << (BW - C) | Y >> C
7933    // where C = Z % BW is not zero
7934    auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7935    ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7936    InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
7937    ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
7938    ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
7939  } else {
7940    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7941    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7942    auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
7943    if (isPowerOf2_32(BW)) {
7944      // Z % BW -> Z & (BW - 1)
7945      ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
7946      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7947      auto NotZ = MIRBuilder.buildNot(ShTy, Z);
7948      InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
7949    } else {
7950      auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7951      ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7952      InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
7953    }
7954
7955    auto One = MIRBuilder.buildConstant(ShTy, 1);
7956    if (IsFSHL) {
7957      ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
7958      auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
7959      ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
7960    } else {
7961      auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
7962      ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
7963      ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
7964    }
7965  }
7966
  // The two halves select disjoint bits, so the OR can carry the flag.
7967  MIRBuilder.buildOr(Dst, ShX, ShY, MachineInstr::Disjoint);
7968  MI.eraseFromParent();
7969  return Legalized;
7970}
7971
// NOTE(review): declaration lines stripped by the doxygen extraction; body
// matches LegalizerHelper::lowerFunnelShift -- confirm against upstream.
//
// Dispatches a funnel-shift lowering: if the reversed opcode would itself just
// be lowered again, go straight to the shift expansion; otherwise try the
// inverse-opcode expansion (power-of-2 widths only) and fall back to shifts.
7974  // These operations approximately do the following (while avoiding undefined
7975  // shifts by BW):
7976  // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
7977  // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
7978  Register Dst = MI.getOperand(0).getReg();
7979  LLT Ty = MRI.getType(Dst);
7980  LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
7981
7982  bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7983  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7984
7985  // TODO: Use smarter heuristic that accounts for vector legalization.
7986  if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
7987    return lowerFunnelShiftAsShifts(MI);
7988
7989  // This only works for powers of 2, fallback to shifts if it fails.
7990  LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
7991  if (Result == UnableToLegalize)
7992    return lowerFunnelShiftAsShifts(MI);
7993  return Result;
7994}
7995
// NOTE(review): declaration lines stripped by the doxygen extraction; body
// matches LegalizerHelper::lowerEXT -- confirm against upstream. The
// continuation lines of the changeElementCount calls (orig. 8018 and 8023,
// presumably `ElementCount::getFixed(...divideCoefficientBy(2))`-style
// expressions) were also stripped; only the opening lines remain.
//
// Lowers a too-wide extension (the same opcode is reused, so this covers
// zext/sext/anyext alike) whose per-element step is more than a doubling:
// first extend to double the source element size, then unmerge into two
// halves, extend each half to the final element size, and remerge.
7997  auto [Dst, Src] = MI.getFirst2Regs();
7998  LLT DstTy = MRI.getType(Dst);
7999  LLT SrcTy = MRI.getType(Src);
8000
8001  uint32_t DstTySize = DstTy.getSizeInBits();
8002  uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
8003  uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
8004
8005  if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
8006      !isPowerOf2_32(SrcTyScalarSize))
8007    return UnableToLegalize;
8008
8009  // The step between extend is too large, split it by creating an intermediate
8010  // extend instruction
8011  if (SrcTyScalarSize * 2 < DstTyScalarSize) {
8012    LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
8013    // If the destination type is illegal, split it into multiple statements
8014    // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
8015    auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
8016    // Unmerge the vector
8017    LLT EltTy = MidTy.changeElementCount(
8019    auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
8020
8021    // ZExt the vectors
8022    LLT ZExtResTy = DstTy.changeElementCount(
8024    auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
8025                                          {UnmergeSrc.getReg(0)});
8026    auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
8027                                          {UnmergeSrc.getReg(1)});
8028
8029    // Merge the ending vectors
8030    MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
8031
8032    MI.eraseFromParent();
8033    return Legalized;
8034  }
8035  return UnableToLegalize;
8036}
8037
// NOTE(review): declaration lines stripped by the doxygen extraction; body
// matches LegalizerHelper::lowerTRUNC -- confirm against upstream. The second
// line of the power-of-2 condition (orig. 8057) was also stripped.
//
// Lowers a large vector G_TRUNC by splitting the source in two, truncating
// each half to an intermediate element width (at most a halving per step, or
// directly to the destination width when that is enough), concatenating, and
// truncating again if the intermediate width was not final. All element
// counts/sizes involved must be powers of 2.
8039  // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
8040  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
8041  // Similar to how operand splitting is done in SelectiondDAG, we can handle
8042  // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
8043  //   %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
8044  //   %lo16(<4 x s16>) = G_TRUNC %inlo
8045  //   %hi16(<4 x s16>) = G_TRUNC %inhi
8046  //   %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
8047  //   %res(<8 x s8>) = G_TRUNC %in16
8048
8049  assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
8050
8051  Register DstReg = MI.getOperand(0).getReg();
8052  Register SrcReg = MI.getOperand(1).getReg();
8053  LLT DstTy = MRI.getType(DstReg);
8054  LLT SrcTy = MRI.getType(SrcReg);
8055
8056  if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
8058      isPowerOf2_32(SrcTy.getNumElements()) &&
8059      isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
8060    // Split input type.
8061    LLT SplitSrcTy = SrcTy.changeElementCount(
8062        SrcTy.getElementCount().divideCoefficientBy(2));
8063
8064    // First, split the source into two smaller vectors.
8065    SmallVector<Register, 2> SplitSrcs;
8066    extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
8067
8068    // Truncate the splits into intermediate narrower elements.
8069    LLT InterTy;
8070    if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
8071      InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
8072    else
8073      InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
8074    for (Register &Src : SplitSrcs)
8075      Src = MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
8076
8077    // Combine the new truncates into one vector
8078    auto Merge = MIRBuilder.buildMergeLikeInstr(
8079        DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
8080
8081    // Truncate the new vector to the final result type
8082    if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
8083      MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
8084    else
8085      MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
8086
8087    MI.eraseFromParent();
8088
8089    return Legalized;
8090  }
8091  return UnableToLegalize;
8092}
8093
// NOTE(review): declaration lines stripped by the doxygen extraction; body
// matches LegalizerHelper::lowerRotateWithReverseRotate -- confirm against
// upstream.
//
// Lowers G_ROTL/G_ROTR as the opposite rotate by the negated amount:
// rot(x, c) == revrot(x, -c) (the caller checks the power-of-2 width
// precondition that makes the negation correct mod the bit width).
8096  auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
8097  auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
8098  bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
8099  unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8100  auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
8101  MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
8102  MI.eraseFromParent();
8103  return Legalized;
8104}
8105
// NOTE(review): declaration lines stripped by the doxygen extraction; body
// matches LegalizerHelper::lowerRotate -- confirm against upstream.
//
// Lowers G_ROTL/G_ROTR, in order of preference: (1) the opposite rotate with
// a negated amount, (2) a funnel shift with both data operands equal to Src
// (same-direction first, else the reverse direction with a negated amount),
// (3) a shift/or expansion; power-of-2 widths mask the amount, other widths
// use urem plus an extra shift-by-1 so no emitted shift amount reaches the
// bit width.
8107  auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
8108
8109  unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
8110  bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
8111
8112  MIRBuilder.setInstrAndDebugLoc(MI);
8113
8114  // If a rotate in the other direction is supported, use it.
8115  unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8116  if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
8117      isPowerOf2_32(EltSizeInBits))
8118    return lowerRotateWithReverseRotate(MI);
8119
8120  // If a funnel shift is supported, use it.
8121  unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8122  unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8123  bool IsFShLegal = false;
8124  if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
8125      LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
    // A rotate is a funnel shift with identical high and low inputs (R2, R2).
8126    auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
8127                                Register R3) {
8128      MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
8129      MI.eraseFromParent();
8130      return Legalized;
8131    };
8132    // If a funnel shift in the other direction is supported, use it.
8133    if (IsFShLegal) {
8134      return buildFunnelShift(FShOpc, Dst, Src, Amt);
8135    } else if (isPowerOf2_32(EltSizeInBits)) {
8136      Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
8137      return buildFunnelShift(RevFsh, Dst, Src, Amt);
8138    }
8139  }
8140
8141  auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
8142  unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
8143  unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
8144  auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
8145  Register ShVal;
8146  Register RevShiftVal;
8147  if (isPowerOf2_32(EltSizeInBits)) {
8148    // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8149    // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8150    auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
8151    auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
8152    ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8153    auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
8154    RevShiftVal =
8155        MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
8156  } else {
8157    // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8158    // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8159    auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
8160    auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
8161    ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8162    auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
8163    auto One = MIRBuilder.buildConstant(AmtTy, 1);
8164    auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
8165    RevShiftVal =
8166        MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
8167  }
  // The two shifted values occupy disjoint bit ranges.
8168  MIRBuilder.buildOr(Dst, ShVal, RevShiftVal, MachineInstr::Disjoint);
8169  MI.eraseFromParent();
8170  return Legalized;
8171}
8172
8173// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
8174// representation.
// NOTE(review): declaration lines (orig. 8175-8176) stripped by the doxygen
// extraction; body matches LegalizerHelper::lowerU64ToF32BitOps -- confirm
// against upstream.
//
// Builds the f32 bit pattern directly: normalize u by shifting out leading
// zeros, derive the biased exponent (127 + 63 - lz, or 0 for u == 0), take the
// top 23 mantissa bits, then round-to-nearest-even using the 40 dropped bits
// (round up above the halfway point; round to even exactly at it).
8177  auto [Dst, Src] = MI.getFirst2Regs();
8178  const LLT S64 = LLT::scalar(64);
8179  const LLT S32 = LLT::scalar(32);
8180  const LLT S1 = LLT::scalar(1);
8181
8182  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8183
8184  // unsigned cul2f(ulong u) {
8185  //   uint lz = clz(u);
8186  //   uint e = (u != 0) ? 127U + 63U - lz : 0;
8187  //   u = (u << lz) & 0x7fffffffffffffffUL;
8188  //   ulong t = u & 0xffffffffffUL;
8189  //   uint v = (e << 23) | (uint)(u >> 40);
8190  //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
8191  //   return as_float(v + r);
8192  // }
8193
8194  auto Zero32 = MIRBuilder.buildConstant(S32, 0);
8195  auto Zero64 = MIRBuilder.buildConstant(S64, 0);
8196
8197  auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
8198
8199  auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
8200  auto Sub = MIRBuilder.buildSub(S32, K, LZ);
8201
8202  auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
8203  auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
8204
8205  auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
8206  auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
8207
8208  auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
8209
8210  auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
8211  auto T = MIRBuilder.buildAnd(S64, U, Mask1);
8212
8213  auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
8214  auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
8215  auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
8216
8217  auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
8218  auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
8219  auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
8220  auto One = MIRBuilder.buildConstant(S32, 1);
8221
8222  auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
8223  auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
8224  auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
8225  MIRBuilder.buildAdd(Dst, V, R);
8226
8227  MI.eraseFromParent();
8228  return Legalized;
8229}
8230
8231 // Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
8232 // operations and G_SITOFP
// NOTE(review): the function signature line is missing from this extract
// (presumably LegalizerHelper::lowerU64ToF32WithSITOFP(MachineInstr &MI))
// -- confirm against upstream before relying on the name.
8235 auto [Dst, Src] = MI.getFirst2Regs();
8236 const LLT S64 = LLT::scalar(64);
8237 const LLT S32 = LLT::scalar(32);
8238 const LLT S1 = LLT::scalar(1);
8239
8240 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8241
8242 // For i64 < INT_MAX we simply reuse SITOFP.
8243 // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
8244 // saved before division, convert to float by SITOFP, multiply the result
8245 // by 2.
8246 auto One = MIRBuilder.buildConstant(S64, 1);
8247 auto Zero = MIRBuilder.buildConstant(S64, 0);
8248 // Result if Src < INT_MAX
8249 auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
8250 // Result if Src >= INT_MAX
// ORing the shifted-out low bit into the halved value implements a sticky
// bit so the final rounding of the doubled result is still round-to-nearest.
8251 auto Halved = MIRBuilder.buildLShr(S64, Src, One);
8252 auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
8253 auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
8254 auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
// Doubling via x + x avoids needing a G_FMUL by 2.0.
8255 auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
8256 // Check if the original value is larger than INT_MAX by comparing with
8257 // zero to pick one of the two conversions.
8258 auto IsLarge =
8259 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
8260 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8261
8262 MI.eraseFromParent();
8263 return Legalized;
8264}
8265
8266 // Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
8267 // IEEE double representation.
// NOTE(review): signature lines are missing from this extract; presumably
// LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) -- confirm.
8270 auto [Dst, Src] = MI.getFirst2Regs();
8271 const LLT S64 = LLT::scalar(64);
8272 const LLT S32 = LLT::scalar(32);
8273
8274 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
8275
8276 // We create double value from 32 bit parts with 32 exponent difference.
8277 // Note that + and - are float operations that adjust the implicit leading
8278 // one, the bases 2^52 and 2^84 are for illustrative purposes.
8279 //
8280 // X = 2^52 * 1.0...LowBits
8281 // Y = 2^84 * 1.0...HighBits
8282 // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
8283 // = - 2^52 * 1.0...HighBits
8284 // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
// 0x433... is the bit pattern of 2^52, 0x453... of 2^84; ORing the integer
// halves into the mantissa field yields exact doubles with those exponents.
8285 auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
8286 auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
// 0x4530000000100000 is the double 2^84 + 2^52, subtracted in one FSUB.
8287 auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
8288 auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
8289 auto HalfWidth = MIRBuilder.buildConstant(S64, 32);
8290
8291 auto LowBits = MIRBuilder.buildTrunc(S32, Src);
8292 LowBits = MIRBuilder.buildZExt(S64, LowBits);
8293 auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
8294 auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
8295 auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
8296 auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
8297 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
8298
8299 MI.eraseFromParent();
8300 return Legalized;
8301}
8302
8303 /// i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16. We cannot
8304 /// convert fpround f64->f16 without double-rounding, so we manually perform the
8305 /// lowering here where we know it is valid.
// NOTE(review): the first signature line is missing from this extract; only
// the trailing parameters (LLT SrcTy, MachineIRBuilder &MIRBuilder) survive.
8308 LLT SrcTy, MachineIRBuilder &MIRBuilder) {
// Convert i64 -> f64 first (SrcTy here is the f64-width fp type), picking
// unsigned vs signed conversion from the opcode being lowered.
8309 auto M1 = MI.getOpcode() == TargetOpcode::G_UITOFP
8310 ? MIRBuilder.buildUITOFP(SrcTy, Src)
8311 : MIRBuilder.buildSITOFP(SrcTy, Src);
// Then f64 -> f32 -> f16; the intermediate f32 step is exact for values
// produced from i64, avoiding the double-rounding of a direct f64->f16.
8312 LLT S32Ty = SrcTy.changeElementSize(32);
8313 auto M2 = MIRBuilder.buildFPTrunc(S32Ty, M1);
8314 MIRBuilder.buildFPTrunc(Dst, M2);
8315 MI.eraseFromParent();
// NOTE(review): line 8316 (presumably "return LegalizerHelper::Legalized;")
// is missing from this extract.
8317}
8318
// Lower G_UITOFP: dispatch on source/destination widths to one of the
// expansions above. NOTE(review): the signature line (8319) is missing from
// this extract.
8320 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8321
// Boolean source: a select between the fp constants 1.0 and 0.0.
8322 if (SrcTy == LLT::scalar(1)) {
8323 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
8324 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8325 MIRBuilder.buildSelect(Dst, Src, True, False);
8326 MI.eraseFromParent();
8327 return Legalized;
8328 }
8329
// i64 -> f16 needs the dedicated two-step truncation path (see above).
8330 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8331 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8332
8333 if (SrcTy != LLT::scalar(64))
8334 return UnableToLegalize;
8335
8336 if (DstTy == LLT::scalar(32))
8337 // TODO: SelectionDAG has several alternative expansions to port which may
8338 // be more reasonable depending on the available instructions. We also need
8339 // a more advanced mechanism to choose an optimal version depending on
8340 // target features such as sitofp or CTLZ availability.
// NOTE(review): line 8341 (the call returning the s32 expansion) is missing
// from this extract.
8342
8343 if (DstTy == LLT::scalar(64))
// NOTE(review): line 8344 (the call returning the s64 expansion) is missing
// from this extract.
8345
8346 return UnableToLegalize;
8347}
8348
// Lower G_SITOFP. NOTE(review): the signature line (8349) is missing from
// this extract.
8350 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8351
8352 const LLT S64 = LLT::scalar(64);
8353 const LLT S32 = LLT::scalar(32);
8354 const LLT S1 = LLT::scalar(1);
8355
// Boolean source: signed i1 true is -1, so select between -1.0 and 0.0.
8356 if (SrcTy == S1) {
8357 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
8358 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8359 MIRBuilder.buildSelect(Dst, Src, True, False);
8360 MI.eraseFromParent();
8361 return Legalized;
8362 }
8363
// i64 -> f16 shares the double-rounding-safe path with G_UITOFP.
8364 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8365 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8366
8367 if (SrcTy != S64)
8368 return UnableToLegalize;
8369
8370 if (DstTy == S32) {
8371 // signed cl2f(long l) {
8372 // long s = l >> 63;
8373 // float r = cul2f((l + s) ^ s);
8374 // return s ? -r : r;
8375 // }
// (l + s) ^ s computes |l| branchlessly (s is the sign replicated to all
// 64 bits); the unsigned conversion is then negated when the sign was set.
8376 Register L = Src;
8377 auto SignBit = MIRBuilder.buildConstant(S64, 63);
8378 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
8379
8380 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
8381 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
8382 auto R = MIRBuilder.buildUITOFP(S32, Xor);
8383
8384 auto RNeg = MIRBuilder.buildFNeg(S32, R);
8385 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
8386 MIRBuilder.buildConstant(S64, 0));
8387 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8388 MI.eraseFromParent();
8389 return Legalized;
8390 }
8391
8392 return UnableToLegalize;
8393}
8394
// Lower G_FPTOUI in terms of G_FPTOSI. NOTE(review): the signature line
// (8395) is missing from this extract.
8396 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8397 const LLT S64 = LLT::scalar(64);
8398 const LLT S32 = LLT::scalar(32);
8399
8400 if (SrcTy != S64 && SrcTy != S32)
8401 return UnableToLegalize;
8402 if (DstTy != S32 && DstTy != S64)
8403 return UnableToLegalize;
8404
8405 // FPTOSI gives same result as FPTOUI for positive signed integers.
8406 // FPTOUI needs to deal with fp values that convert to unsigned integers
8407 // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
8408
// 2^Exp is the destination's sign-bit value (e.g. 2^31 for i32).
8409 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
8410 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
// NOTE(review): line 8411 (presumably the IEEEdouble() alternative of this
// conditional) is missing from this extract.
8412 APInt::getZero(SrcTy.getSizeInBits()));
8413 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
8414
// Direct conversion; valid whenever Src < 2^Exp.
8415 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
8416
8417 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
8418 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
8419 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
8420 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
8421 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
8422 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
// XOR with the sign mask re-adds 2^Exp (the high bit is known clear here).
8423 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
8424
8425 const LLT S1 = LLT::scalar(1);
8426
// ULT (unordered-or-<) picks the direct conversion for NaN inputs too.
8427 MachineInstrBuilder FCMP =
8428 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
8429 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8430
8431 MI.eraseFromParent();
8432 return Legalized;
8433}
8434
// Lower G_FPTOSI (f32 -> i64 only) via manual bit extraction of sign,
// exponent and mantissa. NOTE(review): the signature line (8435) is missing
// from this extract.
8436 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8437 const LLT S64 = LLT::scalar(64);
8438 const LLT S32 = LLT::scalar(32);
8439
8440 // FIXME: Only f32 to i64 conversions are supported.
8441 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
8442 return UnableToLegalize;
8443
8444 // Expand f32 -> i64 conversion
8445 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8446 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8447
8448 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8449
// Biased exponent field: bits [30:23] of the f32 pattern.
8450 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
8451 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
8452
8453 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
8454 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
8455
// Sign replicated across all bits (0 or -1) via arithmetic shift.
8456 auto SignMask = MIRBuilder.buildConstant(SrcTy,
8457 APInt::getSignMask(SrcEltBits));
8458 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
8459 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
8460 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
8461 Sign = MIRBuilder.buildSExt(DstTy, Sign);
8462
// Mantissa with the implicit leading one (0x00800000) ORed back in.
8463 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
8464 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
8465 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
8466
8467 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
8468 R = MIRBuilder.buildZExt(DstTy, R);
8469
// Unbias (f32 bias is 127), then shift the 24-bit significand left or
// right depending on whether the exponent exceeds the mantissa width.
8470 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
8471 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
8472 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
8473 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
8474
8475 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
8476 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
8477
8478 const LLT S1 = LLT::scalar(1);
8479 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
8480 S1, Exponent, ExponentLoBit);
8481
8482 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
8483
// (R ^ Sign) - Sign negates R when the input was negative (two's
// complement negate expressed branchlessly).
8484 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
8485 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
8486
8487 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
8488
// |value| < 1 (negative unbiased exponent) truncates to zero.
8489 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
8490 S1, Exponent, ZeroSrcTy);
8491
8492 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
8493 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
8494
8495 MI.eraseFromParent();
8496 return Legalized;
8497}
8498
// Lower G_FPTOSI_SAT / G_FPTOUI_SAT: saturating fp-to-int. NOTE(review):
// the signature lines (8499-8500) are missing from this extract.
8501 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8502
8503 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8504 unsigned SatWidth = DstTy.getScalarSizeInBits();
8505
8506 // Determine minimum and maximum integer values and their corresponding
8507 // floating-point values.
8508 APInt MinInt, MaxInt;
8509 if (IsSigned) {
8510 MinInt = APInt::getSignedMinValue(SatWidth);
8511 MaxInt = APInt::getSignedMaxValue(SatWidth);
8512 } else {
8513 MinInt = APInt::getMinValue(SatWidth);
8514 MaxInt = APInt::getMaxValue(SatWidth);
8515 }
8516
8517 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8518 APFloat MinFloat(Semantics);
8519 APFloat MaxFloat(Semantics);
8520
// rmTowardZero so an inexact conversion is detected via opInexact below.
8521 APFloat::opStatus MinStatus =
8522 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
8523 APFloat::opStatus MaxStatus =
8524 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
8525 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
8526 !(MaxStatus & APFloat::opStatus::opInexact);
8527
8528 // If the integer bounds are exactly representable as floats, emit a
8529 // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
8530 // and selects.
8531 if (AreExactFloatBounds) {
8532 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
8533 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
8534 auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
8535 SrcTy.changeElementSize(1), Src, MaxC);
8536 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8537 // Clamp by MaxFloat from above. NaN cannot occur.
8538 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8539 auto MinP =
8540 MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max,
// NOTE(review): line 8541 (presumably "MinC, MachineInstr::FmNoNans);")
// is missing from this extract.
8542 auto Min =
8543 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
8544 // Convert clamped value to integer. In the unsigned case we're done,
8545 // because we mapped NaN to MinFloat, which will cast to zero.
8546 if (!IsSigned) {
8547 MIRBuilder.buildFPTOUI(Dst, Min);
8548 MI.eraseFromParent();
8549 return Legalized;
8550 }
8551
8552 // Otherwise, select 0 if Src is NaN.
8553 auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
8554 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8555 DstTy.changeElementSize(1), Src, Src);
8556 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
8557 FpToInt);
8558 MI.eraseFromParent();
8559 return Legalized;
8560 }
8561
8562 // Result of direct conversion. The assumption here is that the operation is
8563 // non-trapping and it's fine to apply it to an out-of-range value if we
8564 // select it away later.
8565 auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
8566 : MIRBuilder.buildFPTOUI(DstTy, Src);
8567
8568 // If Src ULT MinFloat, select MinInt. In particular, this also selects
8569 // MinInt if Src is NaN.
8570 auto ULT =
8571 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
8572 MIRBuilder.buildFConstant(SrcTy, MinFloat));
8573 auto Max = MIRBuilder.buildSelect(
8574 DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8575 // If Src OGT MaxFloat, select MaxInt.
8576 auto OGT =
8577 MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
8578 MIRBuilder.buildFConstant(SrcTy, MaxFloat));
8579
8580 // In the unsigned case we are done, because we mapped NaN to MinInt, which
8581 // is already zero.
8582 if (!IsSigned) {
8583 MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
8584 Max);
8585 MI.eraseFromParent();
8586 return Legalized;
8587 }
8588
8589 // Otherwise, select 0 if Src is NaN.
8590 auto Min = MIRBuilder.buildSelect(
8591 DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8592 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8593 DstTy.changeElementSize(1), Src, Src);
8594 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
8595 MI.eraseFromParent();
8596 return Legalized;
8597}
8598
8599 // f64 -> f16 conversion using round-to-nearest-even rounding mode.
// NOTE(review): the signature lines (8600-8601) are missing from this
// extract.
8602 const LLT S1 = LLT::scalar(1);
8603 const LLT S32 = LLT::scalar(32);
8604
8605 auto [Dst, Src] = MI.getFirst2Regs();
8606 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
8607 MRI.getType(Src).getScalarType() == LLT::scalar(64));
8608
8609 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
8610 return UnableToLegalize;
8611
// With the afn (approximate functions) flag the double-rounding of two
// chained fptruncs is acceptable, so take the cheap path.
8612 if (MI.getFlag(MachineInstr::FmAfn)) {
8613 unsigned Flags = MI.getFlags();
8614 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
8615 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
8616 MI.eraseFromParent();
8617 return Legalized;
8618 }
8619
8620 const unsigned ExpMask = 0x7ff;
8621 const unsigned ExpBiasf64 = 1023;
8622 const unsigned ExpBiasf16 = 15;
8623
// Split the f64 bit pattern into low (U) and high (UH) 32-bit halves.
8624 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
8625 Register U = Unmerge.getReg(0);
8626 Register UH = Unmerge.getReg(1);
8627
8628 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
8629 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
8630
8631 // Subtract the fp64 exponent bias (1023) to get the real exponent and
8632 // add the f16 bias (15) to get the biased exponent for the f16 format.
8633 E = MIRBuilder.buildAdd(
8634 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
8635
8636 auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
8637 M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
8638
// Collect the mantissa bits discarded above into a sticky bit so
// round-to-nearest-even sees whether anything nonzero was dropped.
8639 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
8640 MIRBuilder.buildConstant(S32, 0x1ff));
8641 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
8642
8643 auto Zero = MIRBuilder.buildConstant(S32, 0);
8644 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
8645 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
8646 M = MIRBuilder.buildOr(S32, M, Lo40Set);
8647
8648 // (M != 0 ? 0x0200 : 0) | 0x7c00;
// I is the result for the all-ones f64 exponent: Inf (0x7c00) or a quiet
// NaN (0x7e00) when any mantissa bit survives.
8649 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
8650 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
8651 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
8652
8653 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
8654 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
8655
8656 // N = M | (E << 12);
8657 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
8658 auto N = MIRBuilder.buildOr(S32, M, EShl12);
8659
8660 // B = clamp(1-E, 0, 13);
8661 auto One = MIRBuilder.buildConstant(S32, 1);
8662 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
8663 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
8664 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
8665
// Denormal path: shift the significand (with implicit one restored via
// 0x1000) right by B and keep a sticky bit for the lost bits.
8666 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
8667 MIRBuilder.buildConstant(S32, 0x1000));
8668
8669 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
8670 auto D0 = MIRBuilder.buildShl(S32, D, B);
8671
8672 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
8673 D0, SigSetHigh);
8674 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
8675 D = MIRBuilder.buildOr(S32, D, D1);
8676
8677 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
8678 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
8679
// Round to nearest even: V's low two bits plus the sticky bit decide
// whether to increment after dropping the two guard bits.
8680 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
8681 V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
8682
8683 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
8684 MIRBuilder.buildConstant(S32, 3));
8685 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
8686
8687 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
8688 MIRBuilder.buildConstant(S32, 5));
8689 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
8690
8691 V1 = MIRBuilder.buildOr(S32, V0, V1);
8692 V = MIRBuilder.buildAdd(S32, V, V1);
8693
// E > 30 means overflow of the f16 exponent range: result is Inf.
8694 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
8695 E, MIRBuilder.buildConstant(S32, 30));
8696 V = MIRBuilder.buildSelect(S32, CmpEGt30,
8697 MIRBuilder.buildConstant(S32, 0x7c00), V);
8698
// E == 1039 corresponds to the all-ones f64 exponent (2047 - 1023 + 15),
// i.e. the input was Inf/NaN: use the precomputed I.
8699 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
8700 E, MIRBuilder.buildConstant(S32, 1039));
8701 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
8702
8703 // Extract the sign bit.
8704 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
8705 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
8706
8707 // Insert the sign bit
8708 V = MIRBuilder.buildOr(S32, Sign, V);
8709
8710 MIRBuilder.buildTrunc(Dst, V);
8711 MI.eraseFromParent();
8712 return Legalized;
8713}
8714
// Lower G_FPTRUNC: only the f64 -> f16 case is expanded here. NOTE(review):
// the signature line (8716) is missing from this extract.
8717 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
8718 const LLT S64 = LLT::scalar(64);
8719 const LLT S16 = LLT::scalar(16);
8720
8721 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
// NOTE(review): line 8722 (presumably "return lowerFPTRUNC_F64_TO_F16(MI);")
// is missing from this extract.
8723
8724 return UnableToLegalize;
8725}
8726
// Lower G_FPOWI as G_FPOW with the integer exponent converted to fp via
// G_SITOFP. NOTE(review): the signature line (8727) is missing from this
// extract.
8728 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8729 LLT Ty = MRI.getType(Dst);
8730
8731 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
8732 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
8733 MI.eraseFromParent();
8734 return Legalized;
8735}
8736
// Lower modf-style decomposition: DstInt = trunc(Src), DstFrac =
// copysign(Src - trunc(Src), Src). NOTE(review): the signature line (8737)
// is missing from this extract.
8738 auto [DstFrac, DstInt, Src] = MI.getFirst3Regs();
8739 LLT Ty = MRI.getType(Src);
8740 auto Flags = MI.getFlags();
8741
8742 auto IntPart = MIRBuilder.buildIntrinsicTrunc(Ty, Src, Flags);
8743 auto FracPart = MIRBuilder.buildFSub(Ty, Src, IntPart, Flags);
8744
// For +/-Inf the subtraction above yields NaN, but the fractional part
// must be +/-0.0, so select 0.0 when |Src| == Inf (skipped under ninf).
8745 Register FracToUse;
8746 if (MI.getFlag(MachineInstr::FmNoInfs)) {
8747 FracToUse = FracPart.getReg(0);
8748 } else {
8749 auto Abs = MIRBuilder.buildFAbs(Ty, Src, Flags);
8750 const fltSemantics &Semantics = getFltSemanticForLLT(Ty.getScalarType());
8751 auto Inf = MIRBuilder.buildFConstant(Ty, APFloat::getInf(Semantics));
8752 auto IsInf = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ,
8753 Ty.changeElementSize(1), Abs, Inf);
8754 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8755 auto Select = MIRBuilder.buildSelect(Ty, IsInf, Zero, FracPart);
8756 FracToUse = Select.getReg(0);
8757 }
8758
// Copysign restores the input's sign on the (possibly zero) fraction.
8759 MIRBuilder.buildFCopysign(DstFrac, FracToUse, Src, Flags);
8760 MIRBuilder.buildCopy(DstInt, IntPart.getReg(0));
8761
8762 MI.eraseFromParent();
8763 return Legalized;
8764}
8765
// Map an integer min/max opcode to the ICmp predicate whose select
// implements it. NOTE(review): the signature line (8766, presumably
// "static CmpInst::Predicate minMaxToCompare(unsigned Opc) {") is missing
// from this extract.
8767 switch (Opc) {
8768 case TargetOpcode::G_SMIN:
8769 return CmpInst::ICMP_SLT;
8770 case TargetOpcode::G_SMAX:
8771 return CmpInst::ICMP_SGT;
8772 case TargetOpcode::G_UMIN:
8773 return CmpInst::ICMP_ULT;
8774 case TargetOpcode::G_UMAX:
8775 return CmpInst::ICMP_UGT;
8776 default:
8777 llvm_unreachable("not in integer min/max");
8778 }
8779}
8780
// Lower G_SMIN/G_SMAX/G_UMIN/G_UMAX as compare + select. NOTE(review): the
// signature line (8781) is missing from this extract.
8782 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8783
8784 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
// The compare type mirrors the value type's vector shape with i1 elements.
8785 LLT CmpType = MRI.getType(Dst).changeElementType(LLT::scalar(1));
8786
8787 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8788 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8789
8790 MI.eraseFromParent();
8791 return Legalized;
8792}
8793
// Lower G_SCMP/G_UCMP (three-way compare producing -1/0/1). NOTE(review):
// the signature lines (8794-8795) are missing from this extract, as are the
// ternary alternatives on lines 8804-8805 and 8807-8808 (presumably the
// signed vs unsigned LT/GT predicates).
8796 GSUCmp *Cmp = cast<GSUCmp>(&MI);
8797
8798 Register Dst = Cmp->getReg(0);
8799 LLT DstTy = MRI.getType(Dst);
8800 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8801 LLT CmpTy = DstTy.changeElementSize(1);
8802
8803 CmpInst::Predicate LTPredicate = Cmp->isSigned()
8806 CmpInst::Predicate GTPredicate = Cmp->isSigned()
8809
8810 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
8811 auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8812 Cmp->getRHSReg());
8813 auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8814 Cmp->getRHSReg());
8815
8816 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
8817 auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
// Prefer two selects when the target likes selects; otherwise compute
// (IsGT) - (IsLT) with booleans extended to DstTy.
8818 if (TLI.preferSelectsOverBooleanArithmetic(
8819 getApproximateEVTForLLT(SrcTy, Ctx)) ||
8821 auto One = MIRBuilder.buildConstant(DstTy, 1);
8822 auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8823
8824 auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
8825 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8826 } else {
// NOTE(review): line 8827 (the condition guarding this swap, presumably a
// boolean-contents check) is missing from this extract.
8828 std::swap(IsGT, IsLT);
8829 // Extend boolean results to DstTy, which is at least i2, before subtracting
8830 // them.
8831 unsigned BoolExtOp =
8832 MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
8833 IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8834 IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8835 MIRBuilder.buildSub(Dst, IsGT, IsLT);
8836 }
8837
8838 MI.eraseFromParent();
8839 return Legalized;
8840}
8841
// Lower G_FCOPYSIGN with possibly different-width operands: clear Src0's
// sign bit, isolate Src1's sign bit (resized as needed), OR them together.
// NOTE(review): the signature lines (8842-8843) are missing from this
// extract.
8844 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
8845 const int Src0Size = Src0Ty.getScalarSizeInBits();
8846 const int Src1Size = Src1Ty.getScalarSizeInBits();
8847
8848 auto SignBitMask = MIRBuilder.buildConstant(
8849 Src0Ty, APInt::getSignMask(Src0Size));
8850
8851 auto NotSignBitMask = MIRBuilder.buildConstant(
8852 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
8853
8854 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
8855 Register And1;
8856 if (Src0Ty == Src1Ty) {
8857 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
8858 } else if (Src0Size > Src1Size) {
// Wider destination: move Src1's sign bit up to Src0's sign position.
8859 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
8860 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
8861 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
8862 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
8863 } else {
// Narrower destination: shift Src1's sign bit down before truncating.
8864 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
8865 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
8866 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
8867 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
8868 }
8869
8870 // Be careful about setting nsz/nnan/ninf on every instruction, since the
8871 // constants are a nan and -0.0, but the final result should preserve
8872 // everything.
8873 unsigned Flags = MI.getFlags();
8874
8875 // We masked the sign bit and the not-sign bit, so these are disjoint.
8876 Flags |= MachineInstr::Disjoint;
8877
8878 MIRBuilder.buildOr(Dst, And0, And1, Flags);
8879
8880 MI.eraseFromParent();
8881 return Legalized;
8882}
8883
// Lower G_FMINNUM/G_FMAXNUM (and the *IMUMNUM forms) to the next-weaker
// opcode, inserting G_FCANONICALIZE to quiet possible sNaN inputs first.
// NOTE(review): the signature lines (8884-8885) are missing from this
// extract.
8886 // FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have
8887 // identical handling. fminimumnum/fmaximumnum also need a path that do not
8888 // depend on fminnum/fmaxnum.
8889
8890 unsigned NewOp;
8891 switch (MI.getOpcode()) {
8892 case TargetOpcode::G_FMINNUM:
8893 NewOp = TargetOpcode::G_FMINNUM_IEEE;
8894 break;
8895 case TargetOpcode::G_FMINIMUMNUM:
8896 NewOp = TargetOpcode::G_FMINNUM;
8897 break;
8898 case TargetOpcode::G_FMAXNUM:
8899 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
8900 break;
8901 case TargetOpcode::G_FMAXIMUMNUM:
8902 NewOp = TargetOpcode::G_FMAXNUM;
8903 break;
8904 default:
8905 llvm_unreachable("unexpected min/max opcode");
8906 }
8907
8908 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8909 LLT Ty = MRI.getType(Dst);
8910
8911 if (!MI.getFlag(MachineInstr::FmNoNans)) {
8912 // Insert canonicalizes if it's possible we need to quiet to get correct
8913 // sNaN behavior.
8914
8915 // Note this must be done here, and not as an optimization combine in the
8916 // absence of a dedicate quiet-snan instruction as we're using an
8917 // omni-purpose G_FCANONICALIZE.
8918 if (!isKnownNeverSNaN(Src0, MRI))
8919 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
8920
8921 if (!isKnownNeverSNaN(Src1, MRI))
8922 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
8923 }
8924
8925 // If there are no nans, it's safe to simply replace this with the non-IEEE
8926 // version.
8927 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
8928 MI.eraseFromParent();
8929 return Legalized;
8930}
8931
// Lower G_FMINIMUM/G_FMAXIMUM: pick the strongest available min/max
// instruction (IEEE variant, non-IEEE variant, or compare+select), then
// patch up the two semantic requirements the fallback may miss: NaN
// propagation (a NaN in either operand yields NaN) and the ordering of
// signed zeros (-0.0 < +0.0). NOTE(review): the signature lines (8932-8933)
// are missing from this extract.
8934 unsigned Opc = MI.getOpcode();
8935 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8936 LLT Ty = MRI.getType(Dst);
8937 LLT CmpTy = Ty.changeElementSize(1);
8938
8939 bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM);
8940 unsigned OpcIeee =
8941 IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
8942 unsigned OpcNonIeee =
8943 IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
8944 bool MinMaxMustRespectOrderedZero = false;
8945 Register Res;
8946
8947 // IEEE variants don't need canonicalization
8948 if (LI.isLegalOrCustom({OpcIeee, Ty})) {
8949 Res = MIRBuilder.buildInstr(OpcIeee, {Ty}, {Src0, Src1}).getReg(0);
8950 MinMaxMustRespectOrderedZero = true;
8951 } else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
8952 Res = MIRBuilder.buildInstr(OpcNonIeee, {Ty}, {Src0, Src1}).getReg(0);
8953 } else {
8954 auto Compare = MIRBuilder.buildFCmp(
8955 IsMax ? CmpInst::FCMP_OGT : CmpInst::FCMP_OLT, CmpTy, Src0, Src1);
8956 Res = MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
8957 }
8958
8959 // Propagate any NaN of both operands
// BUGFIX: the quieting select must be emitted when EITHER operand may be
// NaN. The second condition previously read "isKnownNeverNaN(Src1, MRI)"
// (missing the '!'), which emitted the select only when Src1 was proven
// NaN-free and skipped it exactly when Src1 could be NaN -- the inverse of
// the required fminimum/fmaximum NaN-propagation semantics.
8960 if (!MI.getFlag(MachineInstr::FmNoNans) &&
8961 (!isKnownNeverNaN(Src0, MRI) || !isKnownNeverNaN(Src1, MRI))) {
8962 auto IsOrdered = MIRBuilder.buildFCmp(CmpInst::FCMP_ORD, CmpTy, Src0, Src1);
8963
8964 LLT ElementTy = Ty.isScalar() ? Ty : Ty.getElementType();
8965 APFloat NaNValue = APFloat::getNaN(getFltSemanticForLLT(ElementTy));
8966 Register NaN = MIRBuilder.buildFConstant(ElementTy, NaNValue).getReg(0);
8967 if (Ty.isVector())
8968 NaN = MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);
8969
8970 Res = MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
8971 }
8972
8973 // fminimum/fmaximum requires -0.0 less than +0.0
8974 if (!MinMaxMustRespectOrderedZero && !MI.getFlag(MachineInstr::FmNsz)) {
8975 GISelValueTracking VT(MIRBuilder.getMF());
8976 KnownFPClass Src0Info = VT.computeKnownFPClass(Src0, fcZero);
8977 KnownFPClass Src1Info = VT.computeKnownFPClass(Src1, fcZero);
8978
// Only needed when both operands could be zero; otherwise the sign of
// zero cannot be ambiguous in the result.
8979 if (!Src0Info.isKnownNeverZero() && !Src1Info.isKnownNeverZero()) {
8980 const unsigned Flags = MI.getFlags();
8981 Register Zero = MIRBuilder.buildFConstant(Ty, 0.0).getReg(0);
8982 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CmpTy, Res, Zero);
8983
// When the result compares equal to zero, prefer +0.0 for maximum and
// -0.0 for minimum, taken from whichever source operand has that class.
8984 unsigned TestClass = IsMax ? fcPosZero : fcNegZero;
8985
8986 auto LHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
8987 auto LHSSelect =
8988 MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);
8989
8990 auto RHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
8991 auto RHSSelect =
8992 MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);
8993
8994 Res = MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
8995 }
8996 }
8997
8998 MIRBuilder.buildCopy(Dst, Res);
8999 MI.eraseFromParent();
9000 return Legalized;
9001}
9002
// NOTE(review): the signature line (9003) is missing from this extract.
9004 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
9005 Register DstReg = MI.getOperand(0).getReg();
9006 LLT Ty = MRI.getType(DstReg);
9007 unsigned Flags = MI.getFlags();
9008
// G_FMAD permits intermediate rounding, so separate mul + add is a valid
// expansion (unlike G_FMA).
9009 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
9010 Flags);
9011 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
9012 MI.eraseFromParent();
9013 return Legalized;
9014}
9015
// Lower G_INTRINSIC_ROUND (round halfway cases away from zero) via trunc,
// fabs and copysign. NOTE(review): the signature lines (9016-9017) are
// missing from this extract.
9018 auto [DstReg, X] = MI.getFirst2Regs();
9019 const unsigned Flags = MI.getFlags();
9020 const LLT Ty = MRI.getType(DstReg);
9021 const LLT CondTy = Ty.changeElementSize(1);
9022
9023 // round(x) =>
9024 // t = trunc(x);
9025 // d = fabs(x - t);
9026 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
9027 // return t + o;
9028
9029 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
9030
9031 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
9032 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
9033
9034 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
9035 auto Cmp =
9036 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
9037
9038 // Could emit G_UITOFP instead
9039 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
9040 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
9041 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
// Copysign gives the offset the sign of x so rounding moves away from 0.
9042 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
9043
9044 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
9045
9046 MI.eraseFromParent();
9047 return Legalized;
9048}
9049
// Lower G_FFLOOR: trunc, then subtract 1.0 for negative non-integers.
// NOTE(review): the signature line (9050) is missing from this extract.
9051 auto [DstReg, SrcReg] = MI.getFirst2Regs();
9052 unsigned Flags = MI.getFlags();
9053 LLT Ty = MRI.getType(DstReg);
9054 const LLT CondTy = Ty.changeElementSize(1);
9055
9056 // result = trunc(src);
9057 // if (src < 0.0 && src != result)
9058 // result += -1.0.
9059
9060 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
9061 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
9062
9063 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
9064 SrcReg, Zero, Flags);
9065 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
9066 SrcReg, Trunc, Flags);
9067 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
// The i1 condition sitofp's to -1.0 when true (signed), 0.0 when false,
// giving the conditional decrement without a select.
9068 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
9069
9070 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
9071 MI.eraseFromParent();
9072 return Legalized;
9073}
9074
// Lower G_MERGE_VALUES as zext + shift + or of each source part into a wide
// scalar, with a final inttoptr for pointer destinations. NOTE(review): the
// signature lines (9075-9076) are missing from this extract.
9077 const unsigned NumOps = MI.getNumOperands();
9078 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
9079 unsigned PartSize = Src0Ty.getSizeInBits();
9080
9081 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
9082 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
9083
// Operand 0 is the def; sources start at operand 1 (handled above) and the
// remaining ones are ORed in at increasing bit offsets.
9084 for (unsigned I = 2; I != NumOps; ++I) {
9085 const unsigned Offset = (I - 1) * PartSize;
9086
9087 Register SrcReg = MI.getOperand(I).getReg();
9088 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
9089
// Write directly into DstReg on the last iteration when no pointer cast
// is needed; otherwise accumulate in a fresh virtual register.
9090 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
9091 MRI.createGenericVirtualRegister(WideTy);
9092
9093 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
9094 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
9095 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
9096 ResultReg = NextResult;
9097 }
9098
9099 if (DstTy.isPointer()) {
9100 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
9101 DstTy.getAddressSpace())) {
9102 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
9103 return UnableToLegalize;
9104 }
9105
9106 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
9107 }
9108
9109 MI.eraseFromParent();
9110 return Legalized;
9111}
9112
// Lower an unmerge of a wide value into scalar parts: coerce the source to a
// scalar integer, then extract part I as trunc(src >> I*DstSize).
9115 const unsigned NumDst = MI.getNumOperands() - 1;
// Operand layout: operands [0, NumDst) are defs, the last operand is the source.
9116 Register SrcReg = MI.getOperand(NumDst).getReg();
9117 Register Dst0Reg = MI.getOperand(0).getReg();
9118 LLT DstTy = MRI.getType(Dst0Reg);
9119 if (DstTy.isPointer())
9120 return UnableToLegalize; // TODO
9121
// May bitcast/ptrtoint; returns a null Register on failure.
9122 SrcReg = coerceToScalar(SrcReg);
9123 if (!SrcReg)
9124 return UnableToLegalize;
9125
9126 // Expand scalarizing unmerge as bitcast to integer and shift.
9127 LLT IntTy = MRI.getType(SrcReg);
9128
// Part 0 is the low bits: a plain truncate, no shift needed.
9129 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
9130
9131 const unsigned DstSize = DstTy.getSizeInBits();
9132 unsigned Offset = DstSize;
9133 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
9134 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
9135 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
9136 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
9137 }
9138
9139 MI.eraseFromParent();
9140 return Legalized;
9141}
9142
9143/// Lower a vector extract or insert by writing the vector to a stack temporary
9144/// and reloading the element or vector.
9145///
9146/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
9147/// =>
9148/// %stack_temp = G_FRAME_INDEX
9149/// G_STORE %vec, %stack_temp
9150/// %idx = clamp(%idx, %vec.getNumElements())
9151/// %element_ptr = G_PTR_ADD %stack_temp, %idx
9152/// %dst = G_LOAD %element_ptr
9155 Register DstReg = MI.getOperand(0).getReg();
9156 Register SrcVec = MI.getOperand(1).getReg();
// InsertVal stays invalid (null) for the extract form; it doubles as the
// "is this an insert?" flag below.
9157 Register InsertVal;
9158 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
9159 InsertVal = MI.getOperand(2).getReg();
9160
// The index is always the last operand for both opcodes.
9161 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
9162
9163 LLT VecTy = MRI.getType(SrcVec);
9164 LLT EltTy = VecTy.getElementType();
9165 unsigned NumElts = VecTy.getNumElements();
9166
// Fast path: a constant in-range index can be handled by unmerging the
// vector into scalar parts and picking/replacing one part — no stack needed.
// NOTE(review): the bound check is `IdxVal <= NumElts`, but SrcRegs[IdxVal]
// below has valid indices [0, NumElts) — confirm this shouldn't be `<`.
9167 int64_t IdxVal;
9168 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
9170 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
9171
9172 if (InsertVal) {
9173 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
9174 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
9175 } else {
9176 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
9177 }
9178
9179 MI.eraseFromParent();
9180 return Legalized;
9181 }
9182
// The stack path addresses elements by byte offset, so it requires
// byte-sized elements.
9183 if (!EltTy.isByteSized()) { // Not implemented.
9184 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
9185 return UnableToLegalize;
9186 }
9187
9188 unsigned EltBytes = EltTy.getSizeInBytes();
9189 Align VecAlign = getStackTemporaryAlignment(VecTy);
9190 Align EltAlign;
9191
// Spill the whole vector to a fresh stack slot.
9192 MachinePointerInfo PtrInfo;
9193 auto StackTemp = createStackTemporary(
9194 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
9195 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
9196
9197 // Get the pointer to the element, and be sure not to hit undefined behavior
9198 // if the index is out of bounds.
9199 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
9200
// With a constant index we can keep precise pointer info and derive the
// exact alignment of the accessed element; a variable index loses both.
9201 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
9202 int64_t Offset = IdxVal * EltBytes;
9203 PtrInfo = PtrInfo.getWithOffset(Offset);
9204 EltAlign = commonAlignment(VecAlign, Offset);
9205 } else {
9206 // We lose information with a variable offset.
9207 EltAlign = getStackTemporaryAlignment(EltTy);
9208 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
9209 }
9210
9211 if (InsertVal) {
9212 // Write the inserted element
9213 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
9214
9215 // Reload the whole vector.
9216 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
9217 } else {
9218 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
9219 }
9220
9221 MI.eraseFromParent();
9222 return Legalized;
9223}
9224
// Lower G_SHUFFLE_VECTOR by materializing every result lane explicitly:
// extract each selected element from the appropriate source vector (caching
// repeated extracts) and rebuild the destination with G_BUILD_VECTOR.
// Negative mask entries become a single shared G_IMPLICIT_DEF element.
9227 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
9228 MI.getFirst3RegLLTs();
9229 LLT IdxTy = LLT::scalar(32);
9230
9231 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
9234 LLT EltTy = DstTy.getScalarType();
9235
// Cache of already-built extracts, keyed by the combined mask index so a
// lane used twice is only extracted once.
9236 DenseMap<unsigned, Register> CachedExtract;
9237
9238 for (int Idx : Mask) {
// A negative mask entry means "undef lane"; build one undef and reuse it.
9239 if (Idx < 0) {
9240 if (!Undef.isValid())
9241 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
9242 BuildVec.push_back(Undef);
9243 continue;
9244 }
9245
9246 assert(!Src0Ty.isScalar() && "Unexpected scalar G_SHUFFLE_VECTOR");
9247
// Mask indices >= NumElts select from the second source vector.
9248 int NumElts = Src0Ty.getNumElements();
9249 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
9250 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
9251 auto [It, Inserted] = CachedExtract.try_emplace(Idx);
9252 if (Inserted) {
9253 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
9254 It->second =
9255 MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK).getReg(0);
9256 }
9257 BuildVec.push_back(It->second);
9258 }
9259
9260 assert(DstTy.isVector() && "Unexpected scalar G_SHUFFLE_VECTOR");
9261 MIRBuilder.buildBuildVector(DstReg, BuildVec);
9262 MI.eraseFromParent();
9263 return Legalized;
9264}
9265
// Lower G_VECTOR_COMPRESS via a stack temporary: pre-fill the slot with the
// passthru (if any), then walk the lanes in order, storing each element at a
// write cursor (OutPos) that only advances on lanes whose mask bit is set,
// and finally reload the whole vector.
9268 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
9269 MI.getFirst4RegLLTs();
9270
9271 if (VecTy.isScalableVector())
9272 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
9273
9274 Align VecAlign = getStackTemporaryAlignment(VecTy);
9275 MachinePointerInfo PtrInfo;
9276 Register StackPtr =
9277 createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
9278 PtrInfo)
9279 .getReg(0);
9280 MachinePointerInfo ValPtrInfo =
9282
9283 LLT IdxTy = LLT::scalar(32);
9284 LLT ValTy = VecTy.getElementType();
9285 Align ValAlign = getStackTemporaryAlignment(ValTy);
9286
// Write cursor into the stack slot; starts at lane 0.
9287 auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
9288
// An undef passthru means the tail lanes may be left as garbage, which
// skips the pre-fill and the tail-fill work below.
9289 bool HasPassthru =
9290 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
9291
9292 if (HasPassthru)
9293 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
9294
9295 Register LastWriteVal;
9296 std::optional<APInt> PassthruSplatVal =
9297 isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
9298
9299 if (PassthruSplatVal.has_value()) {
// Splat passthru: the tail filler is just that constant.
9300 LastWriteVal =
9301 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9302 } else if (HasPassthru) {
// Non-splat passthru: popcount(mask) gives the first untouched lane;
// load the passthru element already stored there as the tail filler.
9303 auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9304 Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9305 {LLT::scalar(32)}, {Popcount});
9306
9307 Register LastElmtPtr =
9308 getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
9309 LastWriteVal =
9310 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9311 .getReg(0);
9312 }
9313
9314 unsigned NumElmts = VecTy.getNumElements();
9315 for (unsigned I = 0; I < NumElmts; ++I) {
9316 auto Idx = MIRBuilder.buildConstant(IdxTy, I);
9317 auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
// Store unconditionally; a masked-off lane's store is overwritten by the
// next selected lane because OutPos doesn't advance for it.
9318 Register ElmtPtr =
9319 getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9320 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9321
// Normalize the mask element to i1, then advance the cursor by 0 or 1.
9322 LLT MaskITy = MaskTy.getElementType();
9323 auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9324 if (MaskITy.getSizeInBits() > 1)
9325 MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
9326
9327 MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
9328 OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
9329
// After the final lane: clamp the cursor into range and re-store either
// the last selected value (all lanes set) or the passthru tail filler.
9330 if (HasPassthru && I == NumElmts - 1) {
9331 auto EndOfVector =
9332 MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
9333 auto AllLanesSelected = MIRBuilder.buildICmp(
9334 CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
9335 OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9336 {OutPos, EndOfVector});
9337 ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9338
9339 LastWriteVal =
9340 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9341 .getReg(0);
9342 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9343 }
9344 }
9345
9346 // TODO: Use StackPtr's FrameIndex alignment.
9347 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9348
9349 MI.eraseFromParent();
9350 return Legalized;
9351}
9352
// Compute the new stack pointer for a dynamic alloca: SP - AllocSize, then
// (optionally) rounded down to the requested alignment, all done in the
// integer type matching the pointer width.
9354 Register AllocSize,
9355 Align Alignment,
9356 LLT PtrTy) {
9357 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
9358
9359 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
9360 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
9361
9362 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
9363 // have to generate an extra instruction to negate the alloc and then use
9364 // G_PTR_ADD to add the negative offset.
9365 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
9366 if (Alignment > Align(1)) {
// Align downward: AND with ~(Alignment - 1). The APInt is built from the
// alignment value and negated to form the mask.
9367 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
9368 AlignMask.negate();
9369 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
9370 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
9371 }
9372
// Cast the integer result back to the pointer type expected by callers.
9373 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
9374}
9375
// Lower G_DYN_STACKALLOC by adjusting the stack pointer directly:
// compute the new SP (via getDynStackAllocTargetPtr), write it back to the
// SP register, and also copy it to the result. Only supports downward-growing
// stacks.
9378 const auto &MF = *MI.getMF();
9379 const auto &TFI = *MF.getSubtarget().getFrameLowering();
// The subtraction-based expansion below assumes the stack grows down.
9380 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
9381 return UnableToLegalize;
9382
9383 Register Dst = MI.getOperand(0).getReg();
9384 Register AllocSize = MI.getOperand(1).getReg();
9385 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
9386
9387 LLT PtrTy = MRI.getType(Dst);
9388 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9389 Register SPTmp =
9390 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
9391
// Commit the new SP, and the allocated block starts at the new SP value.
9392 MIRBuilder.buildCopy(SPReg, SPTmp);
9393 MIRBuilder.buildCopy(Dst, SPTmp);
9394
9395 MI.eraseFromParent();
9396 return Legalized;
9397}
9398
// Lower G_STACKSAVE to a plain copy from the target's stack pointer register;
// fails if the target doesn't expose one.
9401 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9402 if (!StackPtr)
9403 return UnableToLegalize;
9404
9405 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
9406 MI.eraseFromParent();
9407 return Legalized;
9408}
9409
// Lower G_STACKRESTORE to a plain copy into the target's stack pointer
// register; fails if the target doesn't expose one.
9412 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9413 if (!StackPtr)
9414 return UnableToLegalize;
9415
9416 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
9417 MI.eraseFromParent();
9418 return Legalized;
9419}
9420
// Lower G_EXTRACT. Two strategies:
//  1. Element-aligned extract from a vector: unmerge the source and re-merge
//    (or copy) just the covered elements, leaving clean pieces for the
//    artifact combiner.
//  2. Scalar result from a scalar (or bitcast vector) source: shift right by
//    the bit offset and truncate.
// Anything else is reported as UnableToLegalize.
9423 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
// Bit offset of the extracted value within the source.
9424 unsigned Offset = MI.getOperand(2).getImm();
9425
9426 // Extract sub-vector or one element
9427 if (SrcTy.isVector()) {
9428 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9429 unsigned DstSize = DstTy.getSizeInBits();
9430
// Only if the extract lies on element boundaries and stays in range.
9431 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9432 (Offset + DstSize <= SrcTy.getSizeInBits())) {
9433 // Unmerge and allow access to each Src element for the artifact combiner.
9434 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9435
9436 // Take element(s) we need to extract and copy it (merge them).
9437 SmallVector<Register, 8> SubVectorElts;
9438 for (unsigned Idx = Offset / SrcEltSize;
9439 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
9440 SubVectorElts.push_back(Unmerge.getReg(Idx));
9441 }
// A single element can't be merged; copy it through instead.
9442 if (SubVectorElts.size() == 1)
9443 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9444 else
9445 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9446
9447 MI.eraseFromParent();
9448 return Legalized;
9449 }
9450 }
9451
9452 if (DstTy.isScalar() &&
9453 (SrcTy.isScalar() ||
9454 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9455 LLT SrcIntTy = SrcTy;
// View a vector source as one wide integer so we can shift bits out of it.
9456 if (!SrcTy.isScalar()) {
9457 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
9458 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
9459 }
9460
9461 if (Offset == 0)
9462 MIRBuilder.buildTrunc(DstReg, SrcReg);
9463 else {
9464 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
9465 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9466 MIRBuilder.buildTrunc(DstReg, Shr);
9467 }
9468
9469 MI.eraseFromParent();
9470 return Legalized;
9471 }
9472
9473 return UnableToLegalize;
9474}
9475
// Lower G_INSERT. Two strategies:
//  1. Element-aligned insert into a vector: unmerge source (and the inserted
//    value, if it spans multiple elements), splice the element lists, and
//    re-merge.
//  2. Otherwise treat everything as integers: zext + shift the inserted value
//    into position, mask the covered bit range out of the source, and OR.
9477 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
// Bit offset at which InsertSrc is placed within Src.
9478 uint64_t Offset = MI.getOperand(3).getImm();
9479
9480 LLT DstTy = MRI.getType(Src);
9481 LLT InsertTy = MRI.getType(InsertSrc);
9482
9483 // Insert sub-vector or one element
9484 if (DstTy.isVector() && !InsertTy.isPointer()) {
9485 LLT EltTy = DstTy.getElementType();
9486 unsigned EltSize = EltTy.getSizeInBits();
9487 unsigned InsertSize = InsertTy.getSizeInBits();
9488
// Only if the insert lies on element boundaries and stays in range.
9489 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9490 (Offset + InsertSize <= DstTy.getSizeInBits())) {
9491 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
9493 unsigned Idx = 0;
9494 // Elements from Src before insert start Offset
9495 for (; Idx < Offset / EltSize; ++Idx) {
9496 DstElts.push_back(UnmergeSrc.getReg(Idx));
9497 }
9498
9499 // Replace elements in Src with elements from InsertSrc
9500 if (InsertTy.getSizeInBits() > EltSize) {
// Multi-element insert: take each element of the inserted value in turn.
9501 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9502 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
9503 ++Idx, ++i) {
9504 DstElts.push_back(UnmergeInsertSrc.getReg(i));
9505 }
9506 } else {
// Single-element insert: the value itself is one destination element.
9507 DstElts.push_back(InsertSrc);
9508 ++Idx;
9509 }
9510
9511 // Remaining elements from Src after insert
9512 for (; Idx < DstTy.getNumElements(); ++Idx) {
9513 DstElts.push_back(UnmergeSrc.getReg(Idx));
9514 }
9515
9516 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9517 MI.eraseFromParent();
9518 return Legalized;
9519 }
9520 }
9521
// The integer fallback below can't handle vector inserts that missed the
// aligned path, or element-type mismatches.
9522 if (InsertTy.isVector() ||
9523 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
9524 return UnableToLegalize;
9525
// Pointers in non-integral address spaces have no defined int<->ptr casts.
9526 const DataLayout &DL = MIRBuilder.getDataLayout();
9527 if ((DstTy.isPointer() &&
9528 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
9529 (InsertTy.isPointer() &&
9530 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
9531 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9532 return UnableToLegalize;
9533 }
9534
9535 LLT IntDstTy = DstTy;
9536
9537 if (!DstTy.isScalar()) {
9538 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
9539 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9540 }
9541
9542 if (!InsertTy.isScalar()) {
9543 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
9544 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9545 }
9546
// Position the inserted bits at Offset within a zero background.
9547 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
9548 if (Offset != 0) {
9549 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
9550 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9551 }
9552
9554 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
9555
// Clear the target bit range in Src, then OR in the shifted value.
9556 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
9557 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9558 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9559
9560 MIRBuilder.buildCast(Dst, Or);
9561 MI.eraseFromParent();
9562 return Legalized;
9563}
9564
// Lower G_SADDO / G_SSUBO: perform the plain add/sub, then derive the signed
// overflow flag from a compare-based identity (see the comments inline).
9567 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9568 MI.getFirst4RegLLTs();
9569 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
9570
9571 LLT Ty = Dst0Ty;
9572 LLT BoolTy = Dst1Ty;
9573
// Compute into a clone so the overflow compares can read the result before
// Dst0 itself is defined (it is copied at the end).
9574 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9575
9576 if (IsAdd)
9577 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
9578 else
9579 MIRBuilder.buildSub(NewDst0, LHS, RHS);
9580
9581 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
9582
9583 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9584
9585 // For an addition, the result should be less than one of the operands (LHS)
9586 // if and only if the other operand (RHS) is negative, otherwise there will
9587 // be overflow.
9588 // For a subtraction, the result should be less than one of the operands
9589 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
9590 // otherwise there will be overflow.
9591 auto ResultLowerThanLHS =
9592 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
9593 auto ConditionRHS = MIRBuilder.buildICmp(
9594 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
9595
// Overflow iff the two conditions disagree.
9596 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
9597
9598 MIRBuilder.buildCopy(Dst0, NewDst0);
9599 MI.eraseFromParent();
9600
9601 return Legalized;
9602}
9603
// Lower signed add-with-carry-in (and signed-overflow-out): compute
// LHS + RHS + zext(CarryIn), then detect signed overflow via the classic
// sign-bit identity (overflow iff both operands have the same sign and the
// sum's sign differs).
9605 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9606 const LLT Ty = MRI.getType(Res);
9607
9608 // sum = LHS + RHS + zext(CarryIn)
9609 auto Tmp = MIRBuilder.buildAdd(Ty, LHS, RHS);
9610 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9611 auto Sum = MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
9612 MIRBuilder.buildCopy(Res, Sum);
9613
9614 // OvOut = icmp slt ((sum ^ lhs) & (sum ^ rhs)), 0
// (sum^lhs) & (sum^rhs) has its sign bit set exactly when the sum's sign
// differs from both operands' signs, i.e. on signed overflow.
9615 auto AX = MIRBuilder.buildXor(Ty, Sum, LHS);
9616 auto BX = MIRBuilder.buildXor(Ty, Sum, RHS);
9617 auto T = MIRBuilder.buildAnd(Ty, AX, BX);
9618
// "sign bit set" expressed as a signed compare against zero.
9619 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9620 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9621
9622 MI.eraseFromParent();
9623 return Legalized;
9624}
9625
// Lower signed subtract-with-borrow-in (and signed-overflow-out): compute
// LHS - (RHS + zext(CarryIn)), then detect signed overflow from the sign bit
// of (LHS ^ RHS) & (LHS ^ Diff) — overflow iff the operands had different
// signs and the difference's sign differs from LHS.
9627 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9628 const LLT Ty = MRI.getType(Res);
9629
9630 // Diff = LHS - (RHS + zext(CarryIn))
9631 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9632 auto RHSPlusCI = MIRBuilder.buildAdd(Ty, RHS, CarryZ);
9633 auto Diff = MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
9634 MIRBuilder.buildCopy(Res, Diff);
9635
9636 // ov = msb((LHS ^ RHS) & (LHS ^ Diff))
9637 auto X1 = MIRBuilder.buildXor(Ty, LHS, RHS);
9638 auto X2 = MIRBuilder.buildXor(Ty, LHS, Diff);
9639 auto T = MIRBuilder.buildAnd(Ty, X1, X2);
// "msb set" expressed as a signed compare against zero.
9640 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9641 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9642
9643 MI.eraseFromParent();
9644 return Legalized;
9645}
9646
// Lower saturating add/sub (G_[US]{ADD,SUB}SAT) using min/max clamping:
// the signed forms clamp RHS into [lo, hi] bounds derived from LHS before the
// plain add/sub; the unsigned forms use a single umin (see formulas inline).
9649 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9650 LLT Ty = MRI.getType(Res);
9651 bool IsSigned;
9652 bool IsAdd;
// The underlying non-saturating operation (G_ADD or G_SUB).
9653 unsigned BaseOp;
9654 switch (MI.getOpcode()) {
9655 default:
9656 llvm_unreachable("unexpected addsat/subsat opcode");
9657 case TargetOpcode::G_UADDSAT:
9658 IsSigned = false;
9659 IsAdd = true;
9660 BaseOp = TargetOpcode::G_ADD;
9661 break;
9662 case TargetOpcode::G_SADDSAT:
9663 IsSigned = true;
9664 IsAdd = true;
9665 BaseOp = TargetOpcode::G_ADD;
9666 break;
9667 case TargetOpcode::G_USUBSAT:
9668 IsSigned = false;
9669 IsAdd = false;
9670 BaseOp = TargetOpcode::G_SUB;
9671 break;
9672 case TargetOpcode::G_SSUBSAT:
9673 IsSigned = true;
9674 IsAdd = false;
9675 BaseOp = TargetOpcode::G_SUB;
9676 break;
9677 }
9678
9679 if (IsSigned) {
9680 // sadd.sat(a, b) ->
9681 // hi = 0x7fffffff - smax(a, 0)
9682 // lo = 0x80000000 - smin(a, 0)
9683 // a + smin(smax(lo, b), hi)
9684 // ssub.sat(a, b) ->
9685 // lo = smax(a, -1) - 0x7fffffff
9686 // hi = smin(a, -1) - 0x80000000
9687 // a - smin(smax(lo, b), hi)
9688 // TODO: AMDGPU can use a "median of 3" instruction here:
9689 // a +/- med3(lo, b, hi)
9690 uint64_t NumBits = Ty.getScalarSizeInBits();
9691 auto MaxVal =
9692 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
9693 auto MinVal =
9694 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9696 if (IsAdd) {
9697 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9698 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
9699 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
9700 } else {
9701 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
9702 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
9703 MaxVal);
9704 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
9705 MinVal);
9706 }
// With RHS clamped to [Lo, Hi], the base op can no longer overflow.
9707 auto RHSClamped =
9708 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
9709 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
9710 } else {
9711 // uadd.sat(a, b) -> a + umin(~a, b)
9712 // usub.sat(a, b) -> a - umin(a, b)
// ~a is the headroom left before unsigned wrap; for sub, a itself is.
9713 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
9714 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
9715 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9716 }
9717
9718 MI.eraseFromParent();
9719 return Legalized;
9720}
9721
// Lower saturating add/sub (G_[US]{ADD,SUB}SAT) via the corresponding
// overflow opcode: do the op with overflow output, then select a clamp value
// when overflow occurred (see the per-case formulas inline).
9724 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9725 LLT Ty = MRI.getType(Res);
9726 LLT BoolTy = Ty.changeElementSize(1);
9727 bool IsSigned;
9728 bool IsAdd;
// The matching overflow-producing opcode for this saturating op.
9729 unsigned OverflowOp;
9730 switch (MI.getOpcode()) {
9731 default:
9732 llvm_unreachable("unexpected addsat/subsat opcode");
9733 case TargetOpcode::G_UADDSAT:
9734 IsSigned = false;
9735 IsAdd = true;
9736 OverflowOp = TargetOpcode::G_UADDO;
9737 break;
9738 case TargetOpcode::G_SADDSAT:
9739 IsSigned = true;
9740 IsAdd = true;
9741 OverflowOp = TargetOpcode::G_SADDO;
9742 break;
9743 case TargetOpcode::G_USUBSAT:
9744 IsSigned = false;
9745 IsAdd = false;
9746 OverflowOp = TargetOpcode::G_USUBO;
9747 break;
9748 case TargetOpcode::G_SSUBSAT:
9749 IsSigned = true;
9750 IsAdd = false;
9751 OverflowOp = TargetOpcode::G_SSUBO;
9752 break;
9753 }
9754
9755 auto OverflowRes =
9756 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9757 Register Tmp = OverflowRes.getReg(0);
9758 Register Ov = OverflowRes.getReg(1);
9759 MachineInstrBuilder Clamp;
9760 if (IsSigned) {
9761 // sadd.sat(a, b) ->
9762 // {tmp, ov} = saddo(a, b)
9763 // ov ? (tmp >>s 31) + 0x80000000 : r
9764 // ssub.sat(a, b) ->
9765 // {tmp, ov} = ssubo(a, b)
9766 // ov ? (tmp >>s 31) + 0x80000000 : r
// Arithmetic shift of the wrapped result gives all-zeros or all-ones
// depending on its sign; adding INT_MIN turns that into INT_MIN/INT_MAX,
// i.e. the saturation value on the side the overflow occurred.
9767 uint64_t NumBits = Ty.getScalarSizeInBits();
9768 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
9769 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9770 auto MinVal =
9771 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9772 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
9773 } else {
9774 // uadd.sat(a, b) ->
9775 // {tmp, ov} = uaddo(a, b)
9776 // ov ? 0xffffffff : tmp
9777 // usub.sat(a, b) ->
9778 // {tmp, ov} = usubo(a, b)
9779 // ov ? 0 : tmp
9780 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9781 }
9782 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
9783
9784 MI.eraseFromParent();
9785 return Legalized;
9786}
9787
// Lower saturating shift-left (G_SSHLSAT / G_USHLSAT): do the shift, shift
// the result back, and if the round trip doesn't reproduce the input then
// bits were lost — select the appropriate saturation value instead.
9790 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
9791 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
9792 "Expected shlsat opcode!");
9793 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
9794 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9795 LLT Ty = MRI.getType(Res);
9796 LLT BoolTy = Ty.changeElementSize(1);
9797
9798 unsigned BW = Ty.getScalarSizeInBits();
9799 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
// Undo the shift with the matching right shift for the overflow check.
9800 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
9801 : MIRBuilder.buildLShr(Ty, Result, RHS);
9802
9803 MachineInstrBuilder SatVal;
9804 if (IsSigned) {
// Signed saturation direction depends on the sign of the input.
9805 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
9806 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
9807 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
9808 MIRBuilder.buildConstant(Ty, 0));
9809 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
9810 } else {
9811 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
9812 }
// Overflowed iff shifting back doesn't give the original LHS.
9813 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
9814 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
9815
9816 MI.eraseFromParent();
9817 return Legalized;
9818}
9819
// Lower G_BSWAP with shifts, masks, and ORs: first swap the outermost byte
// pair with two full-width shifts, then move each remaining inner byte pair
// into place with mask+shift.
9821 auto [Dst, Src] = MI.getFirst2Regs();
9822 const LLT Ty = MRI.getType(Src);
// Round up to whole bytes; BaseShiftAmt moves the lowest byte to the top.
9823 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
9824 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
9825
9826 // Swap most and least significant byte, set remaining bytes in Res to zero.
9827 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
9828 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
9829 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9830 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
9831
9832 // Set i-th high/low byte in Res to i-th low/high byte from Src.
9833 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
9834 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
// NOTE(review): `0xFF << (i * 8)` is an int shift — for types wider than
// 4 bytes (i >= 3 sets the sign bit; i >= 4 is UB) this looks suspect;
// confirm whether a 64-bit/APInt shift is intended here.
9835 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
9836 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
// Byte i and its mirror byte are 2*i bytes (16*i bits) closer together
// than the outermost pair.
9837 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
9838 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
9839 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
9840 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
9841 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
9842 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
9843 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9844 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
9845 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
9846 }
// Retarget the final OR to define Dst directly instead of a temp vreg.
9847 Res.getInstr()->getOperand(0).setReg(Dst);
9848
9849 MI.eraseFromParent();
9850 return Legalized;
9851}
9852
// Helper for the bitreverse expansion below: swaps adjacent N-bit fields of
// Src using the given mask, building the result into Dst. Computes:
9853//{ (Src & Mask) >> N } | { (Src << N) & Mask }
9855 MachineInstrBuilder Src, const APInt &Mask) {
9856 const LLT Ty = Dst.getLLTTy(*B.getMRI());
9857 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
9858 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
// High halves of each 2N-bit block move down...
9859 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
// ...and low halves move up; OR recombines them.
9860 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
9861 return B.buildOr(Dst, LHS, RHS);
9862}
9863
// Lower G_BITREVERSE. For >= 8-bit elements: either bitcast to an i8 vector
// whose bitreverse is legal, or use BSWAP for the byte reversal followed by
// three SwapN passes (nibbles, bit-pairs, bits) within each byte. For
// sub-byte types: move each bit individually with shift+mask+or.
9866 auto [Dst, Src] = MI.getFirst2Regs();
9867 const LLT SrcTy = MRI.getType(Src);
9868 unsigned Size = SrcTy.getScalarSizeInBits();
9869 unsigned VSize = SrcTy.getSizeInBits();
9870
9871 if (Size >= 8) {
9872 if (SrcTy.isVector() && (VSize % 8 == 0) &&
9873 (LI.isLegal({TargetOpcode::G_BITREVERSE,
9874 {LLT::fixed_vector(VSize / 8, 8),
9875 LLT::fixed_vector(VSize / 8, 8)}}))) {
9876 // If bitreverse is legal for i8 vector of the same size, then cast
9877 // to i8 vector type.
9878 // e.g. v4s32 -> v16s8
9879 LLT VTy = LLT::fixed_vector(VSize / 8, 8);
// BSWAP first so that after the per-byte reverse (done in i8 lanes) the
// bytes end up in fully reversed order.
9880 auto BSWAP = MIRBuilder.buildBSwap(SrcTy, Src);
9881 auto Cast = MIRBuilder.buildBitcast(VTy, BSWAP);
9882 auto RBIT = MIRBuilder.buildBitReverse(VTy, Cast);
9883 MIRBuilder.buildBitcast(Dst, RBIT);
9884 } else {
9885 MachineInstrBuilder BSWAP =
9886 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
9887
9888 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
9889 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
9890 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
9891 MachineInstrBuilder Swap4 = SwapN(4, SrcTy, MIRBuilder, BSWAP,
9892 APInt::getSplat(Size, APInt(8, 0xF0)));
9893
9894 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
9895 // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
9896 // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
9897 MachineInstrBuilder Swap2 = SwapN(2, SrcTy, MIRBuilder, Swap4,
9898 APInt::getSplat(Size, APInt(8, 0xCC)));
9899
9900 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
9901 // 6|7
9902 // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
9903 // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
// Final pass writes straight into Dst.
9904 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
9905 }
9906 } else {
9907 // Expand bitreverse for types smaller than 8 bits.
// Bit I of the source must land in bit J = Size-1-I of the result; shift
// each bit into place, isolate it with a single-bit mask, and OR together.
9909 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
9911 if (I < J) {
9912 auto ShAmt = MIRBuilder.buildConstant(SrcTy, J - I);
9913 Tmp2 = MIRBuilder.buildShl(SrcTy, Src, ShAmt);
9914 } else {
9915 auto ShAmt = MIRBuilder.buildConstant(SrcTy, I - J);
9916 Tmp2 = MIRBuilder.buildLShr(SrcTy, Src, ShAmt);
9917 }
9918
9919 auto Mask = MIRBuilder.buildConstant(SrcTy, 1ULL << J);
9920 Tmp2 = MIRBuilder.buildAnd(SrcTy, Tmp2, Mask);
9921 if (I == 0)
9922 Tmp = Tmp2;
9923 else
9924 Tmp = MIRBuilder.buildOr(SrcTy, Tmp, Tmp2);
9925 }
9926 MIRBuilder.buildCopy(Dst, Tmp);
9927 }
9928
9929 MI.eraseFromParent();
9930 return Legalized;
9931}
9932
// Lower G_READ_REGISTER / G_WRITE_REGISTER: resolve the metadata register
// name to a physical register via TLI and emit a plain copy in the right
// direction. An unknown name is diagnosed (not a hard failure) and, for
// reads, the result is defined as undef so uses stay valid.
9935 MachineFunction &MF = MIRBuilder.getMF();
9936
9937 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
// Operand order differs between the two opcodes: read = (val, name),
// write = (name, val).
9938 int NameOpIdx = IsRead ? 1 : 0;
9939 int ValRegIndex = IsRead ? 0 : 1;
9940
9941 Register ValReg = MI.getOperand(ValRegIndex).getReg();
9942 const LLT Ty = MRI.getType(ValReg);
// The register name is carried as the first operand of an MDNode.
9943 const MDString *RegStr = cast<MDString>(
9944 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
9945
9946 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
9947 if (!PhysReg) {
// Emit a per-function diagnostic rather than failing legalization.
9948 const Function &Fn = MF.getFunction();
9950 "invalid register \"" + Twine(RegStr->getString().data()) + "\" for " +
9951 (IsRead ? "llvm.read_register" : "llvm.write_register"),
9952 Fn, MI.getDebugLoc()));
9953 if (IsRead)
9954 MIRBuilder.buildUndef(ValReg);
9955
9956 MI.eraseFromParent();
9957 return Legalized;
9958 }
9959
9960 if (IsRead)
9961 MIRBuilder.buildCopy(ValReg, PhysReg);
9962 else
9963 MIRBuilder.buildCopy(PhysReg, ValReg);
9964
9965 MI.eraseFromParent();
9966 return Legalized;
9967}
9968
// Lower G_SMULH / G_UMULH (high half of a multiply) by widening: extend both
// operands to double width (sext for signed, zext for unsigned), multiply,
// shift the product right by the original bit width, and truncate.
9971 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
9972 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
9973 Register Result = MI.getOperand(0).getReg();
9974 LLT OrigTy = MRI.getType(Result);
9975 auto SizeInBits = OrigTy.getScalarSizeInBits();
9976 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
9977
9978 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
9979 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
9980 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
// The shift kind must match the extension so the high half keeps its sign.
9981 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
9982
9983 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
9984 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
9985 MIRBuilder.buildTrunc(Result, Shifted);
9986
9987 MI.eraseFromParent();
9988 return Legalized;
9989}
9990
// Lower G_IS_FPCLASS: test whether the FP value in operand 1 belongs to any
// class in the FPClassTest mask held as an immediate in operand 2. The value
// is copied into an equally-sized integer type and classified purely with
// integer compares against sign/exponent/mantissa bit patterns.
// NOTE(review): the function's signature line is not visible in this extract.
9993 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9994 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
9995
// Degenerate masks fold to constant false/true.
9996 if (Mask == fcNone) {
9997 MIRBuilder.buildConstant(DstReg, 0);
9998 MI.eraseFromParent();
9999 return Legalized;
10000 }
10001 if (Mask == fcAllFlags) {
10002 MIRBuilder.buildConstant(DstReg, 1);
10003 MI.eraseFromParent();
10004 return Legalized;
10005 }
10006
10007 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
10008 // version
10009
10010 unsigned BitSize = SrcTy.getScalarSizeInBits();
10011 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
10012
// View the FP value as a same-width integer (element-wise for vectors).
10013 LLT IntTy = SrcTy.changeElementType(LLT::scalar(BitSize));
10014 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
10015
10016 // Various masks.
10017 APInt SignBit = APInt::getSignMask(BitSize);
10018 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
10019 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
10020 APInt ExpMask = Inf;
10021 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
10022 APInt QNaNBitMask =
10023 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
10024 APInt InversionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
10025
10026 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
10027 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
10028 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
10029 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
10030 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
10031
// Abs = value with the sign bit cleared; Sign = (sign bit was set).
10032 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
10033 auto Sign =
10034 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
10035
// Res accumulates the union of all partial class tests via OR.
10036 auto Res = MIRBuilder.buildConstant(DstTy, 0);
10037 // Clang doesn't support capture of structured bindings:
10038 LLT DstTyCopy = DstTy;
10039 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
10040 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
10041 };
10042
10043 // Tests that involve more than one class should be processed first.
10044 if ((Mask & fcFinite) == fcFinite) {
10045 // finite(V) ==> abs(V) u< exp_mask
10046 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
10047 ExpMaskC));
10048 Mask &= ~fcFinite;
10049 } else if ((Mask & fcFinite) == fcPosFinite) {
10050 // finite(V) && V > 0 ==> V u< exp_mask
10051 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
10052 ExpMaskC));
10053 Mask &= ~fcPosFinite;
10054 } else if ((Mask & fcFinite) == fcNegFinite) {
10055 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
10056 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
10057 ExpMaskC);
10058 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
10059 appendToRes(And);
10060 Mask &= ~fcNegFinite;
10061 }
10062
10063 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
10064 // fcZero | fcSubnormal => test all exponent bits are 0
10065 // TODO: Handle sign bit specific cases
10066 // TODO: Handle inverted case
10067 if (PartialCheck == (fcZero | fcSubnormal)) {
10068 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
10069 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10070 ExpBits, ZeroC));
10071 Mask &= ~PartialCheck;
10072 }
10073 }
10074
10075 // Check for individual classes.
10076 if (FPClassTest PartialCheck = Mask & fcZero) {
10077 if (PartialCheck == fcPosZero)
10078 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10079 AsInt, ZeroC));
10080 else if (PartialCheck == fcZero)
10081 appendToRes(
10082 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
10083 else // fcNegZero
10084 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10085 AsInt, SignBitC));
10086 }
10087
10088 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
10089 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
10090 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
10091 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
10092 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
10093 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
10094 auto SubnormalRes =
10095 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
10096 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
10097 if (PartialCheck == fcNegSubnormal)
10098 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
10099 appendToRes(SubnormalRes);
10100 }
10101
10102 if (FPClassTest PartialCheck = Mask & fcInf) {
10103 if (PartialCheck == fcPosInf)
10104 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10105 AsInt, InfC));
10106 else if (PartialCheck == fcInf)
10107 appendToRes(
10108 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
10109 else { // fcNegInf
10110 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
10111 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
10112 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10113 AsInt, NegInfC));
10114 }
10115 }
10116
10117 if (FPClassTest PartialCheck = Mask & fcNan) {
10118 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
10119 if (PartialCheck == fcNan) {
10120 // isnan(V) ==> abs(V) u> int(inf)
10121 appendToRes(
10122 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
10123 } else if (PartialCheck == fcQNan) {
10124 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
10125 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
10126 InfWithQnanBitC));
10127 } else { // fcSNan
10128 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
10129 // abs(V) u< (unsigned(Inf) | quiet_bit)
10130 auto IsNan =
10131 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
10132 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
10133 Abs, InfWithQnanBitC);
10134 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
10135 }
10136 }
10137
10138 if (FPClassTest PartialCheck = Mask & fcNormal) {
10139 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
10140 // (max_exp-1))
10141 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
10142 auto ExpMinusOne = MIRBuilder.buildSub(
10143 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
10144 APInt MaxExpMinusOne = ExpMask - ExpLSB;
10145 auto NormalRes =
10146 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
10147 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
10148 if (PartialCheck == fcNegNormal)
10149 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
10150 else if (PartialCheck == fcPosNormal) {
10151 auto PosSign = MIRBuilder.buildXor(
10152 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InversionMask));
10153 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
10154 }
10155 appendToRes(NormalRes);
10156 }
10157
// Commit the accumulated result and delete the original instruction.
10158 MIRBuilder.buildCopy(DstReg, Res);
10159 MI.eraseFromParent();
10160 return Legalized;
10161}
10162
// Lower G_SELECT into bitwise ops: Dst = (Op1 & Mask) | (Op2 & ~Mask), after
// widening the condition into an all-ones/all-zeros per-lane mask.
// NOTE(review): the function's signature line is not visible in this extract.
10164 // Implement G_SELECT in terms of XOR, AND, OR.
10165 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
10166 MI.getFirst4RegLLTs();
10167
// Pointer (or pointer-vector) payloads are converted to integers up front and
// converted back after the bitwise select.
10168 bool IsEltPtr = DstTy.isPointerOrPointerVector();
10169 if (IsEltPtr) {
10170 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
10171 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
10172 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
10173 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
10174 DstTy = NewTy;
10175 }
10176
10177 if (MaskTy.isScalar()) {
10178 // Turn the scalar condition into a vector condition mask if needed.
10179
10180 Register MaskElt = MaskReg;
10181
10182 // The condition was potentially zero extended before, but we want a sign
10183 // extended boolean.
10184 if (MaskTy != LLT::scalar(1))
10185 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
10186
10187 // Continue the sign extension (or truncate) to match the data type.
10188 MaskElt =
10189 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
10190
10191 if (DstTy.isVector()) {
10192 // Generate a vector splat idiom.
10193 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
10194 MaskReg = ShufSplat.getReg(0);
10195 } else {
10196 MaskReg = MaskElt;
10197 }
10198 MaskTy = DstTy;
10199 } else if (!DstTy.isVector()) {
10200 // Cannot handle the case that mask is a vector and dst is a scalar.
10201 return UnableToLegalize;
10202 }
10203
// The bitwise expansion requires mask and data widths to agree exactly.
10204 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
10205 return UnableToLegalize;
10206 }
10207
10208 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
10209 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
10210 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
10211 if (IsEltPtr) {
10212 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
10213 MIRBuilder.buildIntToPtr(DstReg, Or);
10214 } else {
10215 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
10216 }
10217 MI.eraseFromParent();
10218 return Legalized;
10219}
10220
// Lower G_SDIVREM/G_UDIVREM by emitting separate divide and remainder
// instructions over the same operands.
// NOTE(review): the function's signature line is not visible in this extract.
10222 // Split DIVREM into individual instructions.
10223 unsigned Opcode = MI.getOpcode();
10224
// Result 0 receives the quotient ...
10225 MIRBuilder.buildInstr(
10226 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
10227 : TargetOpcode::G_UDIV,
10228 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
// ... and result 1 the remainder.
10229 MIRBuilder.buildInstr(
10230 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
10231 : TargetOpcode::G_UREM,
10232 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10233 MI.eraseFromParent();
10234 return Legalized;
10235}
10236
// Lower G_ABS with the branchless shift/add/xor idiom: the arithmetic shift
// yields 0 or -1, making the add+xor either a no-op or a two's-complement
// negation.
// NOTE(review): the function's signature line is not visible in this extract.
10239 // Expand %res = G_ABS %a into:
10240 // %v1 = G_ASHR %a, scalar_size-1
10241 // %v2 = G_ADD %a, %v1
10242 // %res = G_XOR %v2, %v1
10243 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
10244 Register OpReg = MI.getOperand(1).getReg();
10245 auto ShiftAmt =
10246 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
10247 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
10248 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
10249 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
10250 MI.eraseFromParent();
10251 return Legalized;
10252}
10253
// Lower G_ABS as smax(a, 0 - a), for targets where G_SMAX is cheap.
// NOTE(review): the function's signature line is not visible in this extract.
10256 // Expand %res = G_ABS %a into:
10257 // %v1 = G_CONSTANT 0
10258 // %v2 = G_SUB %v1, %a
10259 // %res = G_SMAX %a, %v2
10260 Register SrcReg = MI.getOperand(1).getReg();
10261 LLT Ty = MRI.getType(SrcReg);
10262 auto Zero = MIRBuilder.buildConstant(Ty, 0);
10263 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
10264 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
10265 MI.eraseFromParent();
10266 return Legalized;
10267}
10268
// Lower G_ABS as select(a >s 0, a, 0 - a) using a compare-and-select pair.
// NOTE(review): the function's signature line is not visible in this extract.
10271 Register SrcReg = MI.getOperand(1).getReg();
10272 Register DestReg = MI.getOperand(0).getReg();
10273 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
10274 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
10275 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
// a > 0 selects the original value, otherwise the negation.
10276 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
10277 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
10278 MI.eraseFromParent();
10279 return Legalized;
10280}
10281
// Lower G_ABDS/G_ABDU (absolute difference) via compare + select over the two
// subtraction orders.
// NOTE(review): the function's signature line, and the two lines holding the
// ternary's predicate alternatives below, are not visible in this extract
// (presumably ICMP_SGT for G_ABDS and ICMP_UGT for G_ABDU, per the comment --
// confirm against upstream).
10284 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10285 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10286 "Expected G_ABDS or G_ABDU instruction");
10287
10288 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10289 LLT Ty = MRI.getType(LHS);
10290
10291 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10292 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10293 Register LHSSub = MIRBuilder.buildSub(Ty, LHS, RHS).getReg(0);
10294 Register RHSSub = MIRBuilder.buildSub(Ty, RHS, LHS).getReg(0);
10295 CmpInst::Predicate Pred = (MI.getOpcode() == TargetOpcode::G_ABDS)
10298 auto ICmp = MIRBuilder.buildICmp(Pred, LLT::scalar(1), LHS, RHS);
10299 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
10300
10301 MI.eraseFromParent();
10302 return Legalized;
10303}
10304
// Lower G_ABDS/G_ABDU as max(lhs, rhs) - min(lhs, rhs), choosing the signed
// or unsigned min/max pair to match the opcode.
// NOTE(review): the function's signature line is not visible in this extract.
10307 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10308 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10309 "Expected G_ABDS or G_ABDU instruction");
10310
10311 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10312 LLT Ty = MRI.getType(LHS);
10313
10314 // abds(lhs, rhs) -> sub(smax(lhs, rhs), smin(lhs, rhs))
10315 // abdu(lhs, rhs) -> sub(umax(lhs, rhs), umin(lhs, rhs))
10316 Register MaxReg, MinReg;
10317 if (MI.getOpcode() == TargetOpcode::G_ABDS) {
10318 MaxReg = MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10319 MinReg = MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10320 } else {
10321 MaxReg = MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10322 MinReg = MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10323 }
10324 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10325
10326 MI.eraseFromParent();
10327 return Legalized;
10328}
10329
// Lower G_FABS by clearing the IEEE sign bit: AND against the signed-max bit
// pattern of the scalar width.
// NOTE(review): the function's signature line is not visible in this extract.
10331 Register SrcReg = MI.getOperand(1).getReg();
10332 Register DstReg = MI.getOperand(0).getReg();
10333
10334 LLT Ty = MRI.getType(DstReg);
10335
10336 // Reset sign bit
10337 MIRBuilder.buildAnd(
10338 DstReg, SrcReg,
10339 MIRBuilder.buildConstant(
10340 Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits())));
10341
10342 MI.eraseFromParent();
10343 return Legalized;
10344}
10345
// Handle the degenerate case where the source is already a scalar (the IR
// type was <1 x sN>): the instruction is rewritten into a plain COPY.
// NOTE(review): the function's signature line is not visible in this extract.
10348 Register SrcReg = MI.getOperand(1).getReg();
10349 LLT SrcTy = MRI.getType(SrcReg);
// NOTE(review): DstTy is taken from SrcReg, so DstTy == SrcTy and the size
// comparison below can never be true; presumably this was meant to be
// MRI.getType(MI.getOperand(0).getReg()) -- confirm against upstream.
10350 LLT DstTy = MRI.getType(SrcReg);
10351
10352 // The source could be a scalar if the IR type was <1 x sN>.
10353 if (SrcTy.isScalar()) {
10354 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
10355 return UnableToLegalize; // FIXME: handle extension.
10356 // This can be just a plain copy.
10357 Observer.changingInstr(MI);
10358 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
10359 Observer.changedInstr(MI);
10360 return Legalized;
10361 }
10362 return UnableToLegalize;
10363}
10364
// Lower G_VAARG: load the current pointer from the va_list, round it up when
// the requested alignment exceeds the minimum stack-argument alignment, store
// back the pointer advanced by the argument's alloc size, then load the
// argument itself.
// NOTE(review): the function's signature line and the declaration line of
// 'StoreMMO' (between the "Store the increment" comment and its use) are not
// visible in this extract.
10366 MachineFunction &MF = *MI.getMF();
10367 const DataLayout &DL = MIRBuilder.getDataLayout();
10368 LLVMContext &Ctx = MF.getFunction().getContext();
10369 Register ListPtr = MI.getOperand(1).getReg();
10370 LLT PtrTy = MRI.getType(ListPtr);
10371
10372 // LstPtr is a pointer to the head of the list. Get the address
10373 // of the head of the list.
10374 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
10375 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
10376 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
10377 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10378
// Over-align the pointer when requested: add (align - 1), then mask the low
// bits.
10379 const Align A(MI.getOperand(2).getImm());
10380 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
10381 if (A > TLI.getMinStackArgumentAlignment()) {
10382 Register AlignAmt =
10383 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
10384 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10385 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
10386 VAList = AndDst.getReg(0);
10387 }
10388
10389 // Increment the pointer, VAList, to the next vaarg
10390 // The list should be bumped by the size of element in the current head of
10391 // list.
10392 Register Dst = MI.getOperand(0).getReg();
10393 LLT LLTTy = MRI.getType(Dst);
10394 Type *Ty = getTypeForLLT(LLTTy, Ctx);
10395 auto IncAmt =
10396 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
10397 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10398
10399 // Store the increment VAList to the legalized pointer
10401 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
10402 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10403 // Load the actual argument out of the pointer VAList
10404 Align EltAlignment = DL.getABITypeAlign(Ty);
10405 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
10406 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
10407 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10408
10409 MI.eraseFromParent();
10410 return Legalized;
10411}
10412
// Decide whether mem-function inlining should optimize for size.
// NOTE(review): the function's signature line and the Darwin-target
// conditional guarding the first return are not visible in this extract.
10414 // On Darwin, -Os means optimize for size without hurting performance, so
10415 // only really optimize for size when -Oz (MinSize) is used.
10417 return MF.getFunction().hasMinSize();
10418 return MF.getFunction().hasOptSize();
10419}
10420
10421// Returns a list of types to use for memory op lowering in MemOps. A partial
10422// port of findOptimalMemOpLowering in TargetLowering.
10423 static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
10424 unsigned Limit, const MemOp &Op,
10425 unsigned DstAS, unsigned SrcAS,
10426 const AttributeList &FuncAttributes,
10427 const TargetLowering &TLI) {
// Reject fixed-dst-align memcpys whose source is less aligned than the
// destination; the greedy selection below assumes SrcAlign >= DstAlign.
10428 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
10429 return false;
10430
// Ask the target for its preferred type first; fall back to scalars.
10431 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
10432
10433 if (Ty == LLT()) {
10434 // Use the largest scalar type whose alignment constraints are satisfied.
10435 // We only need to check DstAlign here as SrcAlign is always greater or
10436 // equal to DstAlign (or zero).
10437 Ty = LLT::scalar(64);
10438 if (Op.isFixedDstAlign())
10439 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
10440 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
// Note: feeds a byte count into LLT::scalar (which takes bits), so each
// iteration shrinks Ty by a factor of 8 (s64 -> s8 -> s1).
10441 Ty = LLT::scalar(Ty.getSizeInBytes());
10442 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
10443 // FIXME: check for the largest legal type we can load/store to.
10444 }
10445
// Greedily cover the remaining size with ops of Ty, shrinking Ty (or
// overlapping the final access) when it overshoots the remainder.
10446 unsigned NumMemOps = 0;
10447 uint64_t Size = Op.size();
10448 while (Size) {
10449 unsigned TySize = Ty.getSizeInBytes();
10450 while (TySize > Size) {
10451 // For now, only use non-vector load / store's for the left-over pieces.
10452 LLT NewTy = Ty;
10453 // FIXME: check for mem op safety and legality of the types. Not all of
10454 // SDAGisms map cleanly to GISel concepts.
10455 if (NewTy.isVector())
10456 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
// bit_floor(size - 1) rounds down to the next power of two strictly below
// the current width.
10457 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
10458 unsigned NewTySize = NewTy.getSizeInBytes();
10459 assert(NewTySize > 0 && "Could not find appropriate type");
10460
10461 // If the new LLT cannot cover all of the remaining bits, then consider
10462 // issuing a (or a pair of) unaligned and overlapping load / store.
10463 unsigned Fast;
10464 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
10465 MVT VT = getMVTForLLT(Ty);
// NOTE(review): the lines naming the callee of this condition (the
// TLI.allowsMisalignedMemoryAccesses(...) call these arguments belong to)
// are not visible in this extract.
10466 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
10468 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
10470 Fast)
10471 TySize = Size;
10472 else {
10473 Ty = NewTy;
10474 TySize = NewTySize;
10475 }
10476 }
10477
10478 if (++NumMemOps > Limit)
10479 return false;
10480
10481 MemOps.push_back(Ty);
10482 Size -= TySize;
10483 }
10484
10485 return true;
10486}
10487
10488 // Get a vectorized representation of the memset value operand, GISel edition.
// Splats the s8 memset value across a wider scalar or vector type, folding to
// a constant when the value is a known constant.
// NOTE(review): the function's signature line is not visible in this extract.
10490 MachineRegisterInfo &MRI = *MIB.getMRI();
10491 unsigned NumBits = Ty.getScalarSizeInBits();
// Known-constant scalar value: splat the low byte at compile time.
10492 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10493 if (!Ty.isVector() && ValVRegAndVal) {
10494 APInt Scalar = ValVRegAndVal->Value.trunc(8);
10495 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
10496 return MIB.buildConstant(Ty, SplatVal).getReg(0);
10497 }
10498
10499 // Extend the byte value to the larger type, and then multiply by a magic
10500 // value 0x010101... in order to replicate it across every byte.
10501 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
10502 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10503 return MIB.buildConstant(Ty, 0).getReg(0);
10504 }
10505
10506 LLT ExtType = Ty.getScalarType();
10507 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
10508 if (NumBits > 8) {
10509 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
10510 auto MagicMI = MIB.buildConstant(ExtType, Magic);
10511 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
10512 }
10513
10514 // For vector types create a G_BUILD_VECTOR.
10515 if (Ty.isVector())
10516 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
10517
10518 return Val;
10519}
10520
// NOTE(review): the 'LegalizerHelper::LegalizeResult' return-type line of
// this definition is not visible in this extract.
10522 LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
10523 uint64_t KnownLen, Align Alignment,
10524 bool IsVolatile) {
// Inline-expand G_MEMSET of a known length into a sequence of stores chosen
// by findGISelOptimalMemOpLowering, splatting the s8 value across each store
// type.
10525 auto &MF = *MI.getParent()->getParent();
10526 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10527 auto &DL = MF.getDataLayout();
10528 LLVMContext &C = MF.getFunction().getContext();
10529
10530 assert(KnownLen != 0 && "Have a zero length memset length!");
10531
// The destination's alignment may be raised if it is a non-fixed stack
// object.
10532 bool DstAlignCanChange = false;
10533 MachineFrameInfo &MFI = MF.getFrameInfo();
10534 bool OptSize = shouldLowerMemFuncForSize(MF);
10535
10536 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10537 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10538 DstAlignCanChange = true;
10539
10540 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
10541 std::vector<LLT> MemOps;
10542
10543 const auto &DstMMO = **MI.memoperands_begin();
10544 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10545
10546 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10547 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
10548
// Choose the store types; bail out if more than Limit stores would be needed.
10549 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
10550 MemOp::Set(KnownLen, DstAlignCanChange,
10551 Alignment,
10552 /*IsZeroMemset=*/IsZeroVal,
10553 /*IsVolatile=*/IsVolatile),
10554 DstPtrInfo.getAddrSpace(), ~0u,
10555 MF.getFunction().getAttributes(), TLI))
10556 return UnableToLegalize;
10557
10558 if (DstAlignCanChange) {
10559 // Get an estimate of the type from the LLT.
10560 Type *IRTy = getTypeForLLT(MemOps[0], C);
10561 Align NewAlign = DL.getABITypeAlign(IRTy);
10562 if (NewAlign > Alignment) {
10563 Alignment = NewAlign;
10564 unsigned FI = FIDef->getOperand(1).getIndex();
10565 // Give the stack frame object a larger alignment if needed.
10566 if (MFI.getObjectAlign(FI) < Alignment)
10567 MFI.setObjectAlignment(FI, Alignment);
10568 }
10569 }
10570
10571 MachineIRBuilder MIB(MI);
10572 // Find the largest store and generate the bit pattern for it.
10573 LLT LargestTy = MemOps[0];
10574 for (unsigned i = 1; i < MemOps.size(); i++)
10575 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
10576 LargestTy = MemOps[i];
10577
10578 // The memset stored value is always defined as an s8, so in order to make it
10579 // work with larger store types we need to repeat the bit pattern across the
10580 // wider type.
10581 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
10582
10583 if (!MemSetValue)
10584 return UnableToLegalize;
10585
10586 // Generate the stores. For each store type in the list, we generate the
10587 // matching store of that type to the destination address.
10588 LLT PtrTy = MRI.getType(Dst);
10589 unsigned DstOff = 0;
10590 unsigned Size = KnownLen;
10591 for (unsigned I = 0; I < MemOps.size(); I++) {
10592 LLT Ty = MemOps[I];
10593 unsigned TySize = Ty.getSizeInBytes();
10594 if (TySize > Size) {
10595 // Issuing an unaligned load / store pair that overlaps with the previous
10596 // pair. Adjust the offset accordingly.
10597 assert(I == MemOps.size() - 1 && I != 0);
10598 DstOff -= TySize - Size;
10599 }
10600
10601 // If this store is smaller than the largest store see whether we can get
10602 // the smaller value for free with a truncate.
10603 Register Value = MemSetValue;
10604 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
10605 MVT VT = getMVTForLLT(Ty);
10606 MVT LargestVT = getMVTForLLT(LargestTy);
10607 if (!LargestTy.isVector() && !Ty.isVector() &&
10608 TLI.isTruncateFree(LargestVT, VT))
10609 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10610 else
10611 Value = getMemsetValue(Val, Ty, MIB);
10612 if (!Value)
10613 return UnableToLegalize;
10614 }
10615
10616 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
10617
10618 Register Ptr = Dst;
10619 if (DstOff != 0) {
10620 auto Offset =
10621 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
10622 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst, Offset).getReg(0);
10623 }
10624
10625 MIB.buildStore(Value, Ptr, *StoreMMO);
10626 DstOff += Ty.getSizeInBytes();
10627 Size -= TySize;
10628 }
10629
10630 MI.eraseFromParent();
10631 return Legalized;
10632}
10633
10635LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
10636 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10637
10638 auto [Dst, Src, Len] = MI.getFirst3Regs();
10639
10640 const auto *MMOIt = MI.memoperands_begin();
10641 const MachineMemOperand *MemOp = *MMOIt;
10642 bool IsVolatile = MemOp->isVolatile();
10643
10644 // See if this is a constant length copy
10645 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10646 // FIXME: support dynamically sized G_MEMCPY_INLINE
10647 assert(LenVRegAndVal &&
10648 "inline memcpy with dynamic size is not yet supported");
10649 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10650 if (KnownLen == 0) {
10651 MI.eraseFromParent();
10652 return Legalized;
10653 }
10654
10655 const auto &DstMMO = **MI.memoperands_begin();
10656 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10657 Align DstAlign = DstMMO.getBaseAlign();
10658 Align SrcAlign = SrcMMO.getBaseAlign();
10659
10660 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
10661 IsVolatile);
10662}
10663
10665LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
10666 uint64_t KnownLen, Align DstAlign,
10667 Align SrcAlign, bool IsVolatile) {
10668 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10669 return lowerMemcpy(MI, Dst, Src, KnownLen,
10670 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
10671 IsVolatile);
10672}
10673
// NOTE(review): the 'LegalizerHelper::LegalizeResult' return-type line and
// the 'LLVMContext &C = ...' declaration (used below by getTypeForLLT) are
// not visible in this extract.
10675 LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
10676 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
10677 Align SrcAlign, bool IsVolatile) {
// Inline-expand a G_MEMCPY of known length into load/store pairs, using the
// access types chosen by findGISelOptimalMemOpLowering, up to Limit ops.
10678 auto &MF = *MI.getParent()->getParent();
10679 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10680 auto &DL = MF.getDataLayout();
10682
10683 assert(KnownLen != 0 && "Have a zero length memcpy length!");
10684
// A non-fixed stack object destination may have its alignment raised.
10685 bool DstAlignCanChange = false;
10686 MachineFrameInfo &MFI = MF.getFrameInfo();
10687 Align Alignment = std::min(DstAlign, SrcAlign);
10688
10689 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10690 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10691 DstAlignCanChange = true;
10692
10693 // FIXME: infer better src pointer alignment like SelectionDAG does here.
10694 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
10695 // if the memcpy is in a tail call position.
10696
10697 std::vector<LLT> MemOps;
10698
10699 const auto &DstMMO = **MI.memoperands_begin();
10700 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10701 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10702 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10703
// NOTE(review): the 'if (!findGISelOptimalMemOpLowering(' line heading this
// call is not visible in this extract.
10705 MemOps, Limit,
10706 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10707 IsVolatile),
10708 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10709 MF.getFunction().getAttributes(), TLI))
10710 return UnableToLegalize;
10711
10712 if (DstAlignCanChange) {
10713 // Get an estimate of the type from the LLT.
10714 Type *IRTy = getTypeForLLT(MemOps[0], C);
10715 Align NewAlign = DL.getABITypeAlign(IRTy);
10716
10717 // Don't promote to an alignment that would require dynamic stack
10718 // realignment.
// NOTE(review): the 'const TargetRegisterInfo *TRI = ...' declaration line is
// not visible in this extract.
10720 if (!TRI->hasStackRealignment(MF))
10721 if (MaybeAlign StackAlign = DL.getStackAlignment())
10722 NewAlign = std::min(NewAlign, *StackAlign);
10723
10724 if (NewAlign > Alignment) {
10725 Alignment = NewAlign;
10726 unsigned FI = FIDef->getOperand(1).getIndex();
10727 // Give the stack frame object a larger alignment if needed.
10728 if (MFI.getObjectAlign(FI) < Alignment)
10729 MFI.setObjectAlignment(FI, Alignment);
10730 }
10731 }
10732
10733 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
10734
10735 MachineIRBuilder MIB(MI);
10736 // Now we need to emit a pair of load and stores for each of the types we've
10737 // collected. I.e. for each type, generate a load from the source pointer of
10738 // that type width, and then generate a corresponding store to the dest buffer
10739 // of that value loaded. This can result in a sequence of loads and stores
10740 // mixed types, depending on what the target specifies as good types to use.
10741 unsigned CurrOffset = 0;
10742 unsigned Size = KnownLen;
10743 for (auto CopyTy : MemOps) {
10744 // Issuing an unaligned load / store pair that overlaps with the previous
10745 // pair. Adjust the offset accordingly.
10746 if (CopyTy.getSizeInBytes() > Size)
10747 CurrOffset -= CopyTy.getSizeInBytes() - Size;
10748
10749 // Construct MMOs for the accesses.
10750 auto *LoadMMO =
10751 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10752 auto *StoreMMO =
10753 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10754
10755 // Create the load.
10756 Register LoadPtr = Src;
// NOTE(review): the declaration line of 'Offset' (reused by the store below)
// is not visible in this extract.
10758 if (CurrOffset != 0) {
10759 LLT SrcTy = MRI.getType(Src);
10760 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
10761 .getReg(0);
10762 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10763 }
10764 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
10765
10766 // Create the store.
10767 Register StorePtr = Dst;
10768 if (CurrOffset != 0) {
10769 LLT DstTy = MRI.getType(Dst);
10770 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10771 }
10772 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
10773 CurrOffset += CopyTy.getSizeInBytes();
10774 Size -= CopyTy.getSizeInBytes();
10775 }
10776
10777 MI.eraseFromParent();
10778 return Legalized;
10779}
10780
// NOTE(review): the 'LegalizerHelper::LegalizeResult' return-type line of
// this definition is not visible in this extract.
10782 LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
10783 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
10784 bool IsVolatile) {
// Inline-expand a G_MEMMOVE of known length. Unlike memcpy, every load is
// emitted before any store so overlapping buffers are handled correctly.
10785 auto &MF = *MI.getParent()->getParent();
10786 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10787 auto &DL = MF.getDataLayout();
10788 LLVMContext &C = MF.getFunction().getContext();
10789
10790 assert(KnownLen != 0 && "Have a zero length memmove length!");
10791
// A non-fixed stack object destination may have its alignment raised.
10792 bool DstAlignCanChange = false;
10793 MachineFrameInfo &MFI = MF.getFrameInfo();
10794 bool OptSize = shouldLowerMemFuncForSize(MF);
10795 Align Alignment = std::min(DstAlign, SrcAlign);
10796
10797 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10798 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10799 DstAlignCanChange = true;
10800
10801 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
10802 std::vector<LLT> MemOps;
10803
10804 const auto &DstMMO = **MI.memoperands_begin();
10805 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10806 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10807 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10808
10809 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
10810 // to a bug in it's findOptimalMemOpLowering implementation. For now do the
10811 // same thing here.
// NOTE(review): the 'if (!findGISelOptimalMemOpLowering(' line heading this
// call is not visible in this extract.
10813 MemOps, Limit,
10814 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10815 /*IsVolatile*/ true),
10816 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10817 MF.getFunction().getAttributes(), TLI))
10818 return UnableToLegalize;
10819
10820 if (DstAlignCanChange) {
10821 // Get an estimate of the type from the LLT.
10822 Type *IRTy = getTypeForLLT(MemOps[0], C);
10823 Align NewAlign = DL.getABITypeAlign(IRTy);
10824
10825 // Don't promote to an alignment that would require dynamic stack
10826 // realignment.
10827 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10828 if (!TRI->hasStackRealignment(MF))
10829 if (MaybeAlign StackAlign = DL.getStackAlignment())
10830 NewAlign = std::min(NewAlign, *StackAlign);
10831
10832 if (NewAlign > Alignment) {
10833 Alignment = NewAlign;
10834 unsigned FI = FIDef->getOperand(1).getIndex();
10835 // Give the stack frame object a larger alignment if needed.
10836 if (MFI.getObjectAlign(FI) < Alignment)
10837 MFI.setObjectAlignment(FI, Alignment);
10838 }
10839 }
10840
10841 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
10842
10843 MachineIRBuilder MIB(MI);
10844 // Memmove requires that we perform the loads first before issuing the stores.
10845 // Apart from that, this loop is pretty much doing the same thing as the
10846 // memcpy codegen function.
10847 unsigned CurrOffset = 0;
10848 SmallVector<Register, 16> LoadVals;
10849 for (auto CopyTy : MemOps) {
10850 // Construct MMO for the load.
10851 auto *LoadMMO =
10852 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10853
10854 // Create the load.
10855 Register LoadPtr = Src;
10856 if (CurrOffset != 0) {
10857 LLT SrcTy = MRI.getType(Src);
10858 auto Offset =
10859 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
10860 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10861 }
10862 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
10863 CurrOffset += CopyTy.getSizeInBytes();
10864 }
10865
// Second pass: issue all stores only after every load has been emitted.
10866 CurrOffset = 0;
10867 for (unsigned I = 0; I < MemOps.size(); ++I) {
10868 LLT CopyTy = MemOps[I];
10869 // Now store the values loaded.
10870 auto *StoreMMO =
10871 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10872
10873 Register StorePtr = Dst;
10874 if (CurrOffset != 0) {
10875 LLT DstTy = MRI.getType(Dst);
10876 auto Offset =
10877 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
10878 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10879 }
10880 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
10881 CurrOffset += CopyTy.getSizeInBytes();
10882 }
10883 MI.eraseFromParent();
10884 return Legalized;
10885}
10886
// Common entry point for lowering G_MEMCPY / G_MEMMOVE / G_MEMSET with a
// constant length, dispatching to the per-opcode lowering helper.
// NOTE(review): the function's signature lines (including the MaxLen
// parameter referenced below) are not visible in this extract.
10889 const unsigned Opc = MI.getOpcode();
10890 // This combine is fairly complex so it's not written with a separate
10891 // matcher function.
10892 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
10893 Opc == TargetOpcode::G_MEMSET) &&
10894 "Expected memcpy like instruction");
10895
10896 auto MMOIt = MI.memoperands_begin();
10897 const MachineMemOperand *MemOp = *MMOIt;
10898
10899 Align DstAlign = MemOp->getBaseAlign();
10900 Align SrcAlign;
10901 auto [Dst, Src, Len] = MI.getFirst3Regs();
10902
// Memset carries only a destination MMO; the copy/move ops have a second MMO
// describing the source.
10903 if (Opc != TargetOpcode::G_MEMSET) {
10904 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
10905 MemOp = *(++MMOIt);
10906 SrcAlign = MemOp->getBaseAlign();
10907 }
10908
10909 // See if this is a constant length copy
10910 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10911 if (!LenVRegAndVal)
10912 return UnableToLegalize;
10913 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10914
// A zero-length mem op is a no-op.
10915 if (KnownLen == 0) {
10916 MI.eraseFromParent();
10917 return Legalized;
10918 }
10919
10920 if (MaxLen && KnownLen > MaxLen)
10921 return UnableToLegalize;
10922
10923 bool IsVolatile = MemOp->isVolatile();
10924 if (Opc == TargetOpcode::G_MEMCPY) {
10925 auto &MF = *MI.getParent()->getParent();
10926 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10927 bool OptSize = shouldLowerMemFuncForSize(MF);
10928 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
10929 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
10930 IsVolatile);
10931 }
10932 if (Opc == TargetOpcode::G_MEMMOVE)
10933 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
10934 if (Opc == TargetOpcode::G_MEMSET)
10935 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
10936 return UnableToLegalize;
10937}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S16
constexpr LLT S1
constexpr LLT S32
constexpr LLT S64
AMDGPU Register Bank Select
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition Utils.h:75
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t High
R600 Clause Merge
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1398
APInt bitcastToAPInt() const
Definition APFloat.h:1404
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1189
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1149
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1160
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1527
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1677
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
void negate()
Negate this APInt in place.
Definition APInt.h:1483
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:996
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:880
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition APInt.h:271
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:685
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
bool isSigned() const
Definition InstrTypes.h:930
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
const APFloat & getValueAPF() const
Definition Constants.h:325
This is the shared class of boolean and integer constants.
Definition Constants.h:87
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool isBigEndian() const
Definition DataLayout.h:215
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:714
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a threeway compare.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr LLT changeVectorElementType(LLT NewEltTy) const
Returns a vector with the same number of elements but the new element type.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
constexpr LLT changeVectorElementCount(ElementCount EC) const
Return a vector with the same element type and the new element count.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSSUBE(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizerHelper::LegalizeResult createAtomicLibcall(MachineInstr &MI) const
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTLS(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B, const LibcallLoweringInfo *Libcalls=nullptr)
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMODF(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI LegalizeResult conversionLibcall(MachineInstr &MI, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, bool IsSigned=false) const
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult createMemLibcall(MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Create a libcall to memcpy et al.
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult createLibcall(const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr) const
Helper function that creates a libcall to the given Name using the given calling convention CC.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emiting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LLVM_ABI LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver) const
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI)
Tracks which library functions to use for a particular subtarget.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
A single uniqued string.
Definition Metadata.h:722
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:632
Machine Value Type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:137
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:413
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Definition Triple.h:639
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Definition Type.cpp:289
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:285
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
Definition Type.cpp:288
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:284
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:282
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:831
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:2041
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition Utils.cpp:653
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:294
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition MathExtras.h:223
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1570
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1627
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1152
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition Utils.cpp:1194
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Success
The lock was released successfully.
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition Utils.cpp:508
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1885
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition MathExtras.h:232
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition Utils.h:347
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition Utils.cpp:1282
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:330
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition Utils.cpp:611
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
bool isKnownNeverZero() const
Return true if it's known this can never be a zero.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.