LLVM 23.0.0git
LegalizerHelper.cpp
Go to the documentation of this file.
1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
36#include "llvm/Support/Debug.h"
40#include <numeric>
41#include <optional>
42
43#define DEBUG_TYPE "legalizer"
44
45using namespace llvm;
46using namespace LegalizeActions;
47using namespace MIPatternMatch;
48
49/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
50///
51/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
52/// with any leftover piece as type \p LeftoverTy
53///
54/// Returns -1 in the first element of the pair if the breakdown is not
55/// satisfiable.
56static std::pair<int, int>
57getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
58 assert(!LeftoverTy.isValid() && "this is an out argument");
59
60 unsigned Size = OrigTy.getSizeInBits();
61 unsigned NarrowSize = NarrowTy.getSizeInBits();
62 unsigned NumParts = Size / NarrowSize;
63 unsigned LeftoverSize = Size - NumParts * NarrowSize;
64 assert(Size > NarrowSize);
65
66 if (LeftoverSize == 0)
67 return {NumParts, 0};
68
69 if (NarrowTy.isVector()) {
70 unsigned EltSize = OrigTy.getScalarSizeInBits();
71 if (LeftoverSize % EltSize != 0)
72 return {-1, -1};
73 LeftoverTy = OrigTy.changeElementCount(
74 ElementCount::getFixed(LeftoverSize / EltSize));
75 } else {
76 LeftoverTy = LLT::scalar(LeftoverSize);
77 }
78
79 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
80 return std::make_pair(NumParts, NumLeftover);
81}
82
84
85 if (!Ty.isScalar())
86 return nullptr;
87
88 switch (Ty.getSizeInBits()) {
89 case 16:
90 return Type::getHalfTy(Ctx);
91 case 32:
92 return Type::getFloatTy(Ctx);
93 case 64:
94 return Type::getDoubleTy(Ctx);
95 case 80:
96 return Type::getX86_FP80Ty(Ctx);
97 case 128:
98 return Type::getFP128Ty(Ctx);
99 default:
100 return nullptr;
101 }
102}
103
106 MachineIRBuilder &Builder,
107 const LibcallLoweringInfo *Libcalls)
108 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
109 LI(*MF.getSubtarget().getLegalizerInfo()),
110 TLI(*MF.getSubtarget().getTargetLowering()), Libcalls(Libcalls) {}
111
115 const LibcallLoweringInfo *Libcalls,
117 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
118 TLI(*MF.getSubtarget().getTargetLowering()), Libcalls(Libcalls), VT(VT) {}
119
122 LostDebugLocObserver &LocObserver) {
123 LLVM_DEBUG(dbgs() << "\nLegalizing: " << MI);
124
125 MIRBuilder.setInstrAndDebugLoc(MI);
126
127 if (isa<GIntrinsic>(MI))
128 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
129 auto Step = LI.getAction(MI, MRI);
130 switch (Step.Action) {
131 case Legal:
132 LLVM_DEBUG(dbgs() << ".. Already legal\n");
133 return AlreadyLegal;
134 case Libcall:
135 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
136 return libcall(MI, LocObserver);
137 case NarrowScalar:
138 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
139 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
140 case WidenScalar:
141 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
142 return widenScalar(MI, Step.TypeIdx, Step.NewType);
143 case Bitcast:
144 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
145 return bitcast(MI, Step.TypeIdx, Step.NewType);
146 case Lower:
147 LLVM_DEBUG(dbgs() << ".. Lower\n");
148 return lower(MI, Step.TypeIdx, Step.NewType);
149 case FewerElements:
150 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
151 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
152 case MoreElements:
153 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
154 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
155 case Custom:
156 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
157 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
159 default:
160 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
161 return UnableToLegalize;
162 }
163}
164
165void LegalizerHelper::insertParts(Register DstReg,
166 LLT ResultTy, LLT PartTy,
167 ArrayRef<Register> PartRegs,
168 LLT LeftoverTy,
169 ArrayRef<Register> LeftoverRegs) {
170 if (!LeftoverTy.isValid()) {
171 assert(LeftoverRegs.empty());
172
173 if (!ResultTy.isVector()) {
174 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
175 return;
176 }
177
178 if (PartTy.isVector())
179 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
180 else
181 MIRBuilder.buildBuildVector(DstReg, PartRegs);
182 return;
183 }
184
185 // Merge sub-vectors with different number of elements and insert into DstReg.
186 if (ResultTy.isVector()) {
187 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
188 SmallVector<Register, 8> AllRegs(PartRegs);
189 AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
190 return mergeMixedSubvectors(DstReg, AllRegs);
191 }
192
193 SmallVector<Register> GCDRegs;
194 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
195 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
196 extractGCDType(GCDRegs, GCDTy, PartReg);
197 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
198 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
199}
200
201void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
202 Register Reg) {
203 LLT Ty = MRI.getType(Reg);
205 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
206 MIRBuilder, MRI);
207 Elts.append(RegElts);
208}
209
210/// Merge \p PartRegs with different types into \p DstReg.
211void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
212 ArrayRef<Register> PartRegs) {
214 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
215 appendVectorElts(AllElts, PartRegs[i]);
216
217 Register Leftover = PartRegs[PartRegs.size() - 1];
218 if (!MRI.getType(Leftover).isVector())
219 AllElts.push_back(Leftover);
220 else
221 appendVectorElts(AllElts, Leftover);
222
223 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
224}
225
226/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
228 const MachineInstr &MI) {
229 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
230
231 const int StartIdx = Regs.size();
232 const int NumResults = MI.getNumOperands() - 1;
233 Regs.resize(Regs.size() + NumResults);
234 for (int I = 0; I != NumResults; ++I)
235 Regs[StartIdx + I] = MI.getOperand(I).getReg();
236}
237
238void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
239 LLT GCDTy, Register SrcReg) {
240 LLT SrcTy = MRI.getType(SrcReg);
241 if (SrcTy == GCDTy) {
242 // If the source already evenly divides the result type, we don't need to do
243 // anything.
244 Parts.push_back(SrcReg);
245 } else {
246 // Need to split into common type sized pieces.
247 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
248 getUnmergeResults(Parts, *Unmerge);
249 }
250}
251
252LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
253 LLT NarrowTy, Register SrcReg) {
254 LLT SrcTy = MRI.getType(SrcReg);
255 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
256 extractGCDType(Parts, GCDTy, SrcReg);
257 return GCDTy;
258}
259
260LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
262 unsigned PadStrategy) {
263 LLT LCMTy = getLCMType(DstTy, NarrowTy);
264
265 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
266 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
267 int NumOrigSrc = VRegs.size();
268
269 Register PadReg;
270
271 // Get a value we can use to pad the source value if the sources won't evenly
272 // cover the result type.
273 if (NumOrigSrc < NumParts * NumSubParts) {
274 if (PadStrategy == TargetOpcode::G_ZEXT)
275 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
276 else if (PadStrategy == TargetOpcode::G_ANYEXT)
277 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
278 else {
279 assert(PadStrategy == TargetOpcode::G_SEXT);
280
281 // Shift the sign bit of the low register through the high register.
282 auto ShiftAmt =
283 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
284 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
285 }
286 }
287
288 // Registers for the final merge to be produced.
289 SmallVector<Register, 4> Remerge(NumParts);
290
291 // Registers needed for intermediate merges, which will be merged into a
292 // source for Remerge.
293 SmallVector<Register, 4> SubMerge(NumSubParts);
294
295 // Once we've fully read off the end of the original source bits, we can reuse
296 // the same high bits for remaining padding elements.
297 Register AllPadReg;
298
299 // Build merges to the LCM type to cover the original result type.
300 for (int I = 0; I != NumParts; ++I) {
301 bool AllMergePartsArePadding = true;
302
303 // Build the requested merges to the requested type.
304 for (int J = 0; J != NumSubParts; ++J) {
305 int Idx = I * NumSubParts + J;
306 if (Idx >= NumOrigSrc) {
307 SubMerge[J] = PadReg;
308 continue;
309 }
310
311 SubMerge[J] = VRegs[Idx];
312
313 // There are meaningful bits here we can't reuse later.
314 AllMergePartsArePadding = false;
315 }
316
317 // If we've filled up a complete piece with padding bits, we can directly
318 // emit the natural sized constant if applicable, rather than a merge of
319 // smaller constants.
320 if (AllMergePartsArePadding && !AllPadReg) {
321 if (PadStrategy == TargetOpcode::G_ANYEXT)
322 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
323 else if (PadStrategy == TargetOpcode::G_ZEXT)
324 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
325
326 // If this is a sign extension, we can't materialize a trivial constant
327 // with the right type and have to produce a merge.
328 }
329
330 if (AllPadReg) {
331 // Avoid creating additional instructions if we're just adding additional
332 // copies of padding bits.
333 Remerge[I] = AllPadReg;
334 continue;
335 }
336
337 if (NumSubParts == 1)
338 Remerge[I] = SubMerge[0];
339 else
340 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
341
342 // In the sign extend padding case, re-use the first all-signbit merge.
343 if (AllMergePartsArePadding && !AllPadReg)
344 AllPadReg = Remerge[I];
345 }
346
347 VRegs = std::move(Remerge);
348 return LCMTy;
349}
350
351void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
352 ArrayRef<Register> RemergeRegs) {
353 LLT DstTy = MRI.getType(DstReg);
354
355 // Create the merge to the widened source, and extract the relevant bits into
356 // the result.
357
358 if (DstTy == LCMTy) {
359 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
360 return;
361 }
362
363 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
364 if (DstTy.isScalar() && LCMTy.isScalar()) {
365 MIRBuilder.buildTrunc(DstReg, Remerge);
366 return;
367 }
368
369 if (LCMTy.isVector()) {
370 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
371 SmallVector<Register, 8> UnmergeDefs(NumDefs);
372 UnmergeDefs[0] = DstReg;
373 for (unsigned I = 1; I != NumDefs; ++I)
374 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
375
376 MIRBuilder.buildUnmerge(UnmergeDefs,
377 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
378 return;
379 }
380
381 llvm_unreachable("unhandled case");
382}
383
384static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
385#define RTLIBCASE_INT(LibcallPrefix) \
386 do { \
387 switch (Size) { \
388 case 32: \
389 return RTLIB::LibcallPrefix##32; \
390 case 64: \
391 return RTLIB::LibcallPrefix##64; \
392 case 128: \
393 return RTLIB::LibcallPrefix##128; \
394 default: \
395 llvm_unreachable("unexpected size"); \
396 } \
397 } while (0)
398
399#define RTLIBCASE(LibcallPrefix) \
400 do { \
401 switch (Size) { \
402 case 32: \
403 return RTLIB::LibcallPrefix##32; \
404 case 64: \
405 return RTLIB::LibcallPrefix##64; \
406 case 80: \
407 return RTLIB::LibcallPrefix##80; \
408 case 128: \
409 return RTLIB::LibcallPrefix##128; \
410 default: \
411 llvm_unreachable("unexpected size"); \
412 } \
413 } while (0)
414
415 switch (Opcode) {
416 case TargetOpcode::G_LROUND:
417 RTLIBCASE(LROUND_F);
418 case TargetOpcode::G_LLROUND:
419 RTLIBCASE(LLROUND_F);
420 case TargetOpcode::G_MUL:
421 RTLIBCASE_INT(MUL_I);
422 case TargetOpcode::G_SDIV:
423 RTLIBCASE_INT(SDIV_I);
424 case TargetOpcode::G_UDIV:
425 RTLIBCASE_INT(UDIV_I);
426 case TargetOpcode::G_SREM:
427 RTLIBCASE_INT(SREM_I);
428 case TargetOpcode::G_UREM:
429 RTLIBCASE_INT(UREM_I);
430 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
431 RTLIBCASE_INT(CTLZ_I);
432 case TargetOpcode::G_FADD:
433 RTLIBCASE(ADD_F);
434 case TargetOpcode::G_FSUB:
435 RTLIBCASE(SUB_F);
436 case TargetOpcode::G_FMUL:
437 RTLIBCASE(MUL_F);
438 case TargetOpcode::G_FDIV:
439 RTLIBCASE(DIV_F);
440 case TargetOpcode::G_FEXP:
441 RTLIBCASE(EXP_F);
442 case TargetOpcode::G_FEXP2:
443 RTLIBCASE(EXP2_F);
444 case TargetOpcode::G_FEXP10:
445 RTLIBCASE(EXP10_F);
446 case TargetOpcode::G_FREM:
447 RTLIBCASE(REM_F);
448 case TargetOpcode::G_FPOW:
449 RTLIBCASE(POW_F);
450 case TargetOpcode::G_FPOWI:
451 RTLIBCASE(POWI_F);
452 case TargetOpcode::G_FMA:
453 RTLIBCASE(FMA_F);
454 case TargetOpcode::G_FSIN:
455 RTLIBCASE(SIN_F);
456 case TargetOpcode::G_FCOS:
457 RTLIBCASE(COS_F);
458 case TargetOpcode::G_FTAN:
459 RTLIBCASE(TAN_F);
460 case TargetOpcode::G_FASIN:
461 RTLIBCASE(ASIN_F);
462 case TargetOpcode::G_FACOS:
463 RTLIBCASE(ACOS_F);
464 case TargetOpcode::G_FATAN:
465 RTLIBCASE(ATAN_F);
466 case TargetOpcode::G_FATAN2:
467 RTLIBCASE(ATAN2_F);
468 case TargetOpcode::G_FSINH:
469 RTLIBCASE(SINH_F);
470 case TargetOpcode::G_FCOSH:
471 RTLIBCASE(COSH_F);
472 case TargetOpcode::G_FTANH:
473 RTLIBCASE(TANH_F);
474 case TargetOpcode::G_FSINCOS:
475 RTLIBCASE(SINCOS_F);
476 case TargetOpcode::G_FMODF:
477 RTLIBCASE(MODF_F);
478 case TargetOpcode::G_FLOG10:
479 RTLIBCASE(LOG10_F);
480 case TargetOpcode::G_FLOG:
481 RTLIBCASE(LOG_F);
482 case TargetOpcode::G_FLOG2:
483 RTLIBCASE(LOG2_F);
484 case TargetOpcode::G_FLDEXP:
485 RTLIBCASE(LDEXP_F);
486 case TargetOpcode::G_FCEIL:
487 RTLIBCASE(CEIL_F);
488 case TargetOpcode::G_FFLOOR:
489 RTLIBCASE(FLOOR_F);
490 case TargetOpcode::G_FMINNUM:
491 RTLIBCASE(FMIN_F);
492 case TargetOpcode::G_FMAXNUM:
493 RTLIBCASE(FMAX_F);
494 case TargetOpcode::G_FMINIMUMNUM:
495 RTLIBCASE(FMINIMUM_NUM_F);
496 case TargetOpcode::G_FMAXIMUMNUM:
497 RTLIBCASE(FMAXIMUM_NUM_F);
498 case TargetOpcode::G_FSQRT:
499 RTLIBCASE(SQRT_F);
500 case TargetOpcode::G_FRINT:
501 RTLIBCASE(RINT_F);
502 case TargetOpcode::G_FNEARBYINT:
503 RTLIBCASE(NEARBYINT_F);
504 case TargetOpcode::G_INTRINSIC_TRUNC:
505 RTLIBCASE(TRUNC_F);
506 case TargetOpcode::G_INTRINSIC_ROUND:
507 RTLIBCASE(ROUND_F);
508 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
509 RTLIBCASE(ROUNDEVEN_F);
510 case TargetOpcode::G_INTRINSIC_LRINT:
511 RTLIBCASE(LRINT_F);
512 case TargetOpcode::G_INTRINSIC_LLRINT:
513 RTLIBCASE(LLRINT_F);
514 }
515 llvm_unreachable("Unknown libcall function");
516#undef RTLIBCASE_INT
517#undef RTLIBCASE
518}
519
520/// True if an instruction is in tail position in its caller. Intended for
521/// legalizing libcalls as tail calls when possible.
524 const TargetInstrInfo &TII,
525 MachineRegisterInfo &MRI) {
526 MachineBasicBlock &MBB = *MI.getParent();
527 const Function &F = MBB.getParent()->getFunction();
528
529 // Conservatively require the attributes of the call to match those of
530 // the return. Ignore NoAlias and NonNull because they don't affect the
531 // call sequence.
532 AttributeList CallerAttrs = F.getAttributes();
533 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
534 .removeAttribute(Attribute::NoAlias)
535 .removeAttribute(Attribute::NonNull)
536 .hasAttributes())
537 return false;
538
539 // It's not safe to eliminate the sign / zero extension of the return value.
540 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
541 CallerAttrs.hasRetAttr(Attribute::SExt))
542 return false;
543
544 // Only tail call if the following instruction is a standard return or if we
545 // have a `thisreturn` callee, and a sequence like:
546 //
547 // G_MEMCPY %0, %1, %2
548 // $x0 = COPY %0
549 // RET_ReallyLR implicit $x0
550 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
551 if (Next != MBB.instr_end() && Next->isCopy()) {
552 if (MI.getOpcode() == TargetOpcode::G_BZERO)
553 return false;
554
555 // For MEMCPY/MOMMOVE/MEMSET these will be the first use (the dst), as the
556 // mempy/etc routines return the same parameter. For other it will be the
557 // returned value.
558 Register VReg = MI.getOperand(0).getReg();
559 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
560 return false;
561
562 Register PReg = Next->getOperand(0).getReg();
563 if (!PReg.isPhysical())
564 return false;
565
566 auto Ret = next_nodbg(Next, MBB.instr_end());
567 if (Ret == MBB.instr_end() || !Ret->isReturn())
568 return false;
569
570 if (Ret->getNumImplicitOperands() != 1)
571 return false;
572
573 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
574 return false;
575
576 // Skip over the COPY that we just validated.
577 Next = Ret;
578 }
579
580 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
581 return false;
582
583 return true;
584}
585
587 const char *Name, const CallLowering::ArgInfo &Result,
589 LostDebugLocObserver &LocObserver, MachineInstr *MI) const {
590 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
591
593 Info.CallConv = CC;
594 Info.Callee = MachineOperand::CreateES(Name);
595 Info.OrigRet = Result;
596 if (MI)
597 Info.IsTailCall =
598 (Result.Ty->isVoidTy() ||
599 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
600 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
601 *MIRBuilder.getMRI());
602
603 llvm::append_range(Info.OrigArgs, Args);
604 if (!CLI.lowerCall(MIRBuilder, Info))
606
607 if (MI && Info.LoweredTailCall) {
608 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
609
610 // Check debug locations before removing the return.
611 LocObserver.checkpoint(true);
612
613 // We must have a return following the call (or debug insts) to get past
614 // isLibCallInTailPosition.
615 do {
616 MachineInstr *Next = MI->getNextNode();
617 assert(Next &&
618 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
619 "Expected instr following MI to be return or debug inst?");
620 // We lowered a tail call, so the call is now the return from the block.
621 // Delete the old return.
622 Next->eraseFromParent();
623 } while (MI->getNextNode());
624
625 // We expect to lose the debug location from the return.
626 LocObserver.checkpoint(false);
627 }
629}
630
632 RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result,
634 MachineInstr *MI) const {
635 if (!Libcalls)
637
638 RTLIB::LibcallImpl LibcallImpl = Libcalls->getLibcallImpl(Libcall);
639 if (LibcallImpl == RTLIB::Unsupported)
641
643 const CallingConv::ID CC = Libcalls->getLibcallImplCallingConv(LibcallImpl);
644 return createLibcall(Name.data(), Result, Args, CC, LocObserver, MI);
645}
646
647// Useful for libcalls where all operands have the same type.
650 unsigned Size, Type *OpType,
651 LostDebugLocObserver &LocObserver) const {
652 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
653
654 // FIXME: What does the original arg index mean here?
656 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
657 Args.push_back({MO.getReg(), OpType, 0});
658 return createLibcall(Libcall, {MI.getOperand(0).getReg(), OpType, 0}, Args,
659 LocObserver, &MI);
660}
661
662LegalizerHelper::LegalizeResult LegalizerHelper::emitSincosLibcall(
663 MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType,
664 LostDebugLocObserver &LocObserver) {
665 MachineFunction &MF = *MI.getMF();
667
668 Register DstSin = MI.getOperand(0).getReg();
669 Register DstCos = MI.getOperand(1).getReg();
670 Register Src = MI.getOperand(2).getReg();
671 LLT DstTy = MRI.getType(DstSin);
672
673 int MemSize = DstTy.getSizeInBytes();
674 Align Alignment = getStackTemporaryAlignment(DstTy);
676 unsigned AddrSpace = DL.getAllocaAddrSpace();
677 MachinePointerInfo PtrInfo;
678
679 Register StackPtrSin =
680 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
681 .getReg(0);
682 Register StackPtrCos =
683 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
684 .getReg(0);
685
686 auto &Ctx = MF.getFunction().getContext();
687 auto LibcallResult = createLibcall(
688 getRTLibDesc(MI.getOpcode(), Size), {{0}, Type::getVoidTy(Ctx), 0},
689 {{Src, OpType, 0},
690 {StackPtrSin, PointerType::get(Ctx, AddrSpace), 1},
691 {StackPtrCos, PointerType::get(Ctx, AddrSpace), 2}},
692 LocObserver, &MI);
693
694 if (LibcallResult != LegalizeResult::Legalized)
696
698 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
700 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
701
702 MIRBuilder.buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
703 MIRBuilder.buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
704 MI.eraseFromParent();
705
707}
708
710LegalizerHelper::emitModfLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
711 unsigned Size, Type *OpType,
712 LostDebugLocObserver &LocObserver) {
713 MachineFunction &MF = MIRBuilder.getMF();
714 MachineRegisterInfo &MRI = MF.getRegInfo();
715
716 Register DstFrac = MI.getOperand(0).getReg();
717 Register DstInt = MI.getOperand(1).getReg();
718 Register Src = MI.getOperand(2).getReg();
719 LLT DstTy = MRI.getType(DstFrac);
720
721 int MemSize = DstTy.getSizeInBytes();
722 Align Alignment = getStackTemporaryAlignment(DstTy);
723 const DataLayout &DL = MIRBuilder.getDataLayout();
724 unsigned AddrSpace = DL.getAllocaAddrSpace();
725 MachinePointerInfo PtrInfo;
726
727 Register StackPtrInt =
728 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
729 .getReg(0);
730
731 auto &Ctx = MF.getFunction().getContext();
732 auto LibcallResult = createLibcall(
733 getRTLibDesc(MI.getOpcode(), Size), {DstFrac, OpType, 0},
734 {{Src, OpType, 0}, {StackPtrInt, PointerType::get(Ctx, AddrSpace), 1}},
735 LocObserver, &MI);
736
737 if (LibcallResult != LegalizeResult::Legalized)
739
741 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
742
743 MIRBuilder.buildLoad(DstInt, StackPtrInt, *LoadMMOInt);
744 MI.eraseFromParent();
745
747}
748
749static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
750 Type *FromType) {
751 auto ToMVT = MVT::getVT(ToType);
752 auto FromMVT = MVT::getVT(FromType);
753
754 switch (Opcode) {
755 case TargetOpcode::G_FPEXT:
756 return RTLIB::getFPEXT(FromMVT, ToMVT);
757 case TargetOpcode::G_FPTRUNC:
758 return RTLIB::getFPROUND(FromMVT, ToMVT);
759 case TargetOpcode::G_FPTOSI:
760 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
761 case TargetOpcode::G_FPTOUI:
762 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
763 case TargetOpcode::G_SITOFP:
764 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
765 case TargetOpcode::G_UITOFP:
766 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
767 }
768 llvm_unreachable("Unsupported libcall function");
769}
770
772 MachineInstr &MI, Type *ToType, Type *FromType,
773 LostDebugLocObserver &LocObserver, bool IsSigned) const {
774 CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
775 if (FromType->isIntegerTy()) {
776 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
777 Arg.Flags[0].setSExt();
778 else
779 Arg.Flags[0].setZExt();
780 }
781
782 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
783 return createLibcall(Libcall, {MI.getOperand(0).getReg(), ToType, 0}, Arg,
784 LocObserver, &MI);
785}
786
789 LostDebugLocObserver &LocObserver) const {
790 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
791
793 // Add all the args, except for the last which is an imm denoting 'tail'.
794 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
795 Register Reg = MI.getOperand(i).getReg();
796
797 // Need derive an IR type for call lowering.
798 LLT OpLLT = MRI.getType(Reg);
799 Type *OpTy = nullptr;
800 if (OpLLT.isPointer())
801 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
802 else
803 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
804 Args.push_back({Reg, OpTy, 0});
805 }
806
807 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
808 RTLIB::Libcall RTLibcall;
809 unsigned Opc = MI.getOpcode();
810 switch (Opc) {
811 case TargetOpcode::G_BZERO:
812 RTLibcall = RTLIB::BZERO;
813 break;
814 case TargetOpcode::G_MEMCPY:
815 RTLibcall = RTLIB::MEMCPY;
816 Args[0].Flags[0].setReturned();
817 break;
818 case TargetOpcode::G_MEMMOVE:
819 RTLibcall = RTLIB::MEMMOVE;
820 Args[0].Flags[0].setReturned();
821 break;
822 case TargetOpcode::G_MEMSET:
823 RTLibcall = RTLIB::MEMSET;
824 Args[0].Flags[0].setReturned();
825 break;
826 default:
827 llvm_unreachable("unsupported opcode");
828 }
829
830 if (!Libcalls) // FIXME: Should be mandatory
832
833 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
834
835 // Unsupported libcall on the target.
836 if (RTLibcallImpl == RTLIB::Unsupported) {
837 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
838 << MIRBuilder.getTII().getName(Opc) << "\n");
840 }
841
843 Info.CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
844
845 StringRef LibcallName =
847 Info.Callee = MachineOperand::CreateES(LibcallName.data());
848 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
849 Info.IsTailCall =
850 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
851 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
852
853 llvm::append_range(Info.OrigArgs, Args);
854 if (!CLI.lowerCall(MIRBuilder, Info))
856
857 if (Info.LoweredTailCall) {
858 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
859
860 // Check debug locations before removing the return.
861 LocObserver.checkpoint(true);
862
863 // We must have a return following the call (or debug insts) to get past
864 // isLibCallInTailPosition.
865 do {
866 MachineInstr *Next = MI.getNextNode();
867 assert(Next &&
868 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
869 "Expected instr following MI to be return or debug inst?");
870 // We lowered a tail call, so the call is now the return from the block.
871 // Delete the old return.
872 Next->eraseFromParent();
873 } while (MI.getNextNode());
874
875 // We expect to lose the debug location from the return.
876 LocObserver.checkpoint(false);
877 }
878
880}
881
882static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
883 unsigned Opc = MI.getOpcode();
884 auto &AtomicMI = cast<GMemOperation>(MI);
885 auto &MMO = AtomicMI.getMMO();
886 auto Ordering = MMO.getMergedOrdering();
887 LLT MemType = MMO.getMemoryType();
888 uint64_t MemSize = MemType.getSizeInBytes();
889 if (MemType.isVector())
890 return RTLIB::UNKNOWN_LIBCALL;
891
892#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
893#define LCALL5(A) \
894 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
895 switch (Opc) {
896 case TargetOpcode::G_ATOMIC_CMPXCHG:
897 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
898 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
899 return getOutlineAtomicHelper(LC, Ordering, MemSize);
900 }
901 case TargetOpcode::G_ATOMICRMW_XCHG: {
902 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
903 return getOutlineAtomicHelper(LC, Ordering, MemSize);
904 }
905 case TargetOpcode::G_ATOMICRMW_ADD:
906 case TargetOpcode::G_ATOMICRMW_SUB: {
907 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
908 return getOutlineAtomicHelper(LC, Ordering, MemSize);
909 }
910 case TargetOpcode::G_ATOMICRMW_AND: {
911 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
912 return getOutlineAtomicHelper(LC, Ordering, MemSize);
913 }
914 case TargetOpcode::G_ATOMICRMW_OR: {
915 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
916 return getOutlineAtomicHelper(LC, Ordering, MemSize);
917 }
918 case TargetOpcode::G_ATOMICRMW_XOR: {
919 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
920 return getOutlineAtomicHelper(LC, Ordering, MemSize);
921 }
922 default:
923 return RTLIB::UNKNOWN_LIBCALL;
924 }
925#undef LCALLS
926#undef LCALL5
927}
928
931 auto &Ctx = MIRBuilder.getContext();
932
933 Type *RetTy;
934 SmallVector<Register> RetRegs;
936 unsigned Opc = MI.getOpcode();
937 switch (Opc) {
938 case TargetOpcode::G_ATOMIC_CMPXCHG:
939 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
941 LLT SuccessLLT;
942 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
943 MI.getFirst4RegLLTs();
944 RetRegs.push_back(Ret);
945 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
946 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
947 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
948 NewLLT) = MI.getFirst5RegLLTs();
949 RetRegs.push_back(Success);
950 RetTy = StructType::get(
951 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
952 }
953 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
954 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
955 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
956 break;
957 }
958 case TargetOpcode::G_ATOMICRMW_XCHG:
959 case TargetOpcode::G_ATOMICRMW_ADD:
960 case TargetOpcode::G_ATOMICRMW_SUB:
961 case TargetOpcode::G_ATOMICRMW_AND:
962 case TargetOpcode::G_ATOMICRMW_OR:
963 case TargetOpcode::G_ATOMICRMW_XOR: {
964 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
965 RetRegs.push_back(Ret);
966 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
967 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
968 Val =
969 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
970 .getReg(0);
971 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
972 Val =
973 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
974 .getReg(0);
975 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
976 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
977 break;
978 }
979 default:
980 llvm_unreachable("unsupported opcode");
981 }
982
983 if (!Libcalls) // FIXME: Should be mandatory
985
986 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
987 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
988 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
989
990 // Unsupported libcall on the target.
991 if (RTLibcallImpl == RTLIB::Unsupported) {
992 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
993 << MIRBuilder.getTII().getName(Opc) << "\n");
995 }
996
998 Info.CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
999
1000 StringRef LibcallName =
1002 Info.Callee = MachineOperand::CreateES(LibcallName.data());
1003 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
1004
1005 llvm::append_range(Info.OrigArgs, Args);
1006 if (!CLI.lowerCall(MIRBuilder, Info))
1008
1010}
1011
1012static RTLIB::Libcall
1014 RTLIB::Libcall RTLibcall;
1015 switch (MI.getOpcode()) {
1016 case TargetOpcode::G_GET_FPENV:
1017 RTLibcall = RTLIB::FEGETENV;
1018 break;
1019 case TargetOpcode::G_SET_FPENV:
1020 case TargetOpcode::G_RESET_FPENV:
1021 RTLibcall = RTLIB::FESETENV;
1022 break;
1023 case TargetOpcode::G_GET_FPMODE:
1024 RTLibcall = RTLIB::FEGETMODE;
1025 break;
1026 case TargetOpcode::G_SET_FPMODE:
1027 case TargetOpcode::G_RESET_FPMODE:
1028 RTLibcall = RTLIB::FESETMODE;
1029 break;
1030 default:
1031 llvm_unreachable("Unexpected opcode");
1032 }
1033 return RTLibcall;
1034}
1035
1036// Some library functions that read FP state (fegetmode, fegetenv) write the
1037// state into a region in memory. IR intrinsics that do the same operations
1038// (get_fpmode, get_fpenv) return the state as integer value. To implement these
1039// intrinsics via the library functions, we need to use temporary variable,
1040// for example:
1041//
1042// %0:_(s32) = G_GET_FPMODE
1043//
1044// is transformed to:
1045//
1046// %1:_(p0) = G_FRAME_INDEX %stack.0
1047// BL &fegetmode
1048// %0:_(s32) = G_LOAD % 1
1049//
1051LegalizerHelper::createGetStateLibcall(MachineInstr &MI,
1052 LostDebugLocObserver &LocObserver) {
1053 const DataLayout &DL = MIRBuilder.getDataLayout();
1054 auto &MF = MIRBuilder.getMF();
1055 auto &MRI = *MIRBuilder.getMRI();
1056 auto &Ctx = MF.getFunction().getContext();
1057
1058 // Create temporary, where library function will put the read state.
1059 Register Dst = MI.getOperand(0).getReg();
1060 LLT StateTy = MRI.getType(Dst);
1061 TypeSize StateSize = StateTy.getSizeInBytes();
1062 Align TempAlign = getStackTemporaryAlignment(StateTy);
1063 MachinePointerInfo TempPtrInfo;
1064 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1065
1066 // Create a call to library function, with the temporary as an argument.
1067 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1068 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1069 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1070 auto Res = createLibcall(
1071 RTLibcall, CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1072 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}), LocObserver,
1073 nullptr);
1074 if (Res != LegalizerHelper::Legalized)
1075 return Res;
1076
1077 // Create a load from the temporary.
1078 MachineMemOperand *MMO = MF.getMachineMemOperand(
1079 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
1080 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1081
1083}
1084
1085// Similar to `createGetStateLibcall` the function calls a library function
1086// using transient space in stack. In this case the library function reads
1087// content of memory region.
1089LegalizerHelper::createSetStateLibcall(MachineInstr &MI,
1090 LostDebugLocObserver &LocObserver) {
1091 const DataLayout &DL = MIRBuilder.getDataLayout();
1092 auto &MF = MIRBuilder.getMF();
1093 auto &MRI = *MIRBuilder.getMRI();
1094 auto &Ctx = MF.getFunction().getContext();
1095
1096 // Create temporary, where library function will get the new state.
1097 Register Src = MI.getOperand(0).getReg();
1098 LLT StateTy = MRI.getType(Src);
1099 TypeSize StateSize = StateTy.getSizeInBytes();
1100 Align TempAlign = getStackTemporaryAlignment(StateTy);
1101 MachinePointerInfo TempPtrInfo;
1102 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1103
1104 // Put the new state into the temporary.
1105 MachineMemOperand *MMO = MF.getMachineMemOperand(
1106 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
1107 MIRBuilder.buildStore(Src, Temp, *MMO);
1108
1109 // Create a call to library function, with the temporary as an argument.
1110 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1111 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1112 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1113 return createLibcall(RTLibcall,
1114 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1115 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1116 LocObserver, nullptr);
1117}
1118
1119/// Returns the corresponding libcall for the given Pred and
1120/// the ICMP predicate that should be generated to compare with #0
1121/// after the libcall.
1122static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1124#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1125 do { \
1126 switch (Size) { \
1127 case 32: \
1128 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1129 case 64: \
1130 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1131 case 128: \
1132 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1133 default: \
1134 llvm_unreachable("unexpected size"); \
1135 } \
1136 } while (0)
1137
1138 switch (Pred) {
1139 case CmpInst::FCMP_OEQ:
1141 case CmpInst::FCMP_UNE:
1143 case CmpInst::FCMP_OGE:
1145 case CmpInst::FCMP_OLT:
1147 case CmpInst::FCMP_OLE:
1149 case CmpInst::FCMP_OGT:
1151 case CmpInst::FCMP_UNO:
1153 default:
1154 return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
1155 }
1156}
1157
1159LegalizerHelper::createFCMPLibcall(MachineInstr &MI,
1160 LostDebugLocObserver &LocObserver) {
1161 auto &MF = MIRBuilder.getMF();
1162 auto &Ctx = MF.getFunction().getContext();
1163 const GFCmp *Cmp = cast<GFCmp>(&MI);
1164
1165 LLT OpLLT = MRI.getType(Cmp->getLHSReg());
1166 unsigned Size = OpLLT.getSizeInBits();
1167 if ((Size != 32 && Size != 64 && Size != 128) ||
1168 OpLLT != MRI.getType(Cmp->getRHSReg()))
1169 return UnableToLegalize;
1170
1171 Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
1172
1173 // DstReg type is s32
1174 const Register DstReg = Cmp->getReg(0);
1175 LLT DstTy = MRI.getType(DstReg);
1176 const auto Cond = Cmp->getCond();
1177
1178 // Reference:
1179 // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
1180 // Generates a libcall followed by ICMP.
1181 const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
1182 const CmpInst::Predicate ICmpPred,
1183 const DstOp &Res) -> Register {
1184 // FCMP libcall always returns an i32, and needs an ICMP with #0.
1185 LLT TempLLT = LLT::integer(32);
1186 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1187 // Generate libcall, holding result in Temp
1188 const auto Status = createLibcall(
1189 Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
1190 {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
1191 LocObserver, &MI);
1192 if (!Status)
1193 return {};
1194
1195 // Compare temp with #0 to get the final result.
1196 return MIRBuilder
1197 .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
1198 .getReg(0);
1199 };
1200
1201 // Simple case if we have a direct mapping from predicate to libcall
1202 if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
1203 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1204 ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
1205 if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
1206 return Legalized;
1207 }
1208 return UnableToLegalize;
1209 }
1210
1211 // No direct mapping found, should be generated as combination of libcalls.
1212
1213 switch (Cond) {
1214 case CmpInst::FCMP_UEQ: {
1215 // FCMP_UEQ: unordered or equal
1216 // Convert into (FCMP_OEQ || FCMP_UNO).
1217
1218 const auto [OeqLibcall, OeqPred] =
1220 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1221
1222 const auto [UnoLibcall, UnoPred] =
1224 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1225 if (Oeq && Uno)
1226 MIRBuilder.buildOr(DstReg, Oeq, Uno);
1227 else
1228 return UnableToLegalize;
1229
1230 break;
1231 }
1232 case CmpInst::FCMP_ONE: {
1233 // FCMP_ONE: ordered and operands are unequal
1234 // Convert into (!FCMP_OEQ && !FCMP_UNO).
1235
1236 // We inverse the predicate instead of generating a NOT
1237 // to save one instruction.
1238 // On AArch64 isel can even select two cmp into a single ccmp.
1239 const auto [OeqLibcall, OeqPred] =
1241 const auto NotOeq =
1242 BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
1243
1244 const auto [UnoLibcall, UnoPred] =
1246 const auto NotUno =
1247 BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
1248
1249 if (NotOeq && NotUno)
1250 MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
1251 else
1252 return UnableToLegalize;
1253
1254 break;
1255 }
1256 case CmpInst::FCMP_ULT:
1257 case CmpInst::FCMP_UGE:
1258 case CmpInst::FCMP_UGT:
1259 case CmpInst::FCMP_ULE:
1260 case CmpInst::FCMP_ORD: {
1261 // Convert into: !(inverse(Pred))
1262 // E.g. FCMP_ULT becomes !FCMP_OGE
1263 // This is equivalent to the following, but saves some instructions.
1264 // MIRBuilder.buildNot(
1265 // PredTy,
1266 // MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
1267 // Op1, Op2));
1268 const auto [InversedLibcall, InversedPred] =
1270 if (!BuildLibcall(InversedLibcall,
1271 CmpInst::getInversePredicate(InversedPred), DstReg))
1272 return UnableToLegalize;
1273 break;
1274 }
1275 default:
1276 return UnableToLegalize;
1277 }
1278
1279 return Legalized;
1280}
1281
1282// The function is used to legalize operations that set default environment
1283// state. In C library a call like `fesetmode(FE_DFL_MODE)` is used for that.
1284// On most targets supported in glibc FE_DFL_MODE is defined as
1285// `((const femode_t *) -1)`. Such assumption is used here. If for some target
1286// it is not true, the target must provide custom lowering.
1288LegalizerHelper::createResetStateLibcall(MachineInstr &MI,
1289 LostDebugLocObserver &LocObserver) {
1290 const DataLayout &DL = MIRBuilder.getDataLayout();
1291 auto &MF = MIRBuilder.getMF();
1292 auto &Ctx = MF.getFunction().getContext();
1293
1294 // Create an argument for the library function.
1295 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1296 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1297 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1298 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1299 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1300 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1301 MIRBuilder.buildIntToPtr(Dest, DefValue);
1302
1303 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1304 return createLibcall(
1305 RTLibcall, CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1306 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}), LocObserver, &MI);
1307}
1308
1311 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1312
1313 switch (MI.getOpcode()) {
1314 default:
1315 return UnableToLegalize;
1316 case TargetOpcode::G_MUL:
1317 case TargetOpcode::G_SDIV:
1318 case TargetOpcode::G_UDIV:
1319 case TargetOpcode::G_SREM:
1320 case TargetOpcode::G_UREM:
1321 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1322 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1323 unsigned Size = LLTy.getSizeInBits();
1324 Type *HLTy = IntegerType::get(Ctx, Size);
1325 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1326 if (Status != Legalized)
1327 return Status;
1328 break;
1329 }
1330 case TargetOpcode::G_FADD:
1331 case TargetOpcode::G_FSUB:
1332 case TargetOpcode::G_FMUL:
1333 case TargetOpcode::G_FDIV:
1334 case TargetOpcode::G_FMA:
1335 case TargetOpcode::G_FPOW:
1336 case TargetOpcode::G_FREM:
1337 case TargetOpcode::G_FCOS:
1338 case TargetOpcode::G_FSIN:
1339 case TargetOpcode::G_FTAN:
1340 case TargetOpcode::G_FACOS:
1341 case TargetOpcode::G_FASIN:
1342 case TargetOpcode::G_FATAN:
1343 case TargetOpcode::G_FATAN2:
1344 case TargetOpcode::G_FCOSH:
1345 case TargetOpcode::G_FSINH:
1346 case TargetOpcode::G_FTANH:
1347 case TargetOpcode::G_FLOG10:
1348 case TargetOpcode::G_FLOG:
1349 case TargetOpcode::G_FLOG2:
1350 case TargetOpcode::G_FEXP:
1351 case TargetOpcode::G_FEXP2:
1352 case TargetOpcode::G_FEXP10:
1353 case TargetOpcode::G_FCEIL:
1354 case TargetOpcode::G_FFLOOR:
1355 case TargetOpcode::G_FMINNUM:
1356 case TargetOpcode::G_FMAXNUM:
1357 case TargetOpcode::G_FMINIMUMNUM:
1358 case TargetOpcode::G_FMAXIMUMNUM:
1359 case TargetOpcode::G_FSQRT:
1360 case TargetOpcode::G_FRINT:
1361 case TargetOpcode::G_FNEARBYINT:
1362 case TargetOpcode::G_INTRINSIC_TRUNC:
1363 case TargetOpcode::G_INTRINSIC_ROUND:
1364 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1365 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1366 unsigned Size = LLTy.getSizeInBits();
1367 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1368 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1369 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1370 return UnableToLegalize;
1371 }
1372 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1373 if (Status != Legalized)
1374 return Status;
1375 break;
1376 }
1377 case TargetOpcode::G_FSINCOS: {
1378 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1379 unsigned Size = LLTy.getSizeInBits();
1380 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1381 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1382 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1383 return UnableToLegalize;
1384 }
1385 return emitSincosLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1386 }
1387 case TargetOpcode::G_FMODF: {
1388 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1389 unsigned Size = LLTy.getSizeInBits();
1390 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1391 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1392 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1393 return UnableToLegalize;
1394 }
1395 return emitModfLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1396 }
1397 case TargetOpcode::G_LROUND:
1398 case TargetOpcode::G_LLROUND:
1399 case TargetOpcode::G_INTRINSIC_LRINT:
1400 case TargetOpcode::G_INTRINSIC_LLRINT: {
1401 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1402 unsigned Size = LLTy.getSizeInBits();
1403 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1404 Type *ITy = IntegerType::get(
1405 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1406 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1407 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1408 return UnableToLegalize;
1409 }
1410 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1412 createLibcall(Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1413 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1414 if (Status != Legalized)
1415 return Status;
1416 MI.eraseFromParent();
1417 return Legalized;
1418 }
1419 case TargetOpcode::G_FPOWI:
1420 case TargetOpcode::G_FLDEXP: {
1421 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1422 unsigned Size = LLTy.getSizeInBits();
1423 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1424 Type *ITy = IntegerType::get(
1425 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1426 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1427 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1428 return UnableToLegalize;
1429 }
1430 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1432 {MI.getOperand(1).getReg(), HLTy, 0},
1433 {MI.getOperand(2).getReg(), ITy, 1}};
1434 Args[1].Flags[0].setSExt();
1436 Libcall, {MI.getOperand(0).getReg(), HLTy, 0}, Args, LocObserver, &MI);
1437 if (Status != Legalized)
1438 return Status;
1439 break;
1440 }
1441 case TargetOpcode::G_FPEXT:
1442 case TargetOpcode::G_FPTRUNC: {
1443 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1444 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1445 if (!FromTy || !ToTy)
1446 return UnableToLegalize;
1447 LegalizeResult Status = conversionLibcall(MI, ToTy, FromTy, LocObserver);
1448 if (Status != Legalized)
1449 return Status;
1450 break;
1451 }
1452 case TargetOpcode::G_FCMP: {
1453 LegalizeResult Status = createFCMPLibcall(MI, LocObserver);
1454 if (Status != Legalized)
1455 return Status;
1456 MI.eraseFromParent();
1457 return Status;
1458 }
1459 case TargetOpcode::G_FPTOSI:
1460 case TargetOpcode::G_FPTOUI: {
1461 // FIXME: Support other types
1462 Type *FromTy =
1463 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1464 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1465 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1466 return UnableToLegalize;
1468 FromTy, LocObserver);
1469 if (Status != Legalized)
1470 return Status;
1471 break;
1472 }
1473 case TargetOpcode::G_SITOFP:
1474 case TargetOpcode::G_UITOFP: {
1475 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1476 Type *ToTy =
1477 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1478 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1479 return UnableToLegalize;
1480 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
1482 MI, ToTy, Type::getIntNTy(Ctx, FromSize), LocObserver, IsSigned);
1483 if (Status != Legalized)
1484 return Status;
1485 break;
1486 }
1487 case TargetOpcode::G_ATOMICRMW_XCHG:
1488 case TargetOpcode::G_ATOMICRMW_ADD:
1489 case TargetOpcode::G_ATOMICRMW_SUB:
1490 case TargetOpcode::G_ATOMICRMW_AND:
1491 case TargetOpcode::G_ATOMICRMW_OR:
1492 case TargetOpcode::G_ATOMICRMW_XOR:
1493 case TargetOpcode::G_ATOMIC_CMPXCHG:
1494 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1496 if (Status != Legalized)
1497 return Status;
1498 break;
1499 }
1500 case TargetOpcode::G_BZERO:
1501 case TargetOpcode::G_MEMCPY:
1502 case TargetOpcode::G_MEMMOVE:
1503 case TargetOpcode::G_MEMSET: {
1504 LegalizeResult Result =
1505 createMemLibcall(*MIRBuilder.getMRI(), MI, LocObserver);
1506 if (Result != Legalized)
1507 return Result;
1508 MI.eraseFromParent();
1509 return Result;
1510 }
1511 case TargetOpcode::G_GET_FPENV:
1512 case TargetOpcode::G_GET_FPMODE: {
1513 LegalizeResult Result = createGetStateLibcall(MI, LocObserver);
1514 if (Result != Legalized)
1515 return Result;
1516 break;
1517 }
1518 case TargetOpcode::G_SET_FPENV:
1519 case TargetOpcode::G_SET_FPMODE: {
1520 LegalizeResult Result = createSetStateLibcall(MI, LocObserver);
1521 if (Result != Legalized)
1522 return Result;
1523 break;
1524 }
1525 case TargetOpcode::G_RESET_FPENV:
1526 case TargetOpcode::G_RESET_FPMODE: {
1527 LegalizeResult Result = createResetStateLibcall(MI, LocObserver);
1528 if (Result != Legalized)
1529 return Result;
1530 break;
1531 }
1532 }
1533
1534 MI.eraseFromParent();
1535 return Legalized;
1536}
1537
1539 unsigned TypeIdx,
1540 LLT NarrowTy) {
1541 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1542 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1543
1544 switch (MI.getOpcode()) {
1545 default:
1546 return UnableToLegalize;
1547 case TargetOpcode::G_IMPLICIT_DEF: {
1548 Register DstReg = MI.getOperand(0).getReg();
1549 LLT DstTy = MRI.getType(DstReg);
1550
1551 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1552 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1553 // FIXME: Although this would also be legal for the general case, it causes
1554 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1555 // combines not being hit). This seems to be a problem related to the
1556 // artifact combiner.
1557 if (SizeOp0 % NarrowSize != 0) {
1558 LLT ImplicitTy = DstTy.changeElementType(NarrowTy);
1559 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1560 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1561
1562 MI.eraseFromParent();
1563 return Legalized;
1564 }
1565
1566 int NumParts = SizeOp0 / NarrowSize;
1567
1569 for (int i = 0; i < NumParts; ++i)
1570 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1571
1572 if (DstTy.isVector())
1573 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1574 else
1575 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1576 MI.eraseFromParent();
1577 return Legalized;
1578 }
1579 case TargetOpcode::G_CONSTANT: {
1580 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1581 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1582 unsigned TotalSize = Ty.getSizeInBits();
1583 unsigned NarrowSize = NarrowTy.getSizeInBits();
1584 int NumParts = TotalSize / NarrowSize;
1585
1586 SmallVector<Register, 4> PartRegs;
1587 for (int I = 0; I != NumParts; ++I) {
1588 unsigned Offset = I * NarrowSize;
1589 auto K = MIRBuilder.buildConstant(NarrowTy,
1590 Val.lshr(Offset).trunc(NarrowSize));
1591 PartRegs.push_back(K.getReg(0));
1592 }
1593
1594 LLT LeftoverTy;
1595 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1596 SmallVector<Register, 1> LeftoverRegs;
1597 if (LeftoverBits != 0) {
1598 LeftoverTy = LLT::scalar(LeftoverBits);
1599 auto K = MIRBuilder.buildConstant(
1600 LeftoverTy,
1601 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1602 LeftoverRegs.push_back(K.getReg(0));
1603 }
1604
1605 insertParts(MI.getOperand(0).getReg(),
1606 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1607
1608 MI.eraseFromParent();
1609 return Legalized;
1610 }
1611 case TargetOpcode::G_SEXT:
1612 case TargetOpcode::G_ZEXT:
1613 case TargetOpcode::G_ANYEXT:
1614 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1615 case TargetOpcode::G_TRUNC: {
1616 if (TypeIdx != 1)
1617 return UnableToLegalize;
1618
1619 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1620 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1621 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1622 return UnableToLegalize;
1623 }
1624
1625 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1626 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1627 MI.eraseFromParent();
1628 return Legalized;
1629 }
1630 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1631 case TargetOpcode::G_FREEZE: {
1632 if (TypeIdx != 0)
1633 return UnableToLegalize;
1634
1635 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1636 // Should widen scalar first
1637 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1638 return UnableToLegalize;
1639
1640 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1642 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1643 Parts.push_back(
1644 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1645 .getReg(0));
1646 }
1647
1648 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1649 MI.eraseFromParent();
1650 return Legalized;
1651 }
1652 case TargetOpcode::G_ADD:
1653 case TargetOpcode::G_SUB:
1654 case TargetOpcode::G_SADDO:
1655 case TargetOpcode::G_SSUBO:
1656 case TargetOpcode::G_SADDE:
1657 case TargetOpcode::G_SSUBE:
1658 case TargetOpcode::G_UADDO:
1659 case TargetOpcode::G_USUBO:
1660 case TargetOpcode::G_UADDE:
1661 case TargetOpcode::G_USUBE:
1662 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1663 case TargetOpcode::G_MUL:
1664 case TargetOpcode::G_UMULH:
1665 return narrowScalarMul(MI, NarrowTy);
1666 case TargetOpcode::G_EXTRACT:
1667 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1668 case TargetOpcode::G_INSERT:
1669 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1670 case TargetOpcode::G_LOAD: {
1671 auto &LoadMI = cast<GLoad>(MI);
1672 Register DstReg = LoadMI.getDstReg();
1673 LLT DstTy = MRI.getType(DstReg);
1674 if (DstTy.isVector())
1675 return UnableToLegalize;
1676
1677 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1678 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1679 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1680 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1681 LoadMI.eraseFromParent();
1682 return Legalized;
1683 }
1684
1685 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1686 }
1687 case TargetOpcode::G_ZEXTLOAD:
1688 case TargetOpcode::G_SEXTLOAD: {
1689 auto &LoadMI = cast<GExtLoad>(MI);
1690 Register DstReg = LoadMI.getDstReg();
1691 Register PtrReg = LoadMI.getPointerReg();
1692
1693 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1694 auto &MMO = LoadMI.getMMO();
1695 unsigned MemSize = MMO.getSizeInBits().getValue();
1696
1697 if (MemSize == NarrowSize) {
1698 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1699 } else if (MemSize < NarrowSize) {
1700 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1701 } else if (MemSize > NarrowSize) {
1702 // FIXME: Need to split the load.
1703 return UnableToLegalize;
1704 }
1705
1706 if (isa<GZExtLoad>(LoadMI))
1707 MIRBuilder.buildZExt(DstReg, TmpReg);
1708 else
1709 MIRBuilder.buildSExt(DstReg, TmpReg);
1710
1711 LoadMI.eraseFromParent();
1712 return Legalized;
1713 }
1714 case TargetOpcode::G_STORE: {
1715 auto &StoreMI = cast<GStore>(MI);
1716
1717 Register SrcReg = StoreMI.getValueReg();
1718 LLT SrcTy = MRI.getType(SrcReg);
1719 if (SrcTy.isVector())
1720 return UnableToLegalize;
1721
1722 int NumParts = SizeOp0 / NarrowSize;
1723 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1724 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1725 if (SrcTy.isVector() && LeftoverBits != 0)
1726 return UnableToLegalize;
1727
1728 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1729 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1730 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1731 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1732 StoreMI.eraseFromParent();
1733 return Legalized;
1734 }
1735
1736 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1737 }
1738 case TargetOpcode::G_SELECT:
1739 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1740 case TargetOpcode::G_AND:
1741 case TargetOpcode::G_OR:
1742 case TargetOpcode::G_XOR: {
1743 // Legalize bitwise operation:
1744 // A = BinOp<Ty> B, C
1745 // into:
1746 // B1, ..., BN = G_UNMERGE_VALUES B
1747 // C1, ..., CN = G_UNMERGE_VALUES C
1748 // A1 = BinOp<Ty/N> B1, C2
1749 // ...
1750 // AN = BinOp<Ty/N> BN, CN
1751 // A = G_MERGE_VALUES A1, ..., AN
1752 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1753 }
1754 case TargetOpcode::G_SHL:
1755 case TargetOpcode::G_LSHR:
1756 case TargetOpcode::G_ASHR:
1757 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1758 case TargetOpcode::G_CTLZ:
1759 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1760 case TargetOpcode::G_CTTZ:
1761 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1762 case TargetOpcode::G_CTLS:
1763 case TargetOpcode::G_CTPOP:
1764 if (TypeIdx == 1)
1765 switch (MI.getOpcode()) {
1766 case TargetOpcode::G_CTLZ:
1767 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1768 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1769 case TargetOpcode::G_CTTZ:
1770 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1771 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1772 case TargetOpcode::G_CTPOP:
1773 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1774 case TargetOpcode::G_CTLS:
1775 return narrowScalarCTLS(MI, TypeIdx, NarrowTy);
1776 default:
1777 return UnableToLegalize;
1778 }
1779
1780 Observer.changingInstr(MI);
1781 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1782 Observer.changedInstr(MI);
1783 return Legalized;
1784 case TargetOpcode::G_INTTOPTR:
1785 if (TypeIdx != 1)
1786 return UnableToLegalize;
1787
1788 Observer.changingInstr(MI);
1789 narrowScalarSrc(MI, NarrowTy, 1);
1790 Observer.changedInstr(MI);
1791 return Legalized;
1792 case TargetOpcode::G_PTRTOINT:
1793 if (TypeIdx != 0)
1794 return UnableToLegalize;
1795
1796 Observer.changingInstr(MI);
1797 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1798 Observer.changedInstr(MI);
1799 return Legalized;
1800 case TargetOpcode::G_PHI: {
1801 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1802 // NarrowSize.
1803 if (SizeOp0 % NarrowSize != 0)
1804 return UnableToLegalize;
1805
1806 unsigned NumParts = SizeOp0 / NarrowSize;
1807 SmallVector<Register, 2> DstRegs(NumParts);
1808 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1809 Observer.changingInstr(MI);
1810 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1811 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1812 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1813 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1814 SrcRegs[i / 2], MIRBuilder, MRI);
1815 }
1816 MachineBasicBlock &MBB = *MI.getParent();
1817 MIRBuilder.setInsertPt(MBB, MI);
1818 for (unsigned i = 0; i < NumParts; ++i) {
1819 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1821 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1822 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1823 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1824 }
1825 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1826 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1827 Observer.changedInstr(MI);
1828 MI.eraseFromParent();
1829 return Legalized;
1830 }
1831 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1832 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1833 if (TypeIdx != 2)
1834 return UnableToLegalize;
1835
1836 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1837 Observer.changingInstr(MI);
1838 narrowScalarSrc(MI, NarrowTy, OpIdx);
1839 Observer.changedInstr(MI);
1840 return Legalized;
1841 }
1842 case TargetOpcode::G_ICMP: {
1843 Register LHS = MI.getOperand(2).getReg();
1844 LLT SrcTy = MRI.getType(LHS);
1845 CmpInst::Predicate Pred =
1846 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1847
1848 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1849 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1850 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1851 LHSLeftoverRegs, MIRBuilder, MRI))
1852 return UnableToLegalize;
1853
1854 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1855 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1856 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1857 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1858 return UnableToLegalize;
1859
1860 // We now have the LHS and RHS of the compare split into narrow-type
1861 // registers, plus potentially some leftover type.
1862 Register Dst = MI.getOperand(0).getReg();
1863 LLT ResTy = MRI.getType(Dst);
1864 if (ICmpInst::isEquality(Pred)) {
1865 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1866 // them together. For each equal part, the result should be all 0s. For
1867 // each non-equal part, we'll get at least one 1.
1868 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1870 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1871 auto LHS = std::get<0>(LHSAndRHS);
1872 auto RHS = std::get<1>(LHSAndRHS);
1873 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1874 Xors.push_back(Xor);
1875 }
1876
1877 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1878 // to the desired narrow type so that we can OR them together later.
1879 SmallVector<Register, 4> WidenedXors;
1880 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1881 auto LHS = std::get<0>(LHSAndRHS);
1882 auto RHS = std::get<1>(LHSAndRHS);
1883 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1884 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1885 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1886 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1887 llvm::append_range(Xors, WidenedXors);
1888 }
1889
1890 // Now, for each part we broke up, we know if they are equal/not equal
1891 // based off the G_XOR. We can OR these all together and compare against
1892 // 0 to get the result.
1893 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1894 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1895 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1896 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1897 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1898 } else {
1899 Register CmpIn;
1900 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1901 Register CmpOut;
1902 CmpInst::Predicate PartPred;
1903
1904 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1905 PartPred = Pred;
1906 CmpOut = Dst;
1907 } else {
1908 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1909 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1910 }
1911
1912 if (!CmpIn) {
1913 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
1914 RHSPartRegs[I]);
1915 } else {
1916 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
1917 RHSPartRegs[I]);
1918 auto CmpEq = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1919 LHSPartRegs[I], RHSPartRegs[I]);
1920 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1921 }
1922
1923 CmpIn = CmpOut;
1924 }
1925
1926 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1927 Register CmpOut;
1928 CmpInst::Predicate PartPred;
1929
1930 if (I == E - 1) {
1931 PartPred = Pred;
1932 CmpOut = Dst;
1933 } else {
1934 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1935 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1936 }
1937
1938 if (!CmpIn) {
1939 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
1940 RHSLeftoverRegs[I]);
1941 } else {
1942 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
1943 RHSLeftoverRegs[I]);
1944 auto CmpEq =
1945 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1946 LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
1947 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1948 }
1949
1950 CmpIn = CmpOut;
1951 }
1952 }
1953 MI.eraseFromParent();
1954 return Legalized;
1955 }
1956 case TargetOpcode::G_FCMP:
1957 if (TypeIdx != 0)
1958 return UnableToLegalize;
1959
1960 Observer.changingInstr(MI);
1961 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1962 Observer.changedInstr(MI);
1963 return Legalized;
1964
1965 case TargetOpcode::G_SEXT_INREG: {
1966 if (TypeIdx != 0)
1967 return UnableToLegalize;
1968
1969 int64_t SizeInBits = MI.getOperand(2).getImm();
1970
1971 // So long as the new type has more bits than the bits we're extending we
1972 // don't need to break it apart.
1973 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1974 Observer.changingInstr(MI);
1975 // We don't lose any non-extension bits by truncating the src and
1976 // sign-extending the dst.
1977 MachineOperand &MO1 = MI.getOperand(1);
1978 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1979 MO1.setReg(TruncMIB.getReg(0));
1980
1981 MachineOperand &MO2 = MI.getOperand(0);
1982 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1983 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1984 MIRBuilder.buildSExt(MO2, DstExt);
1985 MO2.setReg(DstExt);
1986 Observer.changedInstr(MI);
1987 return Legalized;
1988 }
1989
1990 // Break it apart. Components below the extension point are unmodified. The
1991 // component containing the extension point becomes a narrower SEXT_INREG.
1992 // Components above it are ashr'd from the component containing the
1993 // extension point.
1994 if (SizeOp0 % NarrowSize != 0)
1995 return UnableToLegalize;
1996 int NumParts = SizeOp0 / NarrowSize;
1997
1998 // List the registers where the destination will be scattered.
2000 // List the registers where the source will be split.
2002
2003 // Create all the temporary registers.
2004 for (int i = 0; i < NumParts; ++i) {
2005 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
2006
2007 SrcRegs.push_back(SrcReg);
2008 }
2009
2010 // Explode the big arguments into smaller chunks.
2011 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
2012
2013 Register AshrCstReg =
2014 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
2015 .getReg(0);
2016 Register FullExtensionReg;
2017 Register PartialExtensionReg;
2018
2019 // Do the operation on each small part.
2020 for (int i = 0; i < NumParts; ++i) {
2021 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
2022 DstRegs.push_back(SrcRegs[i]);
2023 PartialExtensionReg = DstRegs.back();
2024 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
2025 assert(PartialExtensionReg &&
2026 "Expected to visit partial extension before full");
2027 if (FullExtensionReg) {
2028 DstRegs.push_back(FullExtensionReg);
2029 continue;
2030 }
2031 DstRegs.push_back(
2032 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
2033 .getReg(0));
2034 FullExtensionReg = DstRegs.back();
2035 } else {
2036 DstRegs.push_back(
2038 .buildInstr(
2039 TargetOpcode::G_SEXT_INREG, {NarrowTy},
2040 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
2041 .getReg(0));
2042 PartialExtensionReg = DstRegs.back();
2043 }
2044 }
2045
2046 // Gather the destination registers into the final destination.
2047 Register DstReg = MI.getOperand(0).getReg();
2048 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2049 MI.eraseFromParent();
2050 return Legalized;
2051 }
2052 case TargetOpcode::G_BSWAP:
2053 case TargetOpcode::G_BITREVERSE: {
2054 if (SizeOp0 % NarrowSize != 0)
2055 return UnableToLegalize;
2056
2057 Observer.changingInstr(MI);
2058 SmallVector<Register, 2> SrcRegs, DstRegs;
2059 unsigned NumParts = SizeOp0 / NarrowSize;
2060 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2061 MIRBuilder, MRI);
2062
2063 for (unsigned i = 0; i < NumParts; ++i) {
2064 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
2065 {SrcRegs[NumParts - 1 - i]});
2066 DstRegs.push_back(DstPart.getReg(0));
2067 }
2068
2069 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
2070
2071 Observer.changedInstr(MI);
2072 MI.eraseFromParent();
2073 return Legalized;
2074 }
2075 case TargetOpcode::G_PTR_ADD:
2076 case TargetOpcode::G_PTRMASK: {
2077 if (TypeIdx != 1)
2078 return UnableToLegalize;
2079 Observer.changingInstr(MI);
2080 narrowScalarSrc(MI, NarrowTy, 2);
2081 Observer.changedInstr(MI);
2082 return Legalized;
2083 }
2084 case TargetOpcode::G_FPTOUI:
2085 case TargetOpcode::G_FPTOSI:
2086 case TargetOpcode::G_FPTOUI_SAT:
2087 case TargetOpcode::G_FPTOSI_SAT:
2088 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
2089 case TargetOpcode::G_FPEXT:
2090 if (TypeIdx != 0)
2091 return UnableToLegalize;
2092 Observer.changingInstr(MI);
2093 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
2094 Observer.changedInstr(MI);
2095 return Legalized;
2096 case TargetOpcode::G_FLDEXP:
2097 case TargetOpcode::G_STRICT_FLDEXP:
2098 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
2099 case TargetOpcode::G_VSCALE: {
2100 Register Dst = MI.getOperand(0).getReg();
2101 LLT Ty = MRI.getType(Dst);
2102
2103 // Assume VSCALE(1) fits into a legal integer
2104 const APInt One(NarrowTy.getSizeInBits(), 1);
2105 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
2106 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
2107 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
2108 MIRBuilder.buildMul(Dst, ZExt, C);
2109
2110 MI.eraseFromParent();
2111 return Legalized;
2112 }
2113 }
2114}
2115
2117 LLT Ty = MRI.getType(Val);
 // Coerce \p Val to an integer scalar of the same total bit width:
 // - scalars are returned unchanged;
 // - pointers become a G_PTRTOINT (invalid Register() if the address space
 //   is non-integral, where pointer<->int casts are not meaningful);
 // - vectors are bitcast to the covering scalar (pointer vectors are first
 //   converted with G_PTRTOINT).
2118 if (Ty.isScalar())
2119 return Val;
2120
2121 const DataLayout &DL = MIRBuilder.getDataLayout();
 // Scalar type covering all of Ty's bits.
2122 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
2123 if (Ty.isPointer()) {
2124 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2125 return Register();
2126 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2127 }
2128
2129 Register NewVal = Val;
2130
2131 assert(Ty.isVector());
 // Pointer elements can't be bitcast directly; go through G_PTRTOINT first.
2132 if (Ty.isPointerVector())
2133 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2134 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2135}
2136
2138 unsigned OpIdx, unsigned ExtOpcode) {
 // Widen source operand \p OpIdx in place: emit ExtOpcode (e.g. G_ANYEXT /
 // G_SEXT / G_ZEXT) from the old value to WideTy, then repoint the operand
 // at the extended register.
2139 MachineOperand &MO = MI.getOperand(OpIdx);
2140 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2141 MO.setReg(ExtB.getReg(0));
2142}
2143
2145 unsigned OpIdx) {
 // Narrow source operand \p OpIdx in place: truncate the old value to
 // NarrowTy and repoint the operand at the truncated register.
2146 MachineOperand &MO = MI.getOperand(OpIdx);
2147 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
2148 MO.setReg(ExtB.getReg(0));
2149}
2150
2152 unsigned OpIdx, unsigned TruncOpcode) {
 // Widen def operand \p OpIdx: the instruction now defines a fresh WideTy
 // register, and a TruncOpcode (default G_TRUNC) inserted *after* MI
 // narrows it back into the original destination register.
2153 MachineOperand &MO = MI.getOperand(OpIdx);
2154 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
 // Place the conversion after MI so it consumes MI's (widened) result.
2155 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2156 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2157 MO.setReg(DstExt);
2158}
2159
2161 unsigned OpIdx, unsigned ExtOpcode) {
 // Narrow def operand \p OpIdx: the instruction now defines a fresh
 // NarrowTy register, and an ExtOpcode inserted *after* MI extends it back
 // into the original (wider) destination register.
2162 MachineOperand &MO = MI.getOperand(OpIdx);
2163 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
 // Place the extension after MI so it consumes MI's (narrowed) result.
2164 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2165 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2166 MO.setReg(DstTrunc);
2167}
2168
2170 unsigned OpIdx) {
 // Grow the vector def at \p OpIdx to WideTy: MI now defines a fresh WideTy
 // register, and an instruction inserted after MI drops the trailing
 // (padding) elements to recover the original destination.
2171 MachineOperand &MO = MI.getOperand(OpIdx);
2172 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2173 Register Dst = MO.getReg();
2174 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2175 MO.setReg(DstExt);
2176 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2177}
2178
2180 unsigned OpIdx) {
 // Grow the vector source at \p OpIdx to MoreTy by appending undef
 // elements, then repoint the operand at the padded register.
2181 MachineOperand &MO = MI.getOperand(OpIdx);
2182 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2183}
2184
 // Bitcast the source operand at \p OpIdx to CastTy and repoint the operand
 // at the cast result.
2186 MachineOperand &Op = MI.getOperand(OpIdx);
2187 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
2188}
2189
 // Bitcast the def operand at \p OpIdx: MI now defines a fresh CastTy
 // register, and a bitcast inserted after MI converts it back into the
 // original destination register.
2191 MachineOperand &MO = MI.getOperand(OpIdx);
2192 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
 // Place the cast after MI so it consumes MI's (bitcast-typed) result.
2193 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2194 MIRBuilder.buildBitcast(MO, CastDst);
2195 MO.setReg(CastDst);
2196}
2197
2199 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
2200 LLT WideTy) {
 // Widen the source operands (TypeIdx must be 1) of a G_MERGE_VALUES to
 // WideTy. If WideTy covers the whole destination, the sources are zext'd,
 // shifted into place, and OR'd together directly. Otherwise the sources
 // are re-split at the GCD of the source/requested widths and regrouped
 // into WideTy-sized merges, padding with undef and truncating the final
 // merge if WideTy doesn't evenly divide the destination.
2201 if (TypeIdx != 1)
2202 return UnableToLegalize;
2203
2204 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
2205 if (DstTy.isVector())
2206 return UnableToLegalize;
2207
2208 LLT SrcTy = MRI.getType(Src1Reg);
2209 const int DstSize = DstTy.getSizeInBits();
2210 const int SrcSize = SrcTy.getSizeInBits();
2211 const int WideSize = WideTy.getSizeInBits();
 // Number of WideTy pieces needed to cover the destination (round up).
2212 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2213
2214 unsigned NumOps = MI.getNumOperands();
2215 unsigned NumSrc = MI.getNumOperands() - 1;
2216 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2217
2218 if (WideSize >= DstSize) {
2219 // Directly pack the bits in the target type.
2220 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
2221
 // Accumulate each remaining source: zext, shift to its bit offset, OR in.
2222 for (unsigned I = 2; I != NumOps; ++I) {
2223 const unsigned Offset = (I - 1) * PartSize;
2224
2225 Register SrcReg = MI.getOperand(I).getReg();
2226 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
2227
2228 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
2229
 // Write the last OR straight into DstReg when types match exactly.
2230 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
2231 MRI.createGenericVirtualRegister(WideTy);
2232
2233 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
2234 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
2235 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
2236 ResultReg = NextResult;
2237 }
2238
2239 if (WideSize > DstSize)
2240 MIRBuilder.buildTrunc(DstReg, ResultReg);
2241 else if (DstTy.isPointer())
2242 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
2243
2244 MI.eraseFromParent();
2245 return Legalized;
2246 }
2247
2248 // Unmerge the original values to the GCD type, and recombine to the next
2249 // multiple greater than the original type.
2250 //
2251 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2252 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2253 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2254 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2255 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2256 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2257 // %12:_(s12) = G_MERGE_VALUES %10, %11
2258 //
2259 // Padding with undef if necessary:
2260 //
2261 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2262 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2263 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2264 // %7:_(s2) = G_IMPLICIT_DEF
2265 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2266 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2267 // %10:_(s12) = G_MERGE_VALUES %8, %9
2268
2269 const int GCD = std::gcd(SrcSize, WideSize);
2270 LLT GCDTy = LLT::scalar(GCD);
2271
2272 SmallVector<Register, 8> NewMergeRegs;
2273 SmallVector<Register, 8> Unmerges;
2274 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
2275
2276 // Decompose the original operands if they don't evenly divide.
2277 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
2278 Register SrcReg = MO.getReg();
2279 if (GCD == SrcSize) {
2280 Unmerges.push_back(SrcReg);
2281 } else {
2282 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2283 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2284 Unmerges.push_back(Unmerge.getReg(J));
2285 }
2286 }
2287
2288 // Pad with undef to the next size that is a multiple of the requested size.
 // NOTE(review): Unmerges.size() counts GCD-sized parts, while
 // NumMerge * WideSize is a bit count; when GCD > 1 this pads past the
 // NumMerge * PartsPerGCD entries actually consumed below. The surplus
 // undef entries appear harmless (never read) — confirm against upstream.
2289 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
2290 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
2291 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
2292 Unmerges.push_back(UndefReg);
2293 }
2294
2295 const int PartsPerGCD = WideSize / GCD;
2296
2297 // Build merges of each piece.
2298 ArrayRef<Register> Slicer(Unmerges);
2299 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2300 auto Merge =
2301 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2302 NewMergeRegs.push_back(Merge.getReg(0));
2303 }
2304
2305 // A truncate may be necessary if the requested type doesn't evenly divide the
2306 // original result type.
2307 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
2308 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2309 } else {
2310 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2311 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2312 }
2313
2314 MI.eraseFromParent();
2315 return Legalized;
2316}
2317
2319 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
2320 LLT WideTy) {
 // Widen the results (TypeIdx must be 0) of a scalar G_UNMERGE_VALUES to
 // WideTy. When WideTy covers the whole source, each result is extracted
 // with shift+trunc. Otherwise the source is anyext'd to the LCM of the
 // source/requested widths, unmerged at WideTy, and the pieces are
 // re-split/re-merged back to the original destination registers, creating
 // dead defs for the padding.
2321 if (TypeIdx != 0)
2322 return UnableToLegalize;
2323
 // Operand layout: NumDst defs followed by the single source use.
2324 int NumDst = MI.getNumOperands() - 1;
2325 Register SrcReg = MI.getOperand(NumDst).getReg();
2326 LLT SrcTy = MRI.getType(SrcReg);
2327 if (SrcTy.isVector())
2328 return UnableToLegalize;
2329
2330 Register Dst0Reg = MI.getOperand(0).getReg();
2331 LLT DstTy = MRI.getType(Dst0Reg);
2332 if (!DstTy.isScalar())
2333 return UnableToLegalize;
2334
2335 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
 // Pointer sources must first be converted to a plain integer.
2336 if (SrcTy.isPointer()) {
2337 const DataLayout &DL = MIRBuilder.getDataLayout();
2338 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
2339 LLVM_DEBUG(
2340 dbgs() << "Not casting non-integral address space integer\n");
2341 return UnableToLegalize;
2342 }
2343
2344 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
2345 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2346 }
2347
2348 // Widen SrcTy to WideTy. This does not affect the result, but since the
2349 // user requested this size, it is probably better handled than SrcTy and
2350 // should reduce the total number of legalization artifacts.
2351 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2352 SrcTy = WideTy;
2353 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2354 }
2355
2356 // Theres no unmerge type to target. Directly extract the bits from the
2357 // source type
2358 unsigned DstSize = DstTy.getSizeInBits();
2359
 // Result I lives at bit offset DstSize * I; shift it down and truncate.
2360 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2361 for (int I = 1; I != NumDst; ++I) {
2362 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2363 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2364 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2365 }
2366
2367 MI.eraseFromParent();
2368 return Legalized;
2369 }
2370
2371 // Extend the source to a wider type.
2372 LLT LCMTy = getLCMType(SrcTy, WideTy);
2373
2374 Register WideSrc = SrcReg;
2375 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2376 // TODO: If this is an integral address space, cast to integer and anyext.
2377 if (SrcTy.isPointer()) {
2378 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2379 return UnableToLegalize;
2380 }
2381
2382 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2383 }
2384
2385 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2386
2387 // Create a sequence of unmerges and merges to the original results. Since we
2388 // may have widened the source, we will need to pad the results with dead defs
2389 // to cover the source register.
2390 // e.g. widen s48 to s64:
2391 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2392 //
2393 // =>
2394 // %4:_(s192) = G_ANYEXT %0:_(s96)
2395 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2396 // ; unpack to GCD type, with extra dead defs
2397 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2398 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2399 // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
2400 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2401 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2402 const LLT GCDTy = getGCDType(WideTy, DstTy);
2403 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2404 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2405
2406 // Directly unmerge to the destination without going through a GCD type
2407 // if possible
2408 if (PartsPerRemerge == 1) {
2409 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2410
2411 for (int I = 0; I != NumUnmerge; ++I) {
2412 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2413
2414 for (int J = 0; J != PartsPerUnmerge; ++J) {
2415 int Idx = I * PartsPerUnmerge + J;
2416 if (Idx < NumDst)
2417 MIB.addDef(MI.getOperand(Idx).getReg());
2418 else {
2419 // Create dead def for excess components.
2420 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2421 }
2422 }
2423
2424 MIB.addUse(Unmerge.getReg(I));
2425 }
2426 } else {
 // General path: break every WideTy piece down to GCDTy, then regroup
 // PartsPerRemerge GCD pieces per original destination.
2427 SmallVector<Register, 16> Parts;
2428 for (int J = 0; J != NumUnmerge; ++J)
2429 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2430
2431 SmallVector<Register, 8> RemergeParts;
2432 for (int I = 0; I != NumDst; ++I) {
2433 for (int J = 0; J < PartsPerRemerge; ++J) {
2434 const int Idx = I * PartsPerRemerge + J;
2435 RemergeParts.emplace_back(Parts[Idx]);
2436 }
2437
2438 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2439 RemergeParts.clear();
2440 }
2441 }
2442
2443 MI.eraseFromParent();
2444 return Legalized;
2445}
2446
2448 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2449 LLT WideTy) {
 // Widen a G_EXTRACT. For TypeIdx 0 (scalar result) the extract is lowered
 // to a right shift by the bit offset plus a truncate, done in the source
 // (or WideTy, if larger) type. For TypeIdx 1 (source), a scalar source is
 // simply anyext'd; a vector source is widened and the element offset
 // rescaled accordingly.
2450 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
 // Bit offset of the extracted value within the source.
2451 unsigned Offset = MI.getOperand(2).getImm();
2452
2453 if (TypeIdx == 0) {
2454 if (SrcTy.isVector() || DstTy.isVector())
2455 return UnableToLegalize;
2456
2457 SrcOp Src(SrcReg);
2458 if (SrcTy.isPointer()) {
2459 // Extracts from pointers can be handled only if they are really just
2460 // simple integers.
2461 const DataLayout &DL = MIRBuilder.getDataLayout();
2462 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2463 return UnableToLegalize;
2464
2465 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2466 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2467 SrcTy = SrcAsIntTy;
2468 }
2469
2470 if (DstTy.isPointer())
2471 return UnableToLegalize;
2472
2473 if (Offset == 0) {
2474 // Avoid a shift in the degenerate case.
2475 MIRBuilder.buildTrunc(DstReg,
2476 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2477 MI.eraseFromParent();
2478 return Legalized;
2479 }
2480
2481 // Do a shift in the source type.
2482 LLT ShiftTy = SrcTy;
2483 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2484 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2485 ShiftTy = WideTy;
2486 }
2487
 // Shift the wanted bits down to bit 0, then truncate to the result type.
2488 auto LShr = MIRBuilder.buildLShr(
2489 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2490 MIRBuilder.buildTrunc(DstReg, LShr);
2491 MI.eraseFromParent();
2492 return Legalized;
2493 }
2494
2495 if (SrcTy.isScalar()) {
2496 Observer.changingInstr(MI);
2497 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2498 Observer.changedInstr(MI);
2499 return Legalized;
2500 }
2501
2502 if (!SrcTy.isVector())
2503 return UnableToLegalize;
2504
 // Only handle whole-element extracts from a vector source.
2505 if (DstTy != SrcTy.getElementType())
2506 return UnableToLegalize;
2507
2508 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2509 return UnableToLegalize;
2510
2511 Observer.changingInstr(MI);
2512 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2513
 // Rescale the bit offset for the widened element size.
2514 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2515 Offset);
2516 widenScalarDst(MI, WideTy.getScalarType(), 0);
2517 Observer.changedInstr(MI);
2518 return Legalized;
2519}
2520
2522 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2523 LLT WideTy) {
 // Widen a G_INSERT's big operand (TypeIdx must be 0, scalar WideTy only):
 // anyext the container source and widen the def; the inserted value
 // operand and the offset are left untouched.
2524 if (TypeIdx != 0 || WideTy.isVector())
2525 return UnableToLegalize;
2526 Observer.changingInstr(MI);
2527 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2528 widenScalarDst(MI, WideTy);
2529 Observer.changedInstr(MI);
2530 return Legalized;
2531}
2532
2534 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2535 LLT WideTy) {
 // Widen overflow-producing add/sub (G_[SU]ADD/SUB[OE]). TypeIdx 1 widens
 // only the carry/overflow bit. TypeIdx 0 performs the arithmetic in
 // WideTy after sign-/zero-extending the inputs, then detects overflow by
 // checking whether truncating the wide result and re-extending it changes
 // the value.
2536 unsigned Opcode;
2537 unsigned ExtOpcode;
 // Carry-in operand register, present only for the *E (with-carry) forms.
2538 std::optional<Register> CarryIn;
2539 switch (MI.getOpcode()) {
2540 default:
2541 llvm_unreachable("Unexpected opcode!");
2542 case TargetOpcode::G_SADDO:
2543 Opcode = TargetOpcode::G_ADD;
2544 ExtOpcode = TargetOpcode::G_SEXT;
2545 break;
2546 case TargetOpcode::G_SSUBO:
2547 Opcode = TargetOpcode::G_SUB;
2548 ExtOpcode = TargetOpcode::G_SEXT;
2549 break;
2550 case TargetOpcode::G_UADDO:
2551 Opcode = TargetOpcode::G_ADD;
2552 ExtOpcode = TargetOpcode::G_ZEXT;
2553 break;
2554 case TargetOpcode::G_USUBO:
2555 Opcode = TargetOpcode::G_SUB;
2556 ExtOpcode = TargetOpcode::G_ZEXT;
2557 break;
 // Signed with-carry forms are emulated via the unsigned carry opcodes on
 // sign-extended operands; overflow is still checked via the ext-roundtrip.
2558 case TargetOpcode::G_SADDE:
2559 Opcode = TargetOpcode::G_UADDE;
2560 ExtOpcode = TargetOpcode::G_SEXT;
2561 CarryIn = MI.getOperand(4).getReg();
2562 break;
2563 case TargetOpcode::G_SSUBE:
2564 Opcode = TargetOpcode::G_USUBE;
2565 ExtOpcode = TargetOpcode::G_SEXT;
2566 CarryIn = MI.getOperand(4).getReg();
2567 break;
2568 case TargetOpcode::G_UADDE:
2569 Opcode = TargetOpcode::G_UADDE;
2570 ExtOpcode = TargetOpcode::G_ZEXT;
2571 CarryIn = MI.getOperand(4).getReg();
2572 break;
2573 case TargetOpcode::G_USUBE:
2574 Opcode = TargetOpcode::G_USUBE;
2575 ExtOpcode = TargetOpcode::G_ZEXT;
2576 CarryIn = MI.getOperand(4).getReg();
2577 break;
2578 }
2579
2580 if (TypeIdx == 1) {
 // Only the boolean carry type changes; widen carry-in (if any) and the
 // carry-out def using the target's preferred boolean extension.
2581 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2582
2583 Observer.changingInstr(MI);
2584 if (CarryIn)
2585 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2586 widenScalarDst(MI, WideTy, 1);
2587
2588 Observer.changedInstr(MI);
2589 return Legalized;
2590 }
2591
2592 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2593 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2594 // Do the arithmetic in the larger type.
2595 Register NewOp;
2596 if (CarryIn) {
2597 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2598 NewOp = MIRBuilder
2599 .buildInstr(Opcode, {WideTy, CarryOutTy},
2600 {LHSExt, RHSExt, *CarryIn})
2601 .getReg(0);
2602 } else {
2603 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2604 }
2605 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2606 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2607 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2608 // There is no overflow if the ExtOp is the same as NewOp.
2609 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2610 // Now trunc the NewOp to the original result.
2611 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2612 MI.eraseFromParent();
2613 return Legalized;
2614}
2615
2617 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2618 LLT WideTy) {
 // Widen saturating add/sub/shift (G_[SU]{ADD,SUB,SHL}SAT) by shifting the
 // operands to the top bits of WideTy so the wide saturation points line up
 // with the narrow ones, running the same saturating op, and shifting back.
2619 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2620 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2621 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2622 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2623 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2624 // We can convert this to:
2625 // 1. Any extend iN to iM
2626 // 2. SHL by M-N
2627 // 3. [US][ADD|SUB|SHL]SAT
2628 // 4. L/ASHR by M-N
2629 //
2630 // It may be more efficient to lower this to a min and a max operation in
2631 // the higher precision arithmetic if the promoted operation isn't legal,
2632 // but this decision is up to the target's lowering request.
2633 Register DstReg = MI.getOperand(0).getReg();
2634
2635 unsigned NewBits = WideTy.getScalarSizeInBits();
 // Amount to move the narrow value into WideTy's top bits.
2636 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2637
2638 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2639 // must not left shift the RHS to preserve the shift amount.
2640 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2641 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2642 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2643 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2644 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2645 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2646
2647 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2648 {ShiftL, ShiftR}, MI.getFlags());
2649
2650 // Use a shift that will preserve the number of sign bits when the trunc is
2651 // folded away.
2652 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2653 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2654
2655 MIRBuilder.buildTrunc(DstReg, Result);
2656 MI.eraseFromParent();
2657 return Legalized;
2658}
2659
2661 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2662 LLT WideTy) {
 // Widen G_SMULO/G_UMULO. TypeIdx 1 widens only the overflow flag.
 // TypeIdx 0 extends the multiplicands, multiplies in WideTy (with overflow
 // checking only when the wide product could itself overflow), and detects
 // narrow-type overflow by verifying that the wide product's high bits are
 // a proper sign/zero extension of the low bits.
2663 if (TypeIdx == 1) {
2664 Observer.changingInstr(MI);
2665 widenScalarDst(MI, WideTy, 1);
2666 Observer.changedInstr(MI);
2667 return Legalized;
2668 }
2669
2670 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2671 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2672 LLT SrcTy = MRI.getType(LHS);
2673 LLT OverflowTy = MRI.getType(OriginalOverflow);
2674 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2675
2676 // To determine if the result overflowed in the larger type, we extend the
2677 // input to the larger type, do the multiply (checking if it overflows),
2678 // then also check the high bits of the result to see if overflow happened
2679 // there.
2680 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2681 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2682 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2683
2684 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2685 // so we don't need to check the overflow result of larger type Mulo.
2686 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2687
2688 unsigned MulOpc =
2689 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2690
2691 MachineInstrBuilder Mulo;
2692 if (WideMulCanOverflow)
2693 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2694 {LeftOperand, RightOperand});
2695 else
2696 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2697
2698 auto Mul = Mulo->getOperand(0);
2699 MIRBuilder.buildTrunc(Result, Mul);
2700
2701 MachineInstrBuilder ExtResult;
2702 // Overflow occurred if it occurred in the larger type, or if the high part
2703 // of the result does not zero/sign-extend the low part. Check this second
2704 // possibility first.
2705 if (IsSigned) {
2706 // For signed, overflow occurred when the high part does not sign-extend
2707 // the low part.
2708 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2709 } else {
2710 // Unsigned overflow occurred when the high part does not zero-extend the
2711 // low part.
2712 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2713 }
2714
2715 if (WideMulCanOverflow) {
2716 auto Overflow =
2717 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2718 // Finally check if the multiplication in the larger type itself overflowed.
2719 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2720 } else {
2721 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2722 }
2723 MI.eraseFromParent();
2724 return Legalized;
2725}
2726
2729 unsigned Opcode = MI.getOpcode();
2730 switch (Opcode) {
2731 default:
2732 return UnableToLegalize;
2733 case TargetOpcode::G_ATOMICRMW_XCHG:
2734 case TargetOpcode::G_ATOMICRMW_ADD:
2735 case TargetOpcode::G_ATOMICRMW_SUB:
2736 case TargetOpcode::G_ATOMICRMW_AND:
2737 case TargetOpcode::G_ATOMICRMW_OR:
2738 case TargetOpcode::G_ATOMICRMW_XOR:
2739 case TargetOpcode::G_ATOMICRMW_MIN:
2740 case TargetOpcode::G_ATOMICRMW_MAX:
2741 case TargetOpcode::G_ATOMICRMW_UMIN:
2742 case TargetOpcode::G_ATOMICRMW_UMAX:
2743 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2744 Observer.changingInstr(MI);
2745 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2746 widenScalarDst(MI, WideTy, 0);
2747 Observer.changedInstr(MI);
2748 return Legalized;
2749 case TargetOpcode::G_ATOMIC_CMPXCHG:
2750 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2751 Observer.changingInstr(MI);
2752 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2753 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2754 widenScalarDst(MI, WideTy, 0);
2755 Observer.changedInstr(MI);
2756 return Legalized;
2757 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2758 if (TypeIdx == 0) {
2759 Observer.changingInstr(MI);
2760 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2761 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2762 widenScalarDst(MI, WideTy, 0);
2763 Observer.changedInstr(MI);
2764 return Legalized;
2765 }
2766 assert(TypeIdx == 1 &&
2767 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2768 Observer.changingInstr(MI);
2769 widenScalarDst(MI, WideTy, 1);
2770 Observer.changedInstr(MI);
2771 return Legalized;
2772 case TargetOpcode::G_EXTRACT:
2773 return widenScalarExtract(MI, TypeIdx, WideTy);
2774 case TargetOpcode::G_INSERT:
2775 return widenScalarInsert(MI, TypeIdx, WideTy);
2776 case TargetOpcode::G_MERGE_VALUES:
2777 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2778 case TargetOpcode::G_UNMERGE_VALUES:
2779 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2780 case TargetOpcode::G_SADDO:
2781 case TargetOpcode::G_SSUBO:
2782 case TargetOpcode::G_UADDO:
2783 case TargetOpcode::G_USUBO:
2784 case TargetOpcode::G_SADDE:
2785 case TargetOpcode::G_SSUBE:
2786 case TargetOpcode::G_UADDE:
2787 case TargetOpcode::G_USUBE:
2788 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2789 case TargetOpcode::G_UMULO:
2790 case TargetOpcode::G_SMULO:
2791 return widenScalarMulo(MI, TypeIdx, WideTy);
2792 case TargetOpcode::G_SADDSAT:
2793 case TargetOpcode::G_SSUBSAT:
2794 case TargetOpcode::G_SSHLSAT:
2795 case TargetOpcode::G_UADDSAT:
2796 case TargetOpcode::G_USUBSAT:
2797 case TargetOpcode::G_USHLSAT:
2798 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2799 case TargetOpcode::G_CTTZ:
2800 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2801 case TargetOpcode::G_CTLZ:
2802 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2803 case TargetOpcode::G_CTLS:
2804 case TargetOpcode::G_CTPOP: {
2805 if (TypeIdx == 0) {
2806 Observer.changingInstr(MI);
2807 widenScalarDst(MI, WideTy, 0);
2808 Observer.changedInstr(MI);
2809 return Legalized;
2810 }
2811
2812 Register SrcReg = MI.getOperand(1).getReg();
2813
2814 // First extend the input.
2815 unsigned ExtOpc;
2816 switch (Opcode) {
2817 case TargetOpcode::G_CTTZ:
2818 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2819 case TargetOpcode::G_CTLZ_ZERO_UNDEF: // undef bits shifted out below
2820 ExtOpc = TargetOpcode::G_ANYEXT;
2821 break;
2822 case TargetOpcode::G_CTLS:
2823 ExtOpc = TargetOpcode::G_SEXT;
2824 break;
2825 default:
2826 ExtOpc = TargetOpcode::G_ZEXT;
2827 }
2828
2829 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2830 LLT CurTy = MRI.getType(SrcReg);
2831 unsigned NewOpc = Opcode;
2832 if (NewOpc == TargetOpcode::G_CTTZ) {
2833 // The count is the same in the larger type except if the original
2834 // value was zero. This can be handled by setting the bit just off
2835 // the top of the original type.
2836 auto TopBit = APInt::getOneBitSet(WideTy.getScalarSizeInBits(),
2837 CurTy.getScalarSizeInBits());
2838 MIBSrc = MIRBuilder.buildOr(
2839 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2840 // Now we know the operand is non-zero, use the more relaxed opcode.
2841 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2842 }
2843
2844 unsigned SizeDiff =
2845 WideTy.getScalarSizeInBits() - CurTy.getScalarSizeInBits();
2846
2847 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2848 // An optimization where the result is the CTLZ after the left shift by
2849 // (Difference in widety and current ty), that is,
2850 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2851 // Result = ctlz MIBSrc
2852 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2853 MIRBuilder.buildConstant(WideTy, SizeDiff));
2854 }
2855
2856 // Perform the operation at the larger size.
2857 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2858 // This is already the correct result for CTPOP and CTTZs
2859 if (Opcode == TargetOpcode::G_CTLZ || Opcode == TargetOpcode::G_CTLS) {
2860 // The correct result is NewOp - (Difference in widety and current ty).
2861 // At this stage SUB is guaranteed to be positive no-wrap,
2862 // that to be used in further KnownBits optimizations for CTLZ.
2863 MIBNewOp = MIRBuilder.buildSub(
2864 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff),
2865 Opcode == TargetOpcode::G_CTLZ
2866 ? std::optional<unsigned>(MachineInstr::NoUWrap)
2867 : std::nullopt);
2868 }
2869
2870 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2871 MI.eraseFromParent();
2872 return Legalized;
2873 }
2874 case TargetOpcode::G_BSWAP: {
2875 Observer.changingInstr(MI);
2876 Register DstReg = MI.getOperand(0).getReg();
2877
2878 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2879 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2880 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2881 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2882
2883 MI.getOperand(0).setReg(DstExt);
2884
2885 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2886
2887 LLT Ty = MRI.getType(DstReg);
2888 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2889 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2890 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2891
2892 MIRBuilder.buildTrunc(DstReg, ShrReg);
2893 Observer.changedInstr(MI);
2894 return Legalized;
2895 }
2896 case TargetOpcode::G_BITREVERSE: {
2897 Observer.changingInstr(MI);
2898
2899 Register DstReg = MI.getOperand(0).getReg();
2900 LLT Ty = MRI.getType(DstReg);
2901 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2902
2903 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2904 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2905 MI.getOperand(0).setReg(DstExt);
2906 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2907
2908 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2909 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2910 MIRBuilder.buildTrunc(DstReg, Shift);
2911 Observer.changedInstr(MI);
2912 return Legalized;
2913 }
2914 case TargetOpcode::G_FREEZE:
2915 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2916 Observer.changingInstr(MI);
2917 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2918 widenScalarDst(MI, WideTy);
2919 Observer.changedInstr(MI);
2920 return Legalized;
2921
2922 case TargetOpcode::G_ABS:
2923 Observer.changingInstr(MI);
2924 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2925 widenScalarDst(MI, WideTy);
2926 Observer.changedInstr(MI);
2927 return Legalized;
2928
2929 case TargetOpcode::G_ADD:
2930 case TargetOpcode::G_AND:
2931 case TargetOpcode::G_MUL:
2932 case TargetOpcode::G_OR:
2933 case TargetOpcode::G_XOR:
2934 case TargetOpcode::G_SUB:
2935 case TargetOpcode::G_SHUFFLE_VECTOR:
 2936 // Perform operation at larger width (any extension is fine here, high bits
2937 // don't affect the result) and then truncate the result back to the
2938 // original type.
2939 Observer.changingInstr(MI);
2940 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2941 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2942 widenScalarDst(MI, WideTy);
2943 Observer.changedInstr(MI);
2944 return Legalized;
2945
2946 case TargetOpcode::G_SBFX:
2947 case TargetOpcode::G_UBFX:
2948 Observer.changingInstr(MI);
2949
2950 if (TypeIdx == 0) {
2951 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2952 widenScalarDst(MI, WideTy);
2953 } else {
2954 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2955 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2956 }
2957
2958 Observer.changedInstr(MI);
2959 return Legalized;
2960
2961 case TargetOpcode::G_SHL:
2962 Observer.changingInstr(MI);
2963
2964 if (TypeIdx == 0) {
2965 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2966 widenScalarDst(MI, WideTy);
2967 } else {
2968 assert(TypeIdx == 1);
2969 // The "number of bits to shift" operand must preserve its value as an
2970 // unsigned integer:
2971 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2972 }
2973
2974 Observer.changedInstr(MI);
2975 return Legalized;
2976
2977 case TargetOpcode::G_ROTR:
2978 case TargetOpcode::G_ROTL:
2979 if (TypeIdx != 1)
2980 return UnableToLegalize;
2981
2982 Observer.changingInstr(MI);
2983 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2984 Observer.changedInstr(MI);
2985 return Legalized;
2986
2987 case TargetOpcode::G_SDIV:
2988 case TargetOpcode::G_SREM:
2989 case TargetOpcode::G_SMIN:
2990 case TargetOpcode::G_SMAX:
2991 case TargetOpcode::G_ABDS:
2992 Observer.changingInstr(MI);
2993 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2994 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2995 widenScalarDst(MI, WideTy);
2996 Observer.changedInstr(MI);
2997 return Legalized;
2998
2999 case TargetOpcode::G_SDIVREM:
3000 Observer.changingInstr(MI);
3001 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3002 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
3003 widenScalarDst(MI, WideTy);
3004 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3005 widenScalarDst(MI, WideTy, 1);
3006 Observer.changedInstr(MI);
3007 return Legalized;
3008
3009 case TargetOpcode::G_ASHR:
3010 case TargetOpcode::G_LSHR:
3011 Observer.changingInstr(MI);
3012
3013 if (TypeIdx == 0) {
3014 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
3015 : TargetOpcode::G_ZEXT;
3016
3017 widenScalarSrc(MI, WideTy, 1, CvtOp);
3018 widenScalarDst(MI, WideTy);
3019 } else {
3020 assert(TypeIdx == 1);
3021 // The "number of bits to shift" operand must preserve its value as an
3022 // unsigned integer:
3023 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3024 }
3025
3026 Observer.changedInstr(MI);
3027 return Legalized;
3028 case TargetOpcode::G_UDIV:
3029 case TargetOpcode::G_UREM:
3030 case TargetOpcode::G_ABDU:
3031 Observer.changingInstr(MI);
3032 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3033 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3034 widenScalarDst(MI, WideTy);
3035 Observer.changedInstr(MI);
3036 return Legalized;
3037 case TargetOpcode::G_UDIVREM:
3038 Observer.changingInstr(MI);
3039 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3040 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3041 widenScalarDst(MI, WideTy);
3042 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3043 widenScalarDst(MI, WideTy, 1);
3044 Observer.changedInstr(MI);
3045 return Legalized;
3046 case TargetOpcode::G_UMIN:
3047 case TargetOpcode::G_UMAX: {
3048 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3049
3050 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3051 unsigned ExtOpc =
3052 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty, Ctx),
3053 getApproximateEVTForLLT(WideTy, Ctx))
3054 ? TargetOpcode::G_SEXT
3055 : TargetOpcode::G_ZEXT;
3056
3057 Observer.changingInstr(MI);
3058 widenScalarSrc(MI, WideTy, 1, ExtOpc);
3059 widenScalarSrc(MI, WideTy, 2, ExtOpc);
3060 widenScalarDst(MI, WideTy);
3061 Observer.changedInstr(MI);
3062 return Legalized;
3063 }
3064
3065 case TargetOpcode::G_SELECT:
3066 Observer.changingInstr(MI);
3067 if (TypeIdx == 0) {
3068 // Perform operation at larger width (any extension is fine here, high
3069 // bits don't affect the result) and then truncate the result back to the
3070 // original type.
3071 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3072 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
3073 widenScalarDst(MI, WideTy);
3074 } else {
3075 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
3076 // Explicit extension is required here since high bits affect the result.
3077 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
3078 }
3079 Observer.changedInstr(MI);
3080 return Legalized;
3081
3082 case TargetOpcode::G_FPEXT:
3083 if (TypeIdx != 1)
3084 return UnableToLegalize;
3085
3086 Observer.changingInstr(MI);
3087 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3088 Observer.changedInstr(MI);
3089 return Legalized;
3090 case TargetOpcode::G_FPTOSI:
3091 case TargetOpcode::G_FPTOUI:
3092 case TargetOpcode::G_INTRINSIC_LRINT:
3093 case TargetOpcode::G_INTRINSIC_LLRINT:
3094 case TargetOpcode::G_IS_FPCLASS:
3095 Observer.changingInstr(MI);
3096
3097 if (TypeIdx == 0)
3098 widenScalarDst(MI, WideTy);
3099 else
3100 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3101
3102 Observer.changedInstr(MI);
3103 return Legalized;
3104 case TargetOpcode::G_SITOFP:
3105 Observer.changingInstr(MI);
3106
3107 if (TypeIdx == 0)
3108 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3109 else
3110 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
3111
3112 Observer.changedInstr(MI);
3113 return Legalized;
3114 case TargetOpcode::G_UITOFP:
3115 Observer.changingInstr(MI);
3116
3117 if (TypeIdx == 0)
3118 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3119 else
3120 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3121
3122 Observer.changedInstr(MI);
3123 return Legalized;
3124 case TargetOpcode::G_FPTOSI_SAT:
3125 case TargetOpcode::G_FPTOUI_SAT:
3126 Observer.changingInstr(MI);
3127
3128 if (TypeIdx == 0) {
3129 Register OldDst = MI.getOperand(0).getReg();
3130 LLT Ty = MRI.getType(OldDst);
3131 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3132 Register NewDst;
3133 MI.getOperand(0).setReg(ExtReg);
3134 uint64_t ShortBits = Ty.getScalarSizeInBits();
3135 uint64_t WideBits = WideTy.getScalarSizeInBits();
3136 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3137 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3138 // z = i16 fptosi_sat(a)
3139 // ->
3140 // x = i32 fptosi_sat(a)
3141 // y = smin(x, 32767)
3142 // z = smax(y, -32768)
3143 auto MaxVal = MIRBuilder.buildConstant(
3144 WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
3145 auto MinVal = MIRBuilder.buildConstant(
3146 WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
3147 Register MidReg =
3148 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3149 NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3150 } else {
3151 // z = i16 fptoui_sat(a)
3152 // ->
3153 // x = i32 fptoui_sat(a)
3154 // y = smin(x, 65535)
3155 auto MaxVal = MIRBuilder.buildConstant(
3156 WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
3157 NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3158 }
3159 MIRBuilder.buildTrunc(OldDst, NewDst);
3160 } else
3161 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3162
3163 Observer.changedInstr(MI);
3164 return Legalized;
3165 case TargetOpcode::G_LOAD:
3166 case TargetOpcode::G_SEXTLOAD:
3167 case TargetOpcode::G_ZEXTLOAD:
3168 Observer.changingInstr(MI);
3169 widenScalarDst(MI, WideTy);
3170 Observer.changedInstr(MI);
3171 return Legalized;
3172
3173 case TargetOpcode::G_STORE: {
3174 if (TypeIdx != 0)
3175 return UnableToLegalize;
3176
3177 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3178 assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
3179 if (!Ty.isScalar()) {
3180 // We need to widen the vector element type.
3181 Observer.changingInstr(MI);
3182 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
3183 // We also need to adjust the MMO to turn this into a truncating store.
3184 MachineMemOperand &MMO = **MI.memoperands_begin();
3185 MachineFunction &MF = MIRBuilder.getMF();
3186 auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
3187 MI.setMemRefs(MF, {NewMMO});
3188 Observer.changedInstr(MI);
3189 return Legalized;
3190 }
3191
3192 Observer.changingInstr(MI);
3193
3194 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3195 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3196 widenScalarSrc(MI, WideTy, 0, ExtType);
3197
3198 Observer.changedInstr(MI);
3199 return Legalized;
3200 }
3201 case TargetOpcode::G_CONSTANT: {
3202 MachineOperand &SrcMO = MI.getOperand(1);
3203 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3204 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3205 MRI.getType(MI.getOperand(0).getReg()));
3206 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3207 ExtOpc == TargetOpcode::G_ANYEXT) &&
3208 "Illegal Extend");
3209 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3210 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3211 ? SrcVal.sext(WideTy.getSizeInBits())
3212 : SrcVal.zext(WideTy.getSizeInBits());
3213 Observer.changingInstr(MI);
3214 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3215
3216 widenScalarDst(MI, WideTy);
3217 Observer.changedInstr(MI);
3218 return Legalized;
3219 }
3220 case TargetOpcode::G_FCONSTANT: {
3221 // To avoid changing the bits of the constant due to extension to a larger
3222 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
3223 MachineOperand &SrcMO = MI.getOperand(1);
3224 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3225 MIRBuilder.setInstrAndDebugLoc(MI);
3226 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
3227 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
3228 MI.eraseFromParent();
3229 return Legalized;
3230 }
3231 case TargetOpcode::G_IMPLICIT_DEF: {
3232 Observer.changingInstr(MI);
3233 widenScalarDst(MI, WideTy);
3234 Observer.changedInstr(MI);
3235 return Legalized;
3236 }
3237 case TargetOpcode::G_BRCOND:
3238 Observer.changingInstr(MI);
3239 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
3240 Observer.changedInstr(MI);
3241 return Legalized;
3242
3243 case TargetOpcode::G_FCMP:
3244 Observer.changingInstr(MI);
3245 if (TypeIdx == 0)
3246 widenScalarDst(MI, WideTy);
3247 else {
3248 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3249 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
3250 }
3251 Observer.changedInstr(MI);
3252 return Legalized;
3253
3254 case TargetOpcode::G_ICMP:
3255 Observer.changingInstr(MI);
3256 if (TypeIdx == 0)
3257 widenScalarDst(MI, WideTy);
3258 else {
3259 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
3260 CmpInst::Predicate Pred =
3261 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3262
3263 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3264 unsigned ExtOpcode =
3265 (CmpInst::isSigned(Pred) ||
3266 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx),
3267 getApproximateEVTForLLT(WideTy, Ctx)))
3268 ? TargetOpcode::G_SEXT
3269 : TargetOpcode::G_ZEXT;
3270 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
3271 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
3272 }
3273 Observer.changedInstr(MI);
3274 return Legalized;
3275
3276 case TargetOpcode::G_PTR_ADD:
3277 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
3278 Observer.changingInstr(MI);
3279 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3280 Observer.changedInstr(MI);
3281 return Legalized;
3282
3283 case TargetOpcode::G_PHI: {
3284 assert(TypeIdx == 0 && "Expecting only Idx 0");
3285
3286 Observer.changingInstr(MI);
3287 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3288 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3289 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
3290 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
3291 }
3292
3293 MachineBasicBlock &MBB = *MI.getParent();
3294 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3295 widenScalarDst(MI, WideTy);
3296 Observer.changedInstr(MI);
3297 return Legalized;
3298 }
3299 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3300 if (TypeIdx == 0) {
3301 Register VecReg = MI.getOperand(1).getReg();
3302 LLT VecTy = MRI.getType(VecReg);
3303 Observer.changingInstr(MI);
3304
3305 widenScalarSrc(MI, LLT::vector(VecTy.getElementCount(), WideTy), 1,
3306 TargetOpcode::G_ANYEXT);
3307
3308 widenScalarDst(MI, WideTy, 0);
3309 Observer.changedInstr(MI);
3310 return Legalized;
3311 }
3312
3313 if (TypeIdx != 2)
3314 return UnableToLegalize;
3315 Observer.changingInstr(MI);
3316 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3317 Observer.changedInstr(MI);
3318 return Legalized;
3319 }
3320 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3321 if (TypeIdx == 0) {
3322 Observer.changingInstr(MI);
3323 const LLT WideEltTy = WideTy.getElementType();
3324
3325 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3326 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3327 widenScalarDst(MI, WideTy, 0);
3328 Observer.changedInstr(MI);
3329 return Legalized;
3330 }
3331
3332 if (TypeIdx == 1) {
3333 Observer.changingInstr(MI);
3334
3335 Register VecReg = MI.getOperand(1).getReg();
3336 LLT VecTy = MRI.getType(VecReg);
3337 LLT WideVecTy = VecTy.changeVectorElementType(WideTy);
3338
3339 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3340 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3341 widenScalarDst(MI, WideVecTy, 0);
3342 Observer.changedInstr(MI);
3343 return Legalized;
3344 }
3345
3346 if (TypeIdx == 2) {
3347 Observer.changingInstr(MI);
3348 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3349 Observer.changedInstr(MI);
3350 return Legalized;
3351 }
3352
3353 return UnableToLegalize;
3354 }
3355 case TargetOpcode::G_FADD:
3356 case TargetOpcode::G_FMUL:
3357 case TargetOpcode::G_FSUB:
3358 case TargetOpcode::G_FMA:
3359 case TargetOpcode::G_FMAD:
3360 case TargetOpcode::G_FNEG:
3361 case TargetOpcode::G_FABS:
3362 case TargetOpcode::G_FCANONICALIZE:
3363 case TargetOpcode::G_FMINNUM:
3364 case TargetOpcode::G_FMAXNUM:
3365 case TargetOpcode::G_FMINNUM_IEEE:
3366 case TargetOpcode::G_FMAXNUM_IEEE:
3367 case TargetOpcode::G_FMINIMUM:
3368 case TargetOpcode::G_FMAXIMUM:
3369 case TargetOpcode::G_FMINIMUMNUM:
3370 case TargetOpcode::G_FMAXIMUMNUM:
3371 case TargetOpcode::G_FDIV:
3372 case TargetOpcode::G_FREM:
3373 case TargetOpcode::G_FCEIL:
3374 case TargetOpcode::G_FFLOOR:
3375 case TargetOpcode::G_FCOS:
3376 case TargetOpcode::G_FSIN:
3377 case TargetOpcode::G_FTAN:
3378 case TargetOpcode::G_FACOS:
3379 case TargetOpcode::G_FASIN:
3380 case TargetOpcode::G_FATAN:
3381 case TargetOpcode::G_FATAN2:
3382 case TargetOpcode::G_FCOSH:
3383 case TargetOpcode::G_FSINH:
3384 case TargetOpcode::G_FTANH:
3385 case TargetOpcode::G_FLOG10:
3386 case TargetOpcode::G_FLOG:
3387 case TargetOpcode::G_FLOG2:
3388 case TargetOpcode::G_FRINT:
3389 case TargetOpcode::G_FNEARBYINT:
3390 case TargetOpcode::G_FSQRT:
3391 case TargetOpcode::G_FEXP:
3392 case TargetOpcode::G_FEXP2:
3393 case TargetOpcode::G_FEXP10:
3394 case TargetOpcode::G_FPOW:
3395 case TargetOpcode::G_INTRINSIC_TRUNC:
3396 case TargetOpcode::G_INTRINSIC_ROUND:
3397 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3398 assert(TypeIdx == 0);
3399 Observer.changingInstr(MI);
3400
3401 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3402 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3403
3404 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3405 Observer.changedInstr(MI);
3406 return Legalized;
3407 case TargetOpcode::G_FMODF: {
3408 Observer.changingInstr(MI);
3409 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3410
3411 widenScalarDst(MI, WideTy, 1, TargetOpcode::G_FPTRUNC);
3412 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3413 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3414 Observer.changedInstr(MI);
3415 return Legalized;
3416 }
3417 case TargetOpcode::G_FPOWI:
3418 case TargetOpcode::G_FLDEXP:
3419 case TargetOpcode::G_STRICT_FLDEXP: {
3420 if (TypeIdx == 0) {
3421 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3422 return UnableToLegalize;
3423
3424 Observer.changingInstr(MI);
3425 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3426 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3427 Observer.changedInstr(MI);
3428 return Legalized;
3429 }
3430
3431 if (TypeIdx == 1) {
3432 // For some reason SelectionDAG tries to promote to a libcall without
3433 // actually changing the integer type for promotion.
3434 Observer.changingInstr(MI);
3435 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3436 Observer.changedInstr(MI);
3437 return Legalized;
3438 }
3439
3440 return UnableToLegalize;
3441 }
3442 case TargetOpcode::G_FFREXP: {
3443 Observer.changingInstr(MI);
3444
3445 if (TypeIdx == 0) {
3446 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3447 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3448 } else {
3449 widenScalarDst(MI, WideTy, 1);
3450 }
3451
3452 Observer.changedInstr(MI);
3453 return Legalized;
3454 }
3455 case TargetOpcode::G_LROUND:
3456 case TargetOpcode::G_LLROUND:
3457 Observer.changingInstr(MI);
3458
3459 if (TypeIdx == 0)
3460 widenScalarDst(MI, WideTy);
3461 else
3462 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3463
3464 Observer.changedInstr(MI);
3465 return Legalized;
3466
3467 case TargetOpcode::G_INTTOPTR:
3468 if (TypeIdx != 1)
3469 return UnableToLegalize;
3470
3471 Observer.changingInstr(MI);
3472 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3473 Observer.changedInstr(MI);
3474 return Legalized;
3475 case TargetOpcode::G_PTRTOINT:
3476 if (TypeIdx != 0)
3477 return UnableToLegalize;
3478
3479 Observer.changingInstr(MI);
3480 widenScalarDst(MI, WideTy, 0);
3481 Observer.changedInstr(MI);
3482 return Legalized;
3483 case TargetOpcode::G_BUILD_VECTOR: {
3484 Observer.changingInstr(MI);
3485
3486 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3487 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3488 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3489
3490 // Avoid changing the result vector type if the source element type was
3491 // requested.
3492 if (TypeIdx == 1) {
3493 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3494 } else {
3495 widenScalarDst(MI, WideTy, 0);
3496 }
3497
3498 Observer.changedInstr(MI);
3499 return Legalized;
3500 }
3501 case TargetOpcode::G_SEXT_INREG:
3502 if (TypeIdx != 0)
3503 return UnableToLegalize;
3504
3505 Observer.changingInstr(MI);
3506 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3507 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3508 Observer.changedInstr(MI);
3509 return Legalized;
3510 case TargetOpcode::G_PTRMASK: {
3511 if (TypeIdx != 1)
3512 return UnableToLegalize;
3513 Observer.changingInstr(MI);
3514 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3515 Observer.changedInstr(MI);
3516 return Legalized;
3517 }
3518 case TargetOpcode::G_VECREDUCE_ADD: {
3519 if (TypeIdx != 1)
3520 return UnableToLegalize;
3521 Observer.changingInstr(MI);
3522 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3523 widenScalarDst(MI, WideTy.getScalarType(), 0, TargetOpcode::G_TRUNC);
3524 Observer.changedInstr(MI);
3525 return Legalized;
3526 }
3527 case TargetOpcode::G_VECREDUCE_FADD:
3528 case TargetOpcode::G_VECREDUCE_FMUL:
3529 case TargetOpcode::G_VECREDUCE_FMIN:
3530 case TargetOpcode::G_VECREDUCE_FMAX:
3531 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3532 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3533 if (TypeIdx != 0)
3534 return UnableToLegalize;
3535 Observer.changingInstr(MI);
3536 Register VecReg = MI.getOperand(1).getReg();
3537 LLT VecTy = MRI.getType(VecReg);
3538 LLT WideVecTy = VecTy.changeElementType(WideTy);
3539 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3540 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3541 Observer.changedInstr(MI);
3542 return Legalized;
3543 }
3544 case TargetOpcode::G_VSCALE: {
3545 MachineOperand &SrcMO = MI.getOperand(1);
3546 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3547 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3548 // The CImm is always a signed value
3549 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3550 Observer.changingInstr(MI);
3551 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3552 widenScalarDst(MI, WideTy);
3553 Observer.changedInstr(MI);
3554 return Legalized;
3555 }
3556 case TargetOpcode::G_SPLAT_VECTOR: {
3557 if (TypeIdx != 1)
3558 return UnableToLegalize;
3559
3560 Observer.changingInstr(MI);
3561 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3562 Observer.changedInstr(MI);
3563 return Legalized;
3564 }
3565 case TargetOpcode::G_INSERT_SUBVECTOR: {
3566 if (TypeIdx != 0)
3567 return UnableToLegalize;
3568
3570 Register BigVec = IS.getBigVec();
3571 Register SubVec = IS.getSubVec();
3572
3573 LLT SubVecTy = MRI.getType(SubVec);
3574 LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
3575
3576 // Widen the G_INSERT_SUBVECTOR
3577 auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
3578 auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3579 auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3580 IS.getIndexImm());
3581
3582 // Truncate back down
3583 auto SplatZero = MIRBuilder.buildSplatVector(
3584 WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
3585 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, IS.getReg(0), WideInsert,
3586 SplatZero);
3587
3588 MI.eraseFromParent();
3589
3590 return Legalized;
3591 }
3592 }
3593}
3594
3596 MachineIRBuilder &B, Register Src, LLT Ty) {
3597 auto Unmerge = B.buildUnmerge(Ty, Src);
3598 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3599 Pieces.push_back(Unmerge.getReg(I));
3600}
3601
3602static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3603 MachineIRBuilder &MIRBuilder) {
3604 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3605 MachineFunction &MF = MIRBuilder.getMF();
3606 const DataLayout &DL = MIRBuilder.getDataLayout();
3607 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3608 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3609 LLT DstLLT = MRI.getType(DstReg);
3610
3611 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3612
3613 auto Addr = MIRBuilder.buildConstantPool(
3614 AddrPtrTy,
3615 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3616
3617 MachineMemOperand *MMO =
3619 MachineMemOperand::MOLoad, DstLLT, Alignment);
3620
3621 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3622}
3623
3626 const MachineOperand &ConstOperand = MI.getOperand(1);
3627 const Constant *ConstantVal = ConstOperand.getCImm();
3628
3629 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3630 MI.eraseFromParent();
3631
3632 return Legalized;
3633}
3634
3637 const MachineOperand &ConstOperand = MI.getOperand(1);
3638 const Constant *ConstantVal = ConstOperand.getFPImm();
3639
3640 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3641 MI.eraseFromParent();
3642
3643 return Legalized;
3644}
3645
3648 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3649 if (SrcTy.isVector()) {
3650 LLT SrcEltTy = SrcTy.getElementType();
3652
3653 if (DstTy.isVector()) {
3654 int NumDstElt = DstTy.getNumElements();
3655 int NumSrcElt = SrcTy.getNumElements();
3656
3657 LLT DstEltTy = DstTy.getElementType();
3658 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3659 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3660
3661 // If there's an element size mismatch, insert intermediate casts to match
3662 // the result element type.
3663 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3664 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3665 //
3666 // =>
3667 //
3668 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3669 // %3:_(<2 x s8>) = G_BITCAST %2
3670 // %4:_(<2 x s8>) = G_BITCAST %3
3671 // %1:_(<4 x s16>) = G_CONCAT_VECTORS %3, %4
3672 DstCastTy = DstTy.changeVectorElementCount(
3673 ElementCount::getFixed(NumDstElt / NumSrcElt));
3674 SrcPartTy = SrcEltTy;
3675 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3676 //
3677 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3678 //
3679 // =>
3680 //
3681 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3682 // %3:_(s16) = G_BITCAST %2
3683 // %4:_(s16) = G_BITCAST %3
3684 // %1:_(<2 x s16>) = G_BUILD_VECTOR %3, %4
3685 SrcPartTy = SrcTy.changeVectorElementCount(
3686 ElementCount::getFixed(NumSrcElt / NumDstElt));
3687 DstCastTy = DstEltTy;
3688 }
3689
3690 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3691 for (Register &SrcReg : SrcRegs)
3692 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3693 } else
3694 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3695
3696 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3697 MI.eraseFromParent();
3698 return Legalized;
3699 }
3700
3701 if (DstTy.isVector()) {
3703 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3704 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3705 MI.eraseFromParent();
3706 return Legalized;
3707 }
3708
3709 return UnableToLegalize;
3710}
3711
3712/// Figure out the bit offset into a register when coercing a vector index for
3713/// the wide element type. This is only for the case when promoting vector to
3714/// one with larger elements.
3715//
3716///
3717/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3718/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3720 Register Idx,
3721 unsigned NewEltSize,
3722 unsigned OldEltSize) {
3723 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3724 LLT IdxTy = B.getMRI()->getType(Idx);
3725
3726 // Now figure out the amount we need to shift to get the target bits.
3727 auto OffsetMask = B.buildConstant(
3728 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3729 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3730 return B.buildShl(IdxTy, OffsetIdx,
3731 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3732}
3733
/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
/// is casting to a vector with a smaller element size, perform multiple element
/// extracts and merge the results. If this is coercing to a vector with larger
/// elements, index the bitcasted vector and extract the target element with bit
/// operations. This is intended to force the indexing in the native register
/// size for architectures that can dynamically index the register file.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                         LLT CastTy) {
  // Only the source vector operand (type index 1) can be bitcast here.
  if (TypeIdx != 1)
    return UnableToLegalize;

  auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();

  LLT SrcEltTy = SrcVecTy.getElementType();
  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = SrcVecTy.getNumElements();

  LLT NewEltTy = CastTy.getScalarType();
  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);

  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = SrcEltTy.getSizeInBits();
  if (NewNumElts > OldNumElts) {
    // Decreasing the vector element size
    //
    // e.g. i64 = extract_vector_elt x:v2i64, y:i32
    //  =>
    //  v4i32:castx = bitcast x:v2i64
    //
    // i64 = bitcast
    //   (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
    //                       (i32 (extract_vector_elt castx, (2 * y + 1)))
    //
    if (NewNumElts % OldNumElts != 0)
      return UnableToLegalize;

    // Type of the intermediate result vector.
    const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
    LLT MidTy =
        CastTy.changeElementCount(ElementCount::getFixed(NewEltsPerOldElt));

    auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);

    SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
    // Index of the first narrow element belonging to the requested element.
    auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);

    // Gather the narrow pieces that make up the requested wide element.
    for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
      auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
      auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
      auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
      NewOps[I] = Elt.getReg(0);
    }

    // Reassemble the pieces and reinterpret them as the original element type.
    auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
    MIRBuilder.buildBitcast(Dst, NewVec);
    MI.eraseFromParent();
    return Legalized;
  }

  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure out
    // the bit offset we need to shift to get the target element. A general
    // expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    // Increasing the vector element size.
    //   %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
    //
    //   =>
    //
    //   %cast = G_BITCAST %vec
    //   %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
    //   %wide_elt  = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
    //   %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
    //   %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
    //   %elt_bits = G_LSHR %wide_elt, %offset_bits
    //   %elt = G_TRUNC %elt_bits

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    Register WideElt = CastVec;
    if (CastTy.isVector()) {
      WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                     ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
      MIRBuilder, Idx, NewEltSize, OldEltSize);

    // Shift the wide element to get the target element.
    auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
    MIRBuilder.buildTrunc(Dst, ExtractedBits);
    MI.eraseFromParent();
    return Legalized;
  }

  // Same element count as the cast type: this strategy does not apply.
  return UnableToLegalize;
}
3842
/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits in \p
/// TargetReg, while preserving other bits in \p TargetReg.
///
/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
static Register buildBitFieldInsert(MachineIRBuilder &B,
                                    Register TargetReg, Register InsertReg,
                                    Register OffsetBits) {
  LLT TargetTy = B.getMRI()->getType(TargetReg);
  LLT InsertTy = B.getMRI()->getType(InsertReg);
  // Zero-extend first so the left shift cannot drag in undefined high bits.
  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);

  // Produce a bitmask of the value to insert
  auto EltMask = B.buildConstant(
    TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
                                   InsertTy.getSizeInBits()));
  // Shift it into position
  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);

  // Clear out the bits in the wide element
  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);

  // The value to insert has all zeros already, so stick it into the masked
  // wide element.
  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
}
3870
/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
/// is increasing the element size, perform the indexing in the target element
/// type, and use bit operations to insert at the element position. This is
/// intended for architectures that can dynamically index the register file and
/// want to force indexing in the native register size.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                        LLT CastTy) {
  // Only the destination vector type (type index 0) can be bitcast here.
  if (TypeIdx != 0)
    return UnableToLegalize;

  auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
      MI.getFirst4RegLLTs();
  LLT VecTy = DstTy;

  LLT VecEltTy = VecTy.getElementType();
  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = VecEltTy.getSizeInBits();

  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = VecTy.getNumElements();

  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure out
    // the bit offset we need to shift to get the target element. A general
    // expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    Register ExtractedElt = CastVec;
    if (CastTy.isVector()) {
      // Read the wide element that contains the target narrow element.
      ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                          ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
      MIRBuilder, Idx, NewEltSize, OldEltSize);

    // Splice the new value into the wide element, preserving the other bits.
    Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
                                               Val, OffsetBits);
    if (CastTy.isVector()) {
      // Write the updated wide element back into the vector.
      InsertedElt = MIRBuilder.buildInsertVectorElement(
        CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
    }

    MIRBuilder.buildBitcast(Dst, InsertedElt);
    MI.eraseFromParent();
    return Legalized;
  }

  // Decreasing the element size (NewNumElts >= OldNumElts) is not handled.
  return UnableToLegalize;
}
3935
// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
// those that have smaller than legal operands.
//
// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
//
// ===>
//
// s32 = G_BITCAST <4 x s8>
// s32 = G_BITCAST <4 x s8>
// s32 = G_BITCAST <4 x s8>
// s32 = G_BITCAST <4 x s8>
// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
// <16 x s8> = G_BITCAST <4 x s32>
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT CastTy) {
  // Convert it to CONCAT instruction
  auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
  if (!ConcatMI) {
    return UnableToLegalize;
  }

  // Check if bitcast is Legal
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  LLT SrcScalTy = CastTy.getScalarType();

  // Check if the build vector is Legal
  if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
    return UnableToLegalize;
  }

  // Bitcast the sources: each small source vector becomes one scalar of the
  // cast type.
  SmallVector<Register> BitcastRegs;
  for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
    BitcastRegs.push_back(
        MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
            .getReg(0));
  }

  // Build the scalar values into a vector
  Register BuildReg =
      MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
  MIRBuilder.buildBitcast(DstReg, BuildReg);

  MI.eraseFromParent();
  return Legalized;
}
3983
// This bitcasts a shuffle vector to a different type currently of the same
// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
// will be used instead.
//
// <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
// ===>
// <4 x s64> = G_PTRTOINT <4 x p0>
// <4 x s64> = G_PTRTOINT <4 x p0>
// <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
// <16 x p0> = G_INTTOPTR <16 x s64>
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
                                      LLT CastTy) {
  auto ShuffleMI = cast<GShuffleVector>(&MI);
  LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
  LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));

  // We currently only handle vectors of the same size.
  if (TypeIdx != 0 ||
      CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
      CastTy.getElementCount() != DstTy.getElementCount())
    return UnableToLegalize;

  LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());

  // buildCast chooses the appropriate cast opcode (e.g. ptrtoint/inttoptr for
  // pointer element types).
  auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
  auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
  auto Shuf =
      MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
  MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);

  MI.eraseFromParent();
  return Legalized;
}
4018
/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
///
/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
///
/// ===>
///
/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
/// <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i8>, N / 8
/// <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
                                         LLT CastTy) {
  auto ES = cast<GExtractSubvector>(&MI);

  if (!CastTy.isVector())
    return UnableToLegalize;

  if (TypeIdx != 0)
    return UnableToLegalize;

  Register Dst = ES->getReg(0);
  Register Src = ES->getSrcVec();
  uint64_t Idx = ES->getIndexImm();

  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  ElementCount DstTyEC = DstTy.getElementCount();
  ElementCount SrcTyEC = SrcTy.getElementCount();
  auto DstTyMinElts = DstTyEC.getKnownMinValue();
  auto SrcTyMinElts = SrcTyEC.getKnownMinValue();

  // Already in the requested type; nothing to rewrite.
  if (DstTy == CastTy)
    return Legalized;

  if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
    return UnableToLegalize;

  unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
  unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
  if (CastEltSize < DstEltSize)
    return UnableToLegalize;

  // Each cast element packs AdjustAmt destination elements, so the index and
  // both element counts must divide evenly by it.
  auto AdjustAmt = CastEltSize / DstEltSize;
  if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
      SrcTyMinElts % AdjustAmt != 0)
    return UnableToLegalize;

  Idx /= AdjustAmt;
  // NOTE(review): using AdjustAmt as the new scalar size in bits only
  // preserves the total size when the original elements are 1 bit wide
  // (i1 predicate vectors, as in the example above) — confirm callers.
  SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
  auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
  auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
  MIRBuilder.buildBitcast(Dst, PromotedES);

  ES->eraseFromParent();
  return Legalized;
}
4077
/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
///
/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
///                                         <vscale x 8 x i1>,
///                                         N
///
/// ===>
///
/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
///                                        <vscale x 1 x i8>, N / 8
/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
                                        LLT CastTy) {
  auto ES = cast<GInsertSubvector>(&MI);

  if (!CastTy.isVector())
    return UnableToLegalize;

  if (TypeIdx != 0)
    return UnableToLegalize;

  Register Dst = ES->getReg(0);
  Register BigVec = ES->getBigVec();
  Register SubVec = ES->getSubVec();
  uint64_t Idx = ES->getIndexImm();

  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  LLT DstTy = MRI.getType(Dst);
  LLT BigVecTy = MRI.getType(BigVec);
  LLT SubVecTy = MRI.getType(SubVec);

  // Already in the requested type; nothing to rewrite.
  if (DstTy == CastTy)
    return Legalized;

  if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
    return UnableToLegalize;

  ElementCount DstTyEC = DstTy.getElementCount();
  ElementCount BigVecTyEC = BigVecTy.getElementCount();
  ElementCount SubVecTyEC = SubVecTy.getElementCount();
  auto DstTyMinElts = DstTyEC.getKnownMinValue();
  auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
  auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();

  unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
  unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
  if (CastEltSize < DstEltSize)
    return UnableToLegalize;

  // Each cast element packs AdjustAmt destination elements, so the index and
  // all three element counts must divide evenly by it.
  auto AdjustAmt = CastEltSize / DstEltSize;
  if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
      BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
    return UnableToLegalize;

  Idx /= AdjustAmt;
  // NOTE(review): using AdjustAmt as the new scalar size in bits only
  // preserves the total size when the original elements are 1 bit wide
  // (i1 predicate vectors, as in the example above) — confirm callers.
  BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
  SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
  auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
  auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
  auto PromotedIS =
      MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
  MIRBuilder.buildBitcast(Dst, PromotedIS);

  ES->eraseFromParent();
  return Legalized;
}
4148
LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
  Register DstReg = LoadMI.getDstReg();
  Register PtrReg = LoadMI.getPointerReg();
  LLT DstTy = MRI.getType(DstReg);
  MachineMemOperand &MMO = LoadMI.getMMO();
  LLT MemTy = MMO.getMemoryType();
  MachineFunction &MF = MIRBuilder.getMF();

  LLT EltTy = MemTy.getScalarType();

  unsigned MemSizeInBits = MemTy.getSizeInBits();
  unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();

  if (MemSizeInBits != MemStoreSizeInBits) {
    if (MemTy.isVector())
      return UnableToLegalize;

    // Promote to a byte-sized load if not loading an integral number of
    // bytes.  For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
    LLT WideMemTy = EltTy.changeElementSize(MemStoreSizeInBits);
    MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);

    Register LoadReg = DstReg;
    LLT LoadTy = DstTy;

    // If this wasn't already an extending load, we need to widen the result
    // register to avoid creating a load with a narrower result than the source.
    if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
      LoadTy = WideMemTy;
      LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
    }

    if (isa<GSExtLoad>(LoadMI)) {
      auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
      MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
    } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
      auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
      // The extra bits are guaranteed to be zero, since we stored them that
      // way.  A zext load from Wide thus automatically gives zext from MemVT.
      MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
    } else {
      MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
    }

    if (DstTy != LoadTy)
      MIRBuilder.buildTrunc(DstReg, LoadReg);

    LoadMI.eraseFromParent();
    return Legalized;
  }

  // Big endian lowering not implemented.
  if (MIRBuilder.getDataLayout().isBigEndian())
    return UnableToLegalize;

  // This load needs splitting into power of 2 sized loads.
  //
  // Our strategy here is to generate anyextending loads for the smaller
  // types up to next power-2 result type, and then combine the two larger
  // result values together, before truncating back down to the non-pow-2
  // type.
  // E.g. v1 = i24 load =>
  // v2 = i32 zextload (2 byte)
  // v3 = i32 load (1 byte)
  // v4 = i32 shl v3, 16
  // v5 = i32 or v4, v2
  // v1 = i24 trunc v5
  // By doing this we generate the correct truncate which should get
  // combined away as an artifact with a matching extend.

  uint64_t LargeSplitSize, SmallSplitSize;

  if (!isPowerOf2_32(MemSizeInBits)) {
    // This load needs splitting into power of 2 sized loads.
    LargeSplitSize = llvm::bit_floor(MemSizeInBits);
    SmallSplitSize = MemSizeInBits - LargeSplitSize;
  } else {
    // This is already a power of 2, but we still need to split this in half.
    //
    // Assume we're being asked to decompose an unaligned load.
    // TODO: If this requires multiple splits, handle them all at once.
    auto &Ctx = MF.getFunction().getContext();
    if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
      return UnableToLegalize;

    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
  }

  if (MemTy.isVector()) {
    // TODO: Handle vector extloads
    if (MemTy != DstTy)
      return UnableToLegalize;

    Align Alignment = LoadMI.getAlign();
    // Given an alignment larger than the size of the memory, we can increase
    // the size of the load without needing to scalarize it.
    // NOTE(review): the tail of this condition and the new element count
    // below were missing from this excerpt and have been reconstructed —
    // verify against upstream before relying on them.
    if (Alignment.value() * 8 > MemSizeInBits &&
        isPowerOf2_64(Alignment.value())) {
      LLT MoreTy = DstTy.changeVectorElementCount(
          ElementCount::getFixed(PowerOf2Ceil(DstTy.getNumElements())));
      MachineMemOperand *NewMMO = MF.getMachineMemOperand(&MMO, 0, MoreTy);
      auto NewLoad = MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
      // Load the padded vector, then drop the extra trailing elements.
      MIRBuilder.buildDeleteTrailingVectorElements(LoadMI.getReg(0),
                                                   NewLoad.getReg(0));
      LoadMI.eraseFromParent();
      return Legalized;
    }

    // TODO: We can do better than scalarizing the vector and at least split it
    // in half.
    return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
  }

  MachineMemOperand *LargeMMO =
      MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
  MachineMemOperand *SmallMMO =
      MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);

  LLT PtrTy = MRI.getType(PtrReg);
  unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());

  LLT AnyExtTy;
  LLT OffsetCstRes;
  if (EltTy.isPointer()) {
    // Pointers are combined as integers and converted back at the end.
    AnyExtTy = LLT::scalar(AnyExtSize);
    OffsetCstRes = LLT::scalar(PtrTy.getSizeInBits());
  } else {
    AnyExtTy = EltTy.changeElementSize(AnyExtSize);
    OffsetCstRes = EltTy.changeElementSize(PtrTy.getSizeInBits());
  }

  // Low part is zero-extended so the OR below cannot see stray high bits.
  auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
                                             PtrReg, *LargeMMO);

  auto OffsetCst = MIRBuilder.buildConstant(OffsetCstRes, LargeSplitSize / 8);
  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
  auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
  auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
                                             SmallPtr, *SmallMMO);

  // Shift the high part into position and combine with the low part.
  auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
  auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);

  if (AnyExtTy == DstTy)
    MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
  else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
    auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
    MIRBuilder.buildTrunc(DstReg, {Or});
  } else {
    assert(DstTy.isPointer() && "expected pointer");
    auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);

    // FIXME: We currently consider this to be illegal for non-integral address
    // spaces, but we need still need a way to reinterpret the bits.
    MIRBuilder.buildIntToPtr(DstReg, Or);
  }

  LoadMI.eraseFromParent();
  return Legalized;
}
4311
LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
  // Lower a non-power of 2 store into multiple pow-2 stores.
  // E.g. split an i24 store into an i16 store + i8 store.
  // We do this by first extending the stored value to the next largest power
  // of 2 type, and then using truncating stores to store the components.
  // By doing this, likewise with G_LOAD, generate an extend that can be
  // artifact-combined away instead of leaving behind extracts.
  Register SrcReg = StoreMI.getValueReg();
  Register PtrReg = StoreMI.getPointerReg();
  LLT SrcTy = MRI.getType(SrcReg);
  MachineFunction &MF = MIRBuilder.getMF();
  MachineMemOperand &MMO = **StoreMI.memoperands_begin();
  LLT MemTy = MMO.getMemoryType();

  unsigned StoreWidth = MemTy.getSizeInBits();
  unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();

  if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
    // Promote to a byte-sized store with upper bits zero if not
    // storing an integral number of bytes.  For example, promote
    // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
    LLT WideTy = LLT::scalar(StoreSizeInBits);

    if (StoreSizeInBits > SrcTy.getSizeInBits()) {
      // Avoid creating a store with a narrower source than result.
      SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
      SrcTy = WideTy;
    }

    // Zero the padding bits so the widened store has a defined value.
    auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);

    MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
    MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
    StoreMI.eraseFromParent();
    return Legalized;
  }

  if (MemTy.isVector()) {
    // A truncating vector store (e.g. of an i1 vector) cannot simply be
    // padded; pack the bits into an integer instead.
    if (MemTy != SrcTy)
      return scalarizeVectorBooleanStore(StoreMI);

    // TODO: We can do better than scalarizing the vector and at least split it
    // in half.
    return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
  }

  unsigned MemSizeInBits = MemTy.getSizeInBits();
  uint64_t LargeSplitSize, SmallSplitSize;

  if (!isPowerOf2_32(MemSizeInBits)) {
    // Split into the largest power-of-2 piece plus the remainder.
    LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
    SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
  } else {
    // Power-of-2 size: assume we were asked to split an unaligned store.
    auto &Ctx = MF.getFunction().getContext();
    if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
      return UnableToLegalize; // Don't know what we're being asked to do.

    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
  }

  // Extend to the next pow-2. If this store was itself the result of lowering,
  // e.g. an s56 store being broken into s32 + s24, we might have a stored type
  // that's wider than the stored size.
  unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
  const LLT NewSrcTy = LLT::scalar(AnyExtSize);

  if (SrcTy.isPointer()) {
    // Pointers are split as integers.
    const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
    SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
  }

  auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);

  // Obtain the smaller value by shifting away the larger value.
  auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
  auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);

  // Generate the PtrAdd and truncating stores.
  LLT PtrTy = MRI.getType(PtrReg);
  auto OffsetCst = MIRBuilder.buildConstant(
    LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
  auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);

  MachineMemOperand *LargeMMO =
      MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
  MachineMemOperand *SmallMMO =
      MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
  MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
  MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
  StoreMI.eraseFromParent();
  return Legalized;
}
4405
LegalizerHelper::LegalizeResult
LegalizerHelper::scalarizeVectorBooleanStore(GStore &StoreMI) {
  Register SrcReg = StoreMI.getValueReg();
  Register PtrReg = StoreMI.getPointerReg();
  LLT SrcTy = MRI.getType(SrcReg);
  MachineMemOperand &MMO = **StoreMI.memoperands_begin();
  LLT MemTy = MMO.getMemoryType();
  LLT MemScalarTy = MemTy.getElementType();
  MachineFunction &MF = MIRBuilder.getMF();

  assert(SrcTy.isVector() && "Expect a vector store type");

  if (!MemScalarTy.isByteSized()) {
    // We need to build an integer scalar of the vector bit pattern.
    // It's not legal for us to add padding when storing a vector.
    unsigned NumBits = MemTy.getSizeInBits();
    LLT IntTy = LLT::integer(NumBits);
    // Accumulator for the packed bits, built up with shift+or per element.
    auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
    LLT IdxTy = TLI.getVectorIdxLLT(MF.getDataLayout());

    for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
      // Extract each element, truncate it to the memory scalar width, and
      // zero-extend into the accumulator's type so only the stored bits are
      // set.
      auto Elt = MIRBuilder.buildExtractVectorElement(
          SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
      auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
      auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
      // Big-endian targets place element 0 in the most significant position.
      unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
                                  ? (MemTy.getNumElements() - 1) - I
                                  : I;
      auto ShiftAmt = MIRBuilder.buildConstant(
          IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
      auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
      CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
    }
    // Store the packed integer in place of the original vector store.
    auto PtrInfo = MMO.getPointerInfo();
    auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
    MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
    StoreMI.eraseFromParent();
    return Legalized;
  }

  // TODO: implement simple scalarization.
  return UnableToLegalize;
}
4449
/// Dispatch a Bitcast legalization action: rewrite \p MI to operate on
/// \p CastTy for type index \p TypeIdx, reinterpreting register contents
/// without changing the bits.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_LOAD: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    MachineMemOperand &MMO = **MI.memoperands_begin();

    // Not sure how to interpret a bitcast of an extending load.
    if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    bitcastDst(MI, CastTy, 0);
    MMO.setType(CastTy);
    // The range metadata is no longer valid when reinterpreted as a different
    // type.
    MMO.clearRanges();
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    MachineMemOperand &MMO = **MI.memoperands_begin();

    // Not sure how to interpret a bitcast of a truncating store.
    if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 0);
    MMO.setType(CastTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_SELECT: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    // Only the two value operands are cast; the condition stays untouched.
    if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
      LLVM_DEBUG(
          dbgs() << "bitcast action not implemented for vector select\n");
      return UnableToLegalize;
    }

    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 2);
    bitcastSrc(MI, CastTy, 3);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Bitwise ops are insensitive to the interpretation of the bits, so all
    // operands and the result can be cast uniformly.
    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 1);
    bitcastSrc(MI, CastTy, 2);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
  case TargetOpcode::G_CONCAT_VECTORS:
    return bitcastConcatVector(MI, TypeIdx, CastTy);
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return bitcastShuffleVector(MI, TypeIdx, CastTy);
  case TargetOpcode::G_EXTRACT_SUBVECTOR:
    return bitcastExtractSubvector(MI, TypeIdx, CastTy);
  case TargetOpcode::G_INSERT_SUBVECTOR:
    return bitcastInsertSubvector(MI, TypeIdx, CastTy);
  default:
    return UnableToLegalize;
  }
}
4530
// Legalize an instruction by changing the opcode in place.
void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
  // Wrap the in-place mutation in observer notifications so dependent
  // analyses stay in sync.
  // NOTE(review): the changingInstr/changedInstr lines were missing from this
  // excerpt and have been reconstructed — verify against upstream.
  Observer.changingInstr(MI);
  MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
  Observer.changedInstr(MI);
}
4537
4539LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4540 using namespace TargetOpcode;
4541
4542 switch(MI.getOpcode()) {
4543 default:
4544 return UnableToLegalize;
4545 case TargetOpcode::G_FCONSTANT:
4546 return lowerFConstant(MI);
4547 case TargetOpcode::G_BITCAST:
4548 return lowerBitcast(MI);
4549 case TargetOpcode::G_SREM:
4550 case TargetOpcode::G_UREM: {
4551 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4552 auto Quot =
4553 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4554 {MI.getOperand(1), MI.getOperand(2)});
4555
4556 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
4557 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
4558 MI.eraseFromParent();
4559 return Legalized;
4560 }
4561 case TargetOpcode::G_SADDO:
4562 case TargetOpcode::G_SSUBO:
4563 return lowerSADDO_SSUBO(MI);
4564 case TargetOpcode::G_SADDE:
4565 return lowerSADDE(MI);
4566 case TargetOpcode::G_SSUBE:
4567 return lowerSSUBE(MI);
4568 case TargetOpcode::G_UMULH:
4569 case TargetOpcode::G_SMULH:
4570 return lowerSMULH_UMULH(MI);
4571 case TargetOpcode::G_SMULO:
4572 case TargetOpcode::G_UMULO: {
4573 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4574 // result.
4575 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
4576 LLT Ty = MRI.getType(Res);
4577
4578 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4579 ? TargetOpcode::G_SMULH
4580 : TargetOpcode::G_UMULH;
4581
4582 Observer.changingInstr(MI);
4583 const auto &TII = MIRBuilder.getTII();
4584 MI.setDesc(TII.get(TargetOpcode::G_MUL));
4585 MI.removeOperand(1);
4586 Observer.changedInstr(MI);
4587
4588 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4589 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4590
4591 // Move insert point forward so we can use the Res register if needed.
4592 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
4593
4594 // For *signed* multiply, overflow is detected by checking:
4595 // (hi != (lo >> bitwidth-1))
4596 if (Opcode == TargetOpcode::G_SMULH) {
4597 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4598 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4599 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
4600 } else {
4601 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
4602 }
4603 return Legalized;
4604 }
4605 case TargetOpcode::G_FNEG: {
4606 auto [Res, ResTy, SubByReg, SubByRegTy] = MI.getFirst2RegLLTs();
4607 LLT TyInt =
4608 ResTy.changeElementType(LLT::integer(ResTy.getScalarSizeInBits()));
4609 Register CastedSubByReg = SubByReg;
4610
4611 if (!SubByRegTy.getScalarType().isAnyScalar() &&
4612 !SubByRegTy.getScalarType().isInteger()) {
4613 auto BitcastDst = SubByRegTy.changeElementType(
4614 LLT::integer(SubByRegTy.getScalarSizeInBits()));
4615 CastedSubByReg = MIRBuilder.buildBitcast(BitcastDst, SubByReg).getReg(0);
4616 }
4617
4618 auto SignMask = MIRBuilder.buildConstant(
4619 TyInt, APInt::getSignMask(TyInt.getScalarSizeInBits()));
4620
4621 if (ResTy != TyInt) {
4622 Register NewDst =
4623 MIRBuilder.buildXor(TyInt, CastedSubByReg, SignMask).getReg(0);
4624 MIRBuilder.buildBitcast(Res, NewDst);
4625 } else
4626 MIRBuilder.buildXor(Res, CastedSubByReg, SignMask).getReg(0);
4627
4628 MI.eraseFromParent();
4629 return Legalized;
4630 }
4631 case TargetOpcode::G_FSUB:
4632 case TargetOpcode::G_STRICT_FSUB: {
4633 auto [Res, LHS, RHS] = MI.getFirst3Regs();
4634 LLT Ty = MRI.getType(Res);
4635
4636 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4637 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4638
4639 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4640 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4641 else
4642 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4643
4644 MI.eraseFromParent();
4645 return Legalized;
4646 }
4647 case TargetOpcode::G_FMAD:
4648 return lowerFMad(MI);
4649 case TargetOpcode::G_FFLOOR:
4650 return lowerFFloor(MI);
4651 case TargetOpcode::G_LROUND:
4652 case TargetOpcode::G_LLROUND: {
4653 Register DstReg = MI.getOperand(0).getReg();
4654 Register SrcReg = MI.getOperand(1).getReg();
4655 LLT SrcTy = MRI.getType(SrcReg);
4656 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4657 {SrcReg});
4658 MIRBuilder.buildFPTOSI(DstReg, Round);
4659 MI.eraseFromParent();
4660 return Legalized;
4661 }
4662 case TargetOpcode::G_INTRINSIC_ROUND:
4663 return lowerIntrinsicRound(MI);
4664 case TargetOpcode::G_FRINT: {
4665 // Since round even is the assumed rounding mode for unconstrained FP
4666 // operations, rint and roundeven are the same operation.
4667 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4668 return Legalized;
4669 }
4670 case TargetOpcode::G_INTRINSIC_LRINT:
4671 case TargetOpcode::G_INTRINSIC_LLRINT: {
4672 Register DstReg = MI.getOperand(0).getReg();
4673 Register SrcReg = MI.getOperand(1).getReg();
4674 LLT SrcTy = MRI.getType(SrcReg);
4675 auto Round =
4676 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4677 MIRBuilder.buildFPTOSI(DstReg, Round);
4678 MI.eraseFromParent();
4679 return Legalized;
4680 }
4681 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4682 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
4683 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4684 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4685 **MI.memoperands_begin());
4686 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4687 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4688 MI.eraseFromParent();
4689 return Legalized;
4690 }
4691 case TargetOpcode::G_LOAD:
4692 case TargetOpcode::G_SEXTLOAD:
4693 case TargetOpcode::G_ZEXTLOAD:
4694 return lowerLoad(cast<GAnyLoad>(MI));
4695 case TargetOpcode::G_STORE:
4696 return lowerStore(cast<GStore>(MI));
4697 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4698 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4699 case TargetOpcode::G_CTLZ:
4700 case TargetOpcode::G_CTTZ:
4701 case TargetOpcode::G_CTPOP:
4702 case TargetOpcode::G_CTLS:
4703 return lowerBitCount(MI);
4704 case G_UADDO: {
4705 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
4706
4707 Register NewRes = MRI.cloneVirtualRegister(Res);
4708
4709 MIRBuilder.buildAdd(NewRes, LHS, RHS);
4710 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4711
4712 MIRBuilder.buildCopy(Res, NewRes);
4713
4714 MI.eraseFromParent();
4715 return Legalized;
4716 }
4717 case G_UADDE: {
4718 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
4719 const LLT CondTy = MRI.getType(CarryOut);
4720 const LLT Ty = MRI.getType(Res);
4721
4722 Register NewRes = MRI.cloneVirtualRegister(Res);
4723
4724 // Initial add of the two operands.
4725 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
4726
4727 // Initial check for carry.
4728 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4729
4730 // Add the sum and the carry.
4731 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
4732 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4733
4734 // Second check for carry. We can only carry if the initial sum is all 1s
4735 // and the carry is set, resulting in a new sum of 0.
4736 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4737 auto ResEqZero =
4738 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
4739 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4740 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
4741
4742 MIRBuilder.buildCopy(Res, NewRes);
4743
4744 MI.eraseFromParent();
4745 return Legalized;
4746 }
4747 case G_USUBO: {
4748 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
4749
4750 MIRBuilder.buildSub(Res, LHS, RHS);
4751 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
4752
4753 MI.eraseFromParent();
4754 return Legalized;
4755 }
4756 case G_USUBE: {
4757 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
4758 const LLT CondTy = MRI.getType(BorrowOut);
4759 const LLT Ty = MRI.getType(Res);
4760
4761 // Initial subtract of the two operands.
4762 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
4763
4764 // Initial check for borrow.
4765 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4766
4767 // Subtract the borrow from the first subtract.
4768 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
4769 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4770
4771 // Second check for borrow. We can only borrow if the initial difference is
4772 // 0 and the borrow is set, resulting in a new difference of all 1s.
4773 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4774 auto TmpResEqZero =
4775 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4776 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4777 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4778
4779 MI.eraseFromParent();
4780 return Legalized;
4781 }
4782 case G_UITOFP:
4783 return lowerUITOFP(MI);
4784 case G_SITOFP:
4785 return lowerSITOFP(MI);
4786 case G_FPTOUI:
4787 return lowerFPTOUI(MI);
4788 case G_FPTOSI:
4789 return lowerFPTOSI(MI);
4790 case G_FPTOUI_SAT:
4791 case G_FPTOSI_SAT:
4792 return lowerFPTOINT_SAT(MI);
4793 case G_FPTRUNC:
4794 return lowerFPTRUNC(MI);
4795 case G_FPOWI:
4796 return lowerFPOWI(MI);
4797 case G_FMODF:
4798 return lowerFMODF(MI);
4799 case G_SMIN:
4800 case G_SMAX:
4801 case G_UMIN:
4802 case G_UMAX:
4803 return lowerMinMax(MI);
4804 case G_SCMP:
4805 case G_UCMP:
4806 return lowerThreewayCompare(MI);
4807 case G_FCOPYSIGN:
4808 return lowerFCopySign(MI);
4809 case G_FMINNUM:
4810 case G_FMAXNUM:
4811 case G_FMINIMUMNUM:
4812 case G_FMAXIMUMNUM:
4813 return lowerFMinNumMaxNum(MI);
4814 case G_FMINIMUM:
4815 case G_FMAXIMUM:
4816 return lowerFMinimumMaximum(MI);
4817 case G_MERGE_VALUES:
4818 return lowerMergeValues(MI);
4819 case G_UNMERGE_VALUES:
4820 return lowerUnmergeValues(MI);
4821 case TargetOpcode::G_SEXT_INREG: {
4822 assert(MI.getOperand(2).isImm() && "Expected immediate");
4823 int64_t SizeInBits = MI.getOperand(2).getImm();
4824
4825 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4826 LLT DstTy = MRI.getType(DstReg);
4827 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4828
4829 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4830 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4831 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4832 MI.eraseFromParent();
4833 return Legalized;
4834 }
4835 case G_EXTRACT_VECTOR_ELT:
4836 case G_INSERT_VECTOR_ELT:
4838 case G_SHUFFLE_VECTOR:
4839 return lowerShuffleVector(MI);
4840 case G_VECTOR_COMPRESS:
4841 return lowerVECTOR_COMPRESS(MI);
4842 case G_DYN_STACKALLOC:
4843 return lowerDynStackAlloc(MI);
4844 case G_STACKSAVE:
4845 return lowerStackSave(MI);
4846 case G_STACKRESTORE:
4847 return lowerStackRestore(MI);
4848 case G_EXTRACT:
4849 return lowerExtract(MI);
4850 case G_INSERT:
4851 return lowerInsert(MI);
4852 case G_BSWAP:
4853 return lowerBswap(MI);
4854 case G_BITREVERSE:
4855 return lowerBitreverse(MI);
4856 case G_READ_REGISTER:
4857 case G_WRITE_REGISTER:
4858 return lowerReadWriteRegister(MI);
4859 case G_UADDSAT:
4860 case G_USUBSAT: {
4861 // Try to make a reasonable guess about which lowering strategy to use. The
4862 // target can override this with custom lowering and calling the
4863 // implementation functions.
4864 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4865 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4866 return lowerAddSubSatToMinMax(MI);
4868 }
4869 case G_SADDSAT:
4870 case G_SSUBSAT: {
4871 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4872
4873 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4874 // since it's a shorter expansion. However, we would need to figure out the
4875 // preferred boolean type for the carry out for the query.
4876 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4877 return lowerAddSubSatToMinMax(MI);
4879 }
4880 case G_SSHLSAT:
4881 case G_USHLSAT:
4882 return lowerShlSat(MI);
4883 case G_ABS:
4884 return lowerAbsToAddXor(MI);
4885 case G_ABDS:
4886 case G_ABDU: {
4887 bool IsSigned = MI.getOpcode() == G_ABDS;
4888 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4889 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4890 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4891 return lowerAbsDiffToMinMax(MI);
4892 }
4893 return lowerAbsDiffToSelect(MI);
4894 }
4895 case G_FABS:
4896 return lowerFAbs(MI);
4897 case G_SELECT:
4898 return lowerSelect(MI);
4899 case G_IS_FPCLASS:
4900 return lowerISFPCLASS(MI);
4901 case G_SDIVREM:
4902 case G_UDIVREM:
4903 return lowerDIVREM(MI);
4904 case G_FSHL:
4905 case G_FSHR:
4906 return lowerFunnelShift(MI);
4907 case G_ROTL:
4908 case G_ROTR:
4909 return lowerRotate(MI);
4910 case G_MEMSET:
4911 case G_MEMCPY:
4912 case G_MEMMOVE:
4913 return lowerMemCpyFamily(MI);
4914 case G_MEMCPY_INLINE:
4915 return lowerMemcpyInline(MI);
4916 case G_ZEXT:
4917 case G_SEXT:
4918 case G_ANYEXT:
4919 return lowerEXT(MI);
4920 case G_TRUNC:
4921 return lowerTRUNC(MI);
4923 return lowerVectorReduction(MI);
4924 case G_VAARG:
4925 return lowerVAArg(MI);
4926 case G_ATOMICRMW_SUB: {
4927 auto [Ret, Mem, Val] = MI.getFirst3Regs();
4928 const LLT ValTy = MRI.getType(Val);
4929 MachineMemOperand *MMO = *MI.memoperands_begin();
4930
4931 auto VNeg = MIRBuilder.buildNeg(ValTy, Val);
4932 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4933 MI.eraseFromParent();
4934 return Legalized;
4935 }
4936 }
4937}
4938
4940 Align MinAlign) const {
4941 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4942 // datalayout for the preferred alignment. Also there should be a target hook
4943 // for this to allow targets to reduce the alignment and ignore the
4944 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4945 // the type.
4946 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4947}
4948
4951 MachinePointerInfo &PtrInfo) {
4952 MachineFunction &MF = MIRBuilder.getMF();
4953 const DataLayout &DL = MIRBuilder.getDataLayout();
4954 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4955
4956 unsigned AddrSpace = DL.getAllocaAddrSpace();
4957 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4958
4959 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4960 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4961}
4962
4964 const SrcOp &Val) {
4965 LLT SrcTy = Val.getLLTTy(MRI);
4966 Align StackTypeAlign =
4967 std::max(getStackTemporaryAlignment(SrcTy),
4969 MachinePointerInfo PtrInfo;
4970 auto StackTemp =
4971 createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo);
4972
4973 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4974 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4975}
4976
4978 LLT VecTy) {
4979 LLT IdxTy = B.getMRI()->getType(IdxReg);
4980 unsigned NElts = VecTy.getNumElements();
4981
4982 int64_t IdxVal;
4983 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4984 if (IdxVal < VecTy.getNumElements())
4985 return IdxReg;
4986 // If a constant index would be out of bounds, clamp it as well.
4987 }
4988
4989 if (isPowerOf2_32(NElts)) {
4990 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4991 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4992 }
4993
4994 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4995 .getReg(0);
4996}
4997
4999 Register Index) {
5000 LLT EltTy = VecTy.getElementType();
5001
5002 // Calculate the element offset and add it to the pointer.
5003 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
5004 assert(EltSize * 8 == EltTy.getSizeInBits() &&
5005 "Converting bits to bytes lost precision");
5006
5007 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
5008
5009 // Convert index to the correct size for the address space.
5010 const DataLayout &DL = MIRBuilder.getDataLayout();
5011 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
5012 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
5013 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
5014 if (IdxTy != MRI.getType(Index))
5015 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
5016
5017 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
5018 MIRBuilder.buildConstant(IdxTy, EltSize));
5019
5020 LLT PtrTy = MRI.getType(VecPtr);
5021 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
5022}
5023
5024#ifndef NDEBUG
5025/// Check that all vector operands have same number of elements. Other operands
5026/// should be listed in NonVecOp.
5029 std::initializer_list<unsigned> NonVecOpIndices) {
5030 if (MI.getNumMemOperands() != 0)
5031 return false;
5032
5033 LLT VecTy = MRI.getType(MI.getReg(0));
5034 if (!VecTy.isVector())
5035 return false;
5036 unsigned NumElts = VecTy.getNumElements();
5037
5038 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
5039 MachineOperand &Op = MI.getOperand(OpIdx);
5040 if (!Op.isReg()) {
5041 if (!is_contained(NonVecOpIndices, OpIdx))
5042 return false;
5043 continue;
5044 }
5045
5046 LLT Ty = MRI.getType(Op.getReg());
5047 if (!Ty.isVector()) {
5048 if (!is_contained(NonVecOpIndices, OpIdx))
5049 return false;
5050 continue;
5051 }
5052
5053 if (Ty.getNumElements() != NumElts)
5054 return false;
5055 }
5056
5057 return true;
5058}
5059#endif
5060
5061/// Fill \p DstOps with DstOps that have same number of elements combined as
5062/// the Ty. These DstOps have either scalar type when \p NumElts = 1 or are
5063/// vectors with \p NumElts elements. When Ty.getNumElements() is not multiple
5064/// of \p NumElts last DstOp (leftover) has fewer then \p NumElts elements.
5065static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
5066 unsigned NumElts) {
5067 LLT LeftoverTy;
5068 assert(Ty.isVector() && "Expected vector type");
5069 LLT NarrowTy = Ty.changeElementCount(ElementCount::getFixed(NumElts));
5070 int NumParts, NumLeftover;
5071 std::tie(NumParts, NumLeftover) =
5072 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
5073
5074 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
5075 for (int i = 0; i < NumParts; ++i) {
5076 DstOps.push_back(NarrowTy);
5077 }
5078
5079 if (LeftoverTy.isValid()) {
5080 assert(NumLeftover == 1 && "expected exactly one leftover");
5081 DstOps.push_back(LeftoverTy);
5082 }
5083}
5084
5085/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
5086/// made from \p Op depending on operand type.
5088 MachineOperand &Op) {
5089 for (unsigned i = 0; i < N; ++i) {
5090 if (Op.isReg())
5091 Ops.push_back(Op.getReg());
5092 else if (Op.isImm())
5093 Ops.push_back(Op.getImm());
5094 else if (Op.isPredicate())
5095 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
5096 else
5097 llvm_unreachable("Unsupported type");
5098 }
5099}
5100
5101// Handle splitting vector operations which need to have the same number of
5102// elements in each type index, but each type index may have a different element
5103// type.
5104//
5105// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
5106// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5107// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5108//
5109// Also handles some irregular breakdown cases, e.g.
5110// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
5111// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5112// s64 = G_SHL s64, s32
5115 GenericMachineInstr &MI, unsigned NumElts,
5116 std::initializer_list<unsigned> NonVecOpIndices) {
5117 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
5118 "Non-compatible opcode or not specified non-vector operands");
5119 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5120
5121 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5122 unsigned NumDefs = MI.getNumDefs();
5123
5124 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
5125 // Build instructions with DstOps to use instruction found by CSE directly.
5126 // CSE copies found instruction into given vreg when building with vreg dest.
5127 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
5128 // Output registers will be taken from created instructions.
5129 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
5130 for (unsigned i = 0; i < NumDefs; ++i) {
5131 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
5132 }
5133
5134 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
5135 // Operands listed in NonVecOpIndices will be used as is without splitting;
5136 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
5137 // scalar condition (op 1), immediate in sext_inreg (op 2).
5138 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
5139 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5140 ++UseIdx, ++UseNo) {
5141 if (is_contained(NonVecOpIndices, UseIdx)) {
5142 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
5143 MI.getOperand(UseIdx));
5144 } else {
5145 SmallVector<Register, 8> SplitPieces;
5146 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
5147 MRI);
5148 llvm::append_range(InputOpsPieces[UseNo], SplitPieces);
5149 }
5150 }
5151
5152 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5153
5154 // Take i-th piece of each input operand split and build sub-vector/scalar
5155 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
5156 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5158 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5159 Defs.push_back(OutputOpsPieces[DstNo][i]);
5160
5162 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5163 Uses.push_back(InputOpsPieces[InputNo][i]);
5164
5165 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
5166 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5167 OutputRegs[DstNo].push_back(I.getReg(DstNo));
5168 }
5169
5170 // Merge small outputs into MI's output for each def operand.
5171 if (NumLeftovers) {
5172 for (unsigned i = 0; i < NumDefs; ++i)
5173 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
5174 } else {
5175 for (unsigned i = 0; i < NumDefs; ++i)
5176 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
5177 }
5178
5179 MI.eraseFromParent();
5180 return Legalized;
5181}
5182
5185 unsigned NumElts) {
5186 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5187
5188 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5189 unsigned NumDefs = MI.getNumDefs();
5190
5191 SmallVector<DstOp, 8> OutputOpsPieces;
5192 SmallVector<Register, 8> OutputRegs;
5193 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
5194
5195 // Instructions that perform register split will be inserted in basic block
5196 // where register is defined (basic block is in the next operand).
5197 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
5198 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5199 UseIdx += 2, ++UseNo) {
5200 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
5201 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5202 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
5203 MIRBuilder, MRI);
5204 }
5205
5206 // Build PHIs with fewer elements.
5207 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5208 MIRBuilder.setInsertPt(*MI.getParent(), MI);
5209 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5210 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5211 Phi.addDef(
5212 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5213 OutputRegs.push_back(Phi.getReg(0));
5214
5215 for (unsigned j = 0; j < NumInputs / 2; ++j) {
5216 Phi.addUse(InputOpsPieces[j][i]);
5217 Phi.add(MI.getOperand(1 + j * 2 + 1));
5218 }
5219 }
5220
5221 // Set the insert point after the existing PHIs
5222 MachineBasicBlock &MBB = *MI.getParent();
5223 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
5224
5225 // Merge small outputs into MI's def.
5226 if (NumLeftovers) {
5227 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
5228 } else {
5229 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
5230 }
5231
5232 MI.eraseFromParent();
5233 return Legalized;
5234}
5235
5238 unsigned TypeIdx,
5239 LLT NarrowTy) {
5240 const int NumDst = MI.getNumOperands() - 1;
5241 const Register SrcReg = MI.getOperand(NumDst).getReg();
5242 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5243 LLT SrcTy = MRI.getType(SrcReg);
5244
5245 if (TypeIdx != 1 || NarrowTy == DstTy)
5246 return UnableToLegalize;
5247
5248 // Requires compatible types. Otherwise SrcReg should have been defined by
5249 // merge-like instruction that would get artifact combined. Most likely
5250 // instruction that defines SrcReg has to perform more/fewer elements
5251 // legalization compatible with NarrowTy.
5252 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5253 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5254
5255 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5256 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
5257 return UnableToLegalize;
5258
5259 // This is most likely DstTy (smaller then register size) packed in SrcTy
5260 // (larger then register size) and since unmerge was not combined it will be
5261 // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
5262 // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.
5263
5264 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
5265 //
5266 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5267 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5268 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5269 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5270 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5271 const int PartsPerUnmerge = NumDst / NumUnmerge;
5272
5273 for (int I = 0; I != NumUnmerge; ++I) {
5274 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5275
5276 for (int J = 0; J != PartsPerUnmerge; ++J)
5277 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
5278 MIB.addUse(Unmerge.getReg(I));
5279 }
5280
5281 MI.eraseFromParent();
5282 return Legalized;
5283}
5284
5287 LLT NarrowTy) {
5288 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5289 // Requires compatible types. Otherwise user of DstReg did not perform unmerge
5290 // that should have been artifact combined. Most likely instruction that uses
5291 // DstReg has to do more/fewer elements legalization compatible with NarrowTy.
5292 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5293 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5294 if (NarrowTy == SrcTy)
5295 return UnableToLegalize;
5296
5297 // This attempts to lower part of LCMTy merge/unmerge sequence. Intended use
5298 // is for old mir tests. Since the changes to more/fewer elements it should no
5299 // longer be possible to generate MIR like this when starting from llvm-ir
5300 // because LCMTy approach was replaced with merge/unmerge to vector elements.
5301 if (TypeIdx == 1) {
5302 assert(SrcTy.isVector() && "Expected vector types");
5303 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5304 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5305 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
5306 return UnableToLegalize;
5307 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
5308 //
5309 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
5310 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
5311 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
5312 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
5313 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
5314 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
5315
5317 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
5318 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
5319 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
5320 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5321 Elts.push_back(Unmerge.getReg(j));
5322 }
5323
5324 SmallVector<Register, 8> NarrowTyElts;
5325 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5326 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5327 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
5328 ++i, Offset += NumNarrowTyElts) {
5329 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
5330 NarrowTyElts.push_back(
5331 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5332 }
5333
5334 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5335 MI.eraseFromParent();
5336 return Legalized;
5337 }
5338
5339 assert(TypeIdx == 0 && "Bad type index");
5340 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5341 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5342 return UnableToLegalize;
5343
5344 // This is most likely SrcTy (smaller then register size) packed in DstTy
5345 // (larger then register size) and since merge was not combined it will be
5346 // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy
5347 // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy.
5348
5349 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
5350 //
5351 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5352 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5353 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
5354 SmallVector<Register, 8> NarrowTyElts;
5355 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5356 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5357 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
5358 for (unsigned i = 0; i < NumParts; ++i) {
5360 for (unsigned j = 0; j < NumElts; ++j)
5361 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
5362 NarrowTyElts.push_back(
5363 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5364 }
5365
5366 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5367 MI.eraseFromParent();
5368 return Legalized;
5369}
5370
5373 unsigned TypeIdx,
5374 LLT NarrowVecTy) {
5375 auto [DstReg, SrcVec] = MI.getFirst2Regs();
5376 Register InsertVal;
5377 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5378
5379 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
5380 if (IsInsert)
5381 InsertVal = MI.getOperand(2).getReg();
5382
5383 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
5384 LLT VecTy = MRI.getType(SrcVec);
5385
5386 // If the index is a constant, we can really break this down as you would
5387 // expect, and index into the target size pieces.
5388 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
5389 if (MaybeCst) {
5390 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5391 // Avoid out of bounds indexing the pieces.
5392 if (IdxVal >= VecTy.getNumElements()) {
5393 MIRBuilder.buildUndef(DstReg);
5394 MI.eraseFromParent();
5395 return Legalized;
5396 }
5397
5398 if (!NarrowVecTy.isVector()) {
5399 SmallVector<Register, 8> SplitPieces;
5400 extractParts(MI.getOperand(1).getReg(), NarrowVecTy,
5401 VecTy.getNumElements(), SplitPieces, MIRBuilder, MRI);
5402 if (IsInsert) {
5403 SplitPieces[IdxVal] = InsertVal;
5404 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), SplitPieces);
5405 } else {
5406 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5407 }
5408 } else {
5409 SmallVector<Register, 8> VecParts;
5410 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5411
5412 // Build a sequence of NarrowTy pieces in VecParts for this operand.
5413 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5414 TargetOpcode::G_ANYEXT);
5415
5416 unsigned NewNumElts = NarrowVecTy.getNumElements();
5417
5418 LLT IdxTy = MRI.getType(Idx);
5419 int64_t PartIdx = IdxVal / NewNumElts;
5420 auto NewIdx =
5421 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5422
5423 if (IsInsert) {
5424 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5425
5426 // Use the adjusted index to insert into one of the subvectors.
5427 auto InsertPart = MIRBuilder.buildInsertVectorElement(
5428 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5429 VecParts[PartIdx] = InsertPart.getReg(0);
5430
5431 // Recombine the inserted subvector with the others to reform the result
5432 // vector.
5433 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5434 } else {
5435 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5436 }
5437 }
5438
5439 MI.eraseFromParent();
5440 return Legalized;
5441 }
5442
5443 // With a variable index, we can't perform the operation in a smaller type, so
5444 // we're forced to expand this.
5445 //
5446 // TODO: We could emit a chain of compare/select to figure out which piece to
5447 // index.
5449}
5450
5453 LLT NarrowTy) {
5454 // FIXME: Don't know how to handle secondary types yet.
5455 if (TypeIdx != 0)
5456 return UnableToLegalize;
5457
5458 if (!NarrowTy.isByteSized()) {
5459 LLVM_DEBUG(dbgs() << "Can't narrow load/store to non-byte-sized type\n");
5460 return UnableToLegalize;
5461 }
5462
5463 // This implementation doesn't work for atomics. Give up instead of doing
5464 // something invalid.
5465 if (LdStMI.isAtomic())
5466 return UnableToLegalize;
5467
5468 bool IsLoad = isa<GLoad>(LdStMI);
5469 Register ValReg = LdStMI.getReg(0);
5470 Register AddrReg = LdStMI.getPointerReg();
5471 LLT ValTy = MRI.getType(ValReg);
5472
5473 // FIXME: Do we need a distinct NarrowMemory legalize action?
5474 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
5475 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
5476 return UnableToLegalize;
5477 }
5478
5479 int NumParts = -1;
5480 int NumLeftover = -1;
5481 LLT LeftoverTy;
5482 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
5483 if (IsLoad) {
5484 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
5485 } else {
5486 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5487 NarrowLeftoverRegs, MIRBuilder, MRI)) {
5488 NumParts = NarrowRegs.size();
5489 NumLeftover = NarrowLeftoverRegs.size();
5490 }
5491 }
5492
5493 if (NumParts == -1)
5494 return UnableToLegalize;
5495
5496 LLT PtrTy = MRI.getType(AddrReg);
5497 const LLT OffsetTy = LLT::integer(PtrTy.getSizeInBits());
5498
5499 unsigned TotalSize = ValTy.getSizeInBits();
5500
5501 // Split the load/store into PartTy sized pieces starting at Offset. If this
5502 // is a load, return the new registers in ValRegs. For a store, each elements
5503 // of ValRegs should be PartTy. Returns the next offset that needs to be
5504 // handled.
5505 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
5506 auto MMO = LdStMI.getMMO();
5507 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
5508 unsigned NumParts, unsigned Offset) -> unsigned {
5509 MachineFunction &MF = MIRBuilder.getMF();
5510 unsigned PartSize = PartTy.getSizeInBits();
5511 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
5512 ++Idx) {
5513 unsigned ByteOffset = Offset / 8;
5514 Register NewAddrReg;
5515
5516 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5517 ByteOffset);
5518
5519 MachineMemOperand *NewMMO =
5520 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
5521
5522 if (IsLoad) {
5523 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5524 ValRegs.push_back(Dst);
5525 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5526 } else {
5527 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5528 }
5529 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
5530 }
5531
5532 return Offset;
5533 };
5534
5535 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
5536 unsigned HandledOffset =
5537 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
5538
5539 // Handle the rest of the register if this isn't an even type breakdown.
5540 if (LeftoverTy.isValid())
5541 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5542
5543 if (IsLoad) {
5544 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5545 LeftoverTy, NarrowLeftoverRegs);
5546 }
5547
5548 LdStMI.eraseFromParent();
5549 return Legalized;
5550}
5551
// Dispatch for "fewer elements" vector legalization: split the vector
// operation MI into pieces of NarrowTy and rebuild the result. NumElts is
// the element count of each piece (1 when NarrowTy is scalar, i.e. the
// operation is being fully scalarized).
5554 LLT NarrowTy) {
5555 using namespace TargetOpcode;
5557 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5558
5559 switch (MI.getOpcode()) {
 // These opcodes act independently per element, so every register operand
 // can be split the same way and the operation rebuilt on each piece.
5560 case G_IMPLICIT_DEF:
5561 case G_TRUNC:
5562 case G_AND:
5563 case G_OR:
5564 case G_XOR:
5565 case G_ADD:
5566 case G_SUB:
5567 case G_MUL:
5568 case G_PTR_ADD:
5569 case G_SMULH:
5570 case G_UMULH:
5571 case G_FADD:
5572 case G_FMUL:
5573 case G_FSUB:
5574 case G_FNEG:
5575 case G_FABS:
5576 case G_FCANONICALIZE:
5577 case G_FDIV:
5578 case G_FREM:
5579 case G_FMA:
5580 case G_FMAD:
5581 case G_FPOW:
5582 case G_FEXP:
5583 case G_FEXP2:
5584 case G_FEXP10:
5585 case G_FLOG:
5586 case G_FLOG2:
5587 case G_FLOG10:
5588 case G_FLDEXP:
5589 case G_FNEARBYINT:
5590 case G_FCEIL:
5591 case G_FFLOOR:
5592 case G_FRINT:
5593 case G_INTRINSIC_LRINT:
5594 case G_INTRINSIC_LLRINT:
5595 case G_INTRINSIC_ROUND:
5596 case G_INTRINSIC_ROUNDEVEN:
5597 case G_LROUND:
5598 case G_LLROUND:
5599 case G_INTRINSIC_TRUNC:
5600 case G_FMODF:
5601 case G_FCOS:
5602 case G_FSIN:
5603 case G_FTAN:
5604 case G_FACOS:
5605 case G_FASIN:
5606 case G_FATAN:
5607 case G_FATAN2:
5608 case G_FCOSH:
5609 case G_FSINH:
5610 case G_FTANH:
5611 case G_FSQRT:
5612 case G_BSWAP:
5613 case G_BITREVERSE:
5614 case G_SDIV:
5615 case G_UDIV:
5616 case G_SREM:
5617 case G_UREM:
5618 case G_SDIVREM:
5619 case G_UDIVREM:
5620 case G_SMIN:
5621 case G_SMAX:
5622 case G_UMIN:
5623 case G_UMAX:
5624 case G_ABS:
5625 case G_FMINNUM:
5626 case G_FMAXNUM:
5627 case G_FMINNUM_IEEE:
5628 case G_FMAXNUM_IEEE:
5629 case G_FMINIMUM:
5630 case G_FMAXIMUM:
5631 case G_FMINIMUMNUM:
5632 case G_FMAXIMUMNUM:
5633 case G_FSHL:
5634 case G_FSHR:
5635 case G_ROTL:
5636 case G_ROTR:
5637 case G_FREEZE:
5638 case G_SADDSAT:
5639 case G_SSUBSAT:
5640 case G_UADDSAT:
5641 case G_USUBSAT:
5642 case G_UMULO:
5643 case G_SMULO:
5644 case G_SHL:
5645 case G_LSHR:
5646 case G_ASHR:
5647 case G_SSHLSAT:
5648 case G_USHLSAT:
5649 case G_CTLZ:
5650 case G_CTLZ_ZERO_UNDEF:
5651 case G_CTTZ:
5652 case G_CTTZ_ZERO_UNDEF:
5653 case G_CTPOP:
5654 case G_CTLS:
5655 case G_FCOPYSIGN:
5656 case G_ZEXT:
5657 case G_SEXT:
5658 case G_ANYEXT:
5659 case G_FPEXT:
5660 case G_FPTRUNC:
5661 case G_SITOFP:
5662 case G_UITOFP:
5663 case G_FPTOSI:
5664 case G_FPTOUI:
5665 case G_FPTOSI_SAT:
5666 case G_FPTOUI_SAT:
5667 case G_INTTOPTR:
5668 case G_PTRTOINT:
5669 case G_ADDRSPACE_CAST:
5670 case G_UADDO:
5671 case G_USUBO:
5672 case G_UADDE:
5673 case G_USUBE:
5674 case G_SADDO:
5675 case G_SSUBO:
5676 case G_SADDE:
5677 case G_SSUBE:
5678 case G_STRICT_FADD:
5679 case G_STRICT_FSUB:
5680 case G_STRICT_FMUL:
5681 case G_STRICT_FMA:
5682 case G_STRICT_FLDEXP:
5683 case G_FFREXP:
5684 case G_TRUNC_SSAT_S:
5685 case G_TRUNC_SSAT_U:
5686 case G_TRUNC_USAT_U:
5687 return fewerElementsVectorMultiEltType(GMI, NumElts);
 // Compares: operand 1 is the predicate immediate, which is copied to each
 // piece rather than split.
5688 case G_ICMP:
5689 case G_FCMP:
5690 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cpm predicate*/});
 // Operands 2 and 3 (test mask / FP semantics) are non-register operands.
5691 case G_IS_FPCLASS:
5692 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
5693 case G_SELECT:
 // A vector condition is split alongside the data operands; a scalar
 // condition is shared unchanged by every piece.
5694 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
5695 return fewerElementsVectorMultiEltType(GMI, NumElts);
5696 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
5697 case G_PHI:
5698 return fewerElementsVectorPhi(GMI, NumElts);
5699 case G_UNMERGE_VALUES:
5700 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
5701 case G_BUILD_VECTOR:
5702 assert(TypeIdx == 0 && "not a vector type index");
5703 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5704 case G_CONCAT_VECTORS:
5705 if (TypeIdx != 1) // TODO: This probably does work as expected already.
5706 return UnableToLegalize;
5707 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5708 case G_EXTRACT_VECTOR_ELT:
5709 case G_INSERT_VECTOR_ELT:
5710 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
5711 case G_LOAD:
5712 case G_STORE:
5713 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
 // Operand 2 is the extension-width immediate.
5714 case G_SEXT_INREG:
5715 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
5717 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
 // Sequential (in-order) FP reductions cannot be freely re-associated, so
 // they get a dedicated chained expansion.
5718 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5719 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5720 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
5721 case G_SHUFFLE_VECTOR:
5722 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
 // Operand 2 is the scalar integer exponent, shared by all pieces.
5723 case G_FPOWI:
5724 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
5725 case G_BITCAST:
5726 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
5727 case G_INTRINSIC_FPTRUNC_ROUND:
5728 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
5729 default:
5730 return UnableToLegalize;
5731 }
5732 }
5733
// Narrow a G_BITCAST by splitting the source into pieces, bitcasting each
// piece to NarrowTy, and merging the results back into the destination.
// Only the result type (TypeIdx 0) can be narrowed here; leftover (uneven)
// breakdowns are not yet supported (see comment below).
5736 LLT NarrowTy) {
5737 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5738 "Not a bitcast operation");
5739
5740 if (TypeIdx != 0)
5741 return UnableToLegalize;
5742
5743 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5744
 // Number of source scalar elements that fit in one NarrowTy piece.
5745 unsigned NewElemCount =
5746 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5747 SmallVector<Register> SrcVRegs, BitcastVRegs;
5748 if (NewElemCount == 1) {
 // One source element per piece: unmerge directly to scalar elements.
5749 LLT SrcNarrowTy = SrcTy.getElementType();
5750
5751 auto Unmerge = MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5752 getUnmergeResults(SrcVRegs, *Unmerge);
5753 } else {
5754 LLT SrcNarrowTy =
5756
5757 // Split the Src and Dst Reg into smaller registers
5758 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5759 return UnableToLegalize;
5760 }
5761
5762 // Build new smaller bitcast instructions
5763 // Not supporting Leftover types for now but will have to
5764 for (Register Reg : SrcVRegs)
5765 BitcastVRegs.push_back(MIRBuilder.buildBitcast(NarrowTy, Reg).getReg(0));
5766
5767 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5768 MI.eraseFromParent();
5769 return Legalized;
5770 }
5771
// Split a G_SHUFFLE_VECTOR into two half-width shuffles (Lo and Hi halves of
// the result). Each half is built as a narrower shuffle when its mask only
// draws from at most two of the four split inputs; otherwise the elements
// are extracted individually and recombined with a G_BUILD_VECTOR.
// Requires Dst/Src1/Src2 types to match and a power-of-2 element count.
5773 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5774 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5775 if (TypeIdx != 0)
5776 return UnableToLegalize;
5777
5778 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5779 MI.getFirst3RegLLTs();
5780 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5781 // The shuffle should be canonicalized by now.
5782 if (DstTy != Src1Ty)
5783 return UnableToLegalize;
5784 if (DstTy != Src2Ty)
5785 return UnableToLegalize;
5786
5787 if (!isPowerOf2_32(DstTy.getNumElements()))
5788 return UnableToLegalize;
5789
5790 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5791 // Further legalization attempts will be needed to do split further.
5792 NarrowTy =
5793 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
5794 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5795
5796 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
5797 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
5798 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
 // The four candidate shuffle inputs: {Src1.lo, Src1.hi, Src2.lo, Src2.hi}.
5799 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5800 SplitSrc2Regs[1]};
5801
5802 Register Hi, Lo;
5803
5804 // If Lo or Hi uses elements from at most two of the four input vectors, then
5805 // express it as a vector shuffle of those two inputs. Otherwise extract the
5806 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
5808 for (unsigned High = 0; High < 2; ++High) {
5809 Register &Output = High ? Hi : Lo;
5810
5811 // Build a shuffle mask for the output, discovering on the fly which
5812 // input vectors to use as shuffle operands (recorded in InputUsed).
5813 // If building a suitable shuffle vector proves too hard, then bail
5814 // out with useBuildVector set.
5815 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
5816 unsigned FirstMaskIdx = High * NewElts;
5817 bool UseBuildVector = false;
5818 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5819 // The mask element. This indexes into the input.
5820 int Idx = Mask[FirstMaskIdx + MaskOffset];
5821
5822 // The input vector this mask element indexes into.
5823 unsigned Input = (unsigned)Idx / NewElts;
5824
5825 if (Input >= std::size(Inputs)) {
5826 // The mask element does not index into any input vector.
5827 Ops.push_back(-1);
5828 continue;
5829 }
5830
5831 // Turn the index into an offset from the start of the input vector.
5832 Idx -= Input * NewElts;
5833
5834 // Find or create a shuffle vector operand to hold this input.
5835 unsigned OpNo;
5836 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5837 if (InputUsed[OpNo] == Input) {
5838 // This input vector is already an operand.
5839 break;
5840 } else if (InputUsed[OpNo] == -1U) {
5841 // Create a new operand for this input vector.
5842 InputUsed[OpNo] = Input;
5843 break;
5844 }
5845 }
5846
5847 if (OpNo >= std::size(InputUsed)) {
5848 // More than two input vectors used! Give up on trying to create a
5849 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5850 UseBuildVector = true;
5851 break;
5852 }
5853
5854 // Add the mask index for the new shuffle vector.
5855 Ops.push_back(Idx + OpNo * NewElts);
5856 }
5857
5858 if (UseBuildVector) {
5859 LLT EltTy = NarrowTy.getElementType();
5861
5862 // Extract the input elements by hand.
5863 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5864 // The mask element. This indexes into the input.
5865 int Idx = Mask[FirstMaskIdx + MaskOffset];
5866
5867 // The input vector this mask element indexes into.
5868 unsigned Input = (unsigned)Idx / NewElts;
5869
5870 if (Input >= std::size(Inputs)) {
5871 // The mask element is "undef" or indexes off the end of the input.
5872 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5873 continue;
5874 }
5875
5876 // Turn the index into an offset from the start of the input vector.
5877 Idx -= Input * NewElts;
5878
5879 // Extract the vector element by hand.
5880 SVOps.push_back(MIRBuilder
5881 .buildExtractVectorElement(
5882 EltTy, Inputs[Input],
5883 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5884 .getReg(0));
5885 }
5886
5887 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5888 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5889 } else if (InputUsed[0] == -1U) {
5890 // No input vectors were used! The result is undefined.
5891 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5892 } else if (NewElts == 1) {
 // Single-element "shuffle" of one input degenerates to a copy.
5893 Output = MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);
5894 } else {
5895 Register Op0 = Inputs[InputUsed[0]];
5896 // If only one input was used, use an undefined vector for the other.
5897 Register Op1 = InputUsed[1] == -1U
5898 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5899 : Inputs[InputUsed[1]];
5900 // At least one input vector was used. Create a new shuffle vector.
5901 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5902 }
5903
 // Reset the mask accumulator for the next (Hi) half.
5904 Ops.clear();
5905 }
5906
5907 MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
5908 MI.eraseFromParent();
5909 return Legalized;
5910 }
5911
// Narrow a non-sequential G_VECREDUCE_* by splitting its source vector into
// NarrowTy pieces (or scalarizing when NarrowTy is scalar) and combining the
// pieces with the reduction's scalar opcode. Re-association is legal here
// precisely because these are the non-SEQ reduction opcodes.
5913 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5914 auto &RdxMI = cast<GVecReduce>(MI);
5915
5916 if (TypeIdx != 1)
5917 return UnableToLegalize;
5918
5919 // The semantics of the normal non-sequential reductions allow us to freely
5920 // re-associate the operation.
5921 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5922
5923 if (NarrowTy.isVector() &&
5924 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5925 return UnableToLegalize;
5926
5927 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5928 SmallVector<Register> SplitSrcs;
5929 // If NarrowTy is a scalar then we're being asked to scalarize.
5930 const unsigned NumParts =
5931 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5932 : SrcTy.getNumElements();
5933
5934 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5935 if (NarrowTy.isScalar()) {
5936 if (DstTy != NarrowTy)
5937 return UnableToLegalize; // FIXME: handle implicit extensions.
5938
5939 if (isPowerOf2_32(NumParts)) {
5940 // Generate a tree of scalar operations to reduce the critical path.
 // Each round pairwise-combines adjacent values, halving the count
 // until a single value remains.
5941 SmallVector<Register> PartialResults;
5942 unsigned NumPartsLeft = NumParts;
5943 while (NumPartsLeft > 1) {
5944 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5945 PartialResults.emplace_back(
5947 .buildInstr(ScalarOpc, {NarrowTy},
5948 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5949 .getReg(0));
5950 }
5951 SplitSrcs = PartialResults;
5952 PartialResults.clear();
5953 NumPartsLeft = SplitSrcs.size();
5954 }
5955 assert(SplitSrcs.size() == 1);
5956 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5957 MI.eraseFromParent();
5958 return Legalized;
5959 }
5960 // If we can't generate a tree, then just do sequential operations.
5961 Register Acc = SplitSrcs[0];
5962 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5963 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5964 .getReg(0);
5965 MIRBuilder.buildCopy(DstReg, Acc);
5966 MI.eraseFromParent();
5967 return Legalized;
5968 }
 // NarrowTy is a vector: reduce each NarrowTy-sized chunk separately, then
 // combine the per-chunk scalar results.
5969 SmallVector<Register> PartialReductions;
5970 for (unsigned Part = 0; Part < NumParts; ++Part) {
5971 PartialReductions.push_back(
5972 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5973 .getReg(0));
5974 }
5975
5976 // If the types involved are powers of 2, we can generate intermediate vector
5977 // ops, before generating a final reduction operation.
5978 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5979 isPowerOf2_32(NarrowTy.getNumElements())) {
5980 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5981 }
5982
5983 Register Acc = PartialReductions[0];
5984 for (unsigned Part = 1; Part < NumParts; ++Part) {
5985 if (Part == NumParts - 1) {
 // Last combine writes the final destination directly.
5986 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5987 {Acc, PartialReductions[Part]});
5988 } else {
5989 Acc = MIRBuilder
5990 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5991 .getReg(0);
5992 }
5993 }
5994 MI.eraseFromParent();
5995 return Legalized;
5996 }
5997
// Narrow a sequential FP reduction (G_VECREDUCE_SEQ_FADD/FMUL) by
// scalarizing: the source vector's elements are folded into the start-value
// accumulator strictly in order, since sequential reductions must not be
// re-associated. Only full scalarization (scalar NarrowTy, TypeIdx 2, with
// matching dst/scalar types) is supported.
6000 unsigned int TypeIdx,
6001 LLT NarrowTy) {
6002 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
6003 MI.getFirst3RegLLTs();
6004 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
6005 DstTy != NarrowTy)
6006 return UnableToLegalize;
6007
6008 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
6009 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
6010 "Unexpected vecreduce opcode");
6011 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
6012 ? TargetOpcode::G_FADD
6013 : TargetOpcode::G_FMUL;
6014
6015 SmallVector<Register> SplitSrcs;
6016 unsigned NumParts = SrcTy.getNumElements();
6017 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
 // Chain: Acc starts at the initial scalar and absorbs one element per step,
 // preserving evaluation order.
6018 Register Acc = ScalarReg;
6019 for (unsigned i = 0; i < NumParts; i++)
6020 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
6021 .getReg(0);
6022
6023 MIRBuilder.buildCopy(DstReg, Acc);
6024 MI.eraseFromParent();
6025 return Legalized;
6026 }
6027
// Tree-reduce a power-of-2 reduction source down to a single NarrowTy
// vector using pairwise vector ops, then shrink the original reduction
// instruction in place so it operates on that final NarrowTy value.
// Note: MI is modified (operand 1 replaced), not erased.
6029 LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
6030 LLT SrcTy, LLT NarrowTy,
6031 unsigned ScalarOpc) {
6032 SmallVector<Register> SplitSrcs;
6033 // Split the sources into NarrowTy size pieces.
6034 extractParts(SrcReg, NarrowTy,
6035 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
6036 MIRBuilder, MRI);
6037 // We're going to do a tree reduction using vector operations until we have
6038 // one NarrowTy size value left.
6039 while (SplitSrcs.size() > 1) {
6040 SmallVector<Register> PartialRdxs;
6041 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
6042 Register LHS = SplitSrcs[Idx];
6043 Register RHS = SplitSrcs[Idx + 1];
6044 // Create the intermediate vector op.
6045 Register Res =
6046 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
6047 PartialRdxs.push_back(Res);
6048 }
6049 SplitSrcs = std::move(PartialRdxs);
6050 }
6051 // Finally generate the requested NarrowTy based reduction.
 // Notify the observer since we mutate the existing instruction rather than
 // building a replacement.
6052 Observer.changingInstr(MI);
6053 MI.getOperand(1).setReg(SplitSrcs[0]);
6054 Observer.changedInstr(MI);
6055 return Legalized;
6056 }
6057
// Narrow a G_SHL/G_LSHR/G_ASHR with a compile-time-constant amount by
// splitting the value into two HalfTy halves (InL = low, InH = high) and
// emitting the half-width shifts directly. Branches on the amount:
// zero, > full width, > half width, == half width, and the general
// sub-half-width case which combines bits across the two halves.
6060 const LLT HalfTy, const LLT AmtTy) {
6061
6062 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6063 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6064 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6065
 // Shift by zero: just reassemble the input.
6066 if (Amt.isZero()) {
6067 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
6068 MI.eraseFromParent();
6069 return Legalized;
6070 }
6071
6072 LLT NVT = HalfTy;
6073 unsigned NVTBits = HalfTy.getSizeInBits();
6074 unsigned VTBits = 2 * NVTBits;
6075
6076 SrcOp Lo(Register(0)), Hi(Register(0));
6077 if (MI.getOpcode() == TargetOpcode::G_SHL) {
6078 if (Amt.ugt(VTBits)) {
6079 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6080 } else if (Amt.ugt(NVTBits)) {
 // Whole low half shifted out; Hi comes from InL shifted by the excess.
6081 Lo = MIRBuilder.buildConstant(NVT, 0);
6082 Hi = MIRBuilder.buildShl(NVT, InL,
6083 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6084 } else if (Amt == NVTBits) {
6085 Lo = MIRBuilder.buildConstant(NVT, 0);
6086 Hi = InL;
6087 } else {
6088 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
 // Hi = (InH << Amt) | (InL >> (NVTBits - Amt)): carry bits move from
 // the low half into the high half.
6089 auto OrLHS =
6090 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
6091 auto OrRHS = MIRBuilder.buildLShr(
6092 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6093 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6094 }
6095 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6096 if (Amt.ugt(VTBits)) {
6097 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6098 } else if (Amt.ugt(NVTBits)) {
6099 Lo = MIRBuilder.buildLShr(NVT, InH,
6100 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6101 Hi = MIRBuilder.buildConstant(NVT, 0);
6102 } else if (Amt == NVTBits) {
6103 Lo = InH;
6104 Hi = MIRBuilder.buildConstant(NVT, 0);
6105 } else {
6106 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6107
 // Lo = (InL >> Amt) | (InH << (NVTBits - Amt)).
6108 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6109 auto OrRHS = MIRBuilder.buildShl(
6110 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6111
6112 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6113 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
6114 }
6115 } else {
 // G_ASHR: like LSHR, but shifted-out high bits are filled with the sign
 // bit (an arithmetic shift of InH by NVTBits-1 broadcasts the sign).
6116 if (Amt.ugt(VTBits)) {
6117 Hi = Lo = MIRBuilder.buildAShr(
6118 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6119 } else if (Amt.ugt(NVTBits)) {
6120 Lo = MIRBuilder.buildAShr(NVT, InH,
6121 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6122 Hi = MIRBuilder.buildAShr(NVT, InH,
6123 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6124 } else if (Amt == NVTBits) {
6125 Lo = InH;
6126 Hi = MIRBuilder.buildAShr(NVT, InH,
6127 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6128 } else {
6129 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6130
6131 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6132 auto OrRHS = MIRBuilder.buildShl(
6133 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6134
6135 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6136 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
6137 }
6138 }
6139
6140 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
6141 MI.eraseFromParent();
6142
6143 return Legalized;
6144 }
6145
// Narrow a scalar G_SHL/G_LSHR/G_ASHR. For TypeIdx 1 only the shift-amount
// operand is narrowed. For TypeIdx 0 the value is split: either multi-way
// (when RequestedTy would give >= 8 parts), by-constant (known amount), or
// via the fully general half-width expansion with select chains.
6148 LLT RequestedTy) {
 // TypeIdx 1: only the amount operand needs narrowing.
6149 if (TypeIdx == 1) {
6150 Observer.changingInstr(MI);
6151 narrowScalarSrc(MI, RequestedTy, 2);
6152 Observer.changedInstr(MI);
6153 return Legalized;
6154 }
6155
6156 Register DstReg = MI.getOperand(0).getReg();
6157 LLT DstTy = MRI.getType(DstReg);
6158 if (DstTy.isVector())
6159 return UnableToLegalize;
6160
6161 Register Amt = MI.getOperand(2).getReg();
6162 LLT ShiftAmtTy = MRI.getType(Amt);
6163 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
6164 if (DstEltSize % 2 != 0)
6165 return UnableToLegalize;
6166
6167 // Check if we should use multi-way splitting instead of recursive binary
6168 // splitting.
6169 //
6170 // Multi-way splitting directly decomposes wide shifts (e.g., 128-bit ->
6171 // 4×32-bit) in a single legalization step, avoiding the recursive overhead
6172 // and dependency chains created by usual binary splitting approach
6173 // (128->64->32).
6174 //
6175 // The >= 8 parts threshold ensures we only use this optimization when binary
6176 // splitting would require multiple recursive passes, avoiding overhead for
6177 // simple 2-way splits where binary approach is sufficient.
6178 if (RequestedTy.isValid() && RequestedTy.isScalar() &&
6179 DstEltSize % RequestedTy.getSizeInBits() == 0) {
6180 const unsigned NumParts = DstEltSize / RequestedTy.getSizeInBits();
6181 // Use multiway if we have 8 or more parts (i.e., would need 3+ recursive
6182 // steps).
6183 if (NumParts >= 8)
6184 return narrowScalarShiftMultiway(MI, RequestedTy);
6185 }
6186
6187 // Fall back to binary splitting:
6188 // Ignore the input type. We can only go to exactly half the size of the
6189 // input. If that isn't small enough, the resulting pieces will be further
6190 // legalized.
6191 const unsigned NewBitSize = DstEltSize / 2;
6192 const LLT HalfTy = DstTy.getScalarType().changeElementSize(NewBitSize);
6193 const LLT CondTy = LLT::scalar(1);
6194
 // Constant shift amount: emit the shifts directly, no selects needed.
6195 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
6196 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
6197 ShiftAmtTy);
6198 }
6199
6200 // TODO: Expand with known bits.
6201
6202 // Handle the fully general expansion by an unknown amount.
6203 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6204
6205 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6206 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6207 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6208
 // AmtExcess = Amt - NewBitSize (used when Amt >= half width);
 // AmtLack = NewBitSize - Amt (used for the cross-half carry bits).
6209 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6210 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6211
6212 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6213 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
6214 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
6215
6216 Register ResultRegs[2];
6217 switch (MI.getOpcode()) {
6218 case TargetOpcode::G_SHL: {
6219 // Short: ShAmt < NewBitSize
6220 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
6221
6222 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6223 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
6224 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6225
6226 // Long: ShAmt >= NewBitSize
6227 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
6228 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
6229
 // Select between short/long variants; the IsZero select guards the
 // Amt == 0 case where AmtLack would equal the full half width.
6230 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6231 auto Hi = MIRBuilder.buildSelect(
6232 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6233
6234 ResultRegs[0] = Lo.getReg(0);
6235 ResultRegs[1] = Hi.getReg(0);
6236 break;
6237 }
6238 case TargetOpcode::G_LSHR:
6239 case TargetOpcode::G_ASHR: {
6240 // Short: ShAmt < NewBitSize
6241 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
6242
6243 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
6244 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6245 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6246
6247 // Long: ShAmt >= NewBitSize
6249 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6250 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
6251 } else {
6252 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6253 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
6254 }
6255 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
6256 {InH, AmtExcess}); // Lo from Hi part.
6257
6258 auto Lo = MIRBuilder.buildSelect(
6259 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6260
6261 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6262
6263 ResultRegs[0] = Lo.getReg(0);
6264 ResultRegs[1] = Hi.getReg(0);
6265 break;
6266 }
6267 default:
6268 llvm_unreachable("not a shift");
6269 }
6270
6271 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6272 MI.eraseFromParent();
6273 return Legalized;
6274 }
6275
// Compute output part PartIdx of a multi-part wide shift whose amount is a
// known constant. The amount is pre-decomposed in Params into WordShift
// (whole TargetTy parts) and BitShift (remaining bits); this function picks
// the contributing source part(s) and, when BitShift != 0, ORs in the carry
// bits from the neighbouring part. Returns the register holding the part.
6277 unsigned PartIdx,
6278 unsigned NumParts,
6279 ArrayRef<Register> SrcParts,
6280 const ShiftParams &Params,
6281 LLT TargetTy, LLT ShiftAmtTy) {
6282 auto WordShiftConst = getIConstantVRegVal(Params.WordShift, MRI);
6283 auto BitShiftConst = getIConstantVRegVal(Params.BitShift, MRI);
6284 assert(WordShiftConst && BitShiftConst && "Expected constants");
6285
6286 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6287 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6288 const bool NeedsInterWordShift = ShiftBits != 0;
6289
6290 switch (Opcode) {
6291 case TargetOpcode::G_SHL: {
6292 // Data moves from lower indices to higher indices
6293 // If this part would come from a source beyond our range, it's zero
6294 if (PartIdx < ShiftWords)
6295 return Params.Zero;
6296
6297 unsigned SrcIdx = PartIdx - ShiftWords;
 // Word-aligned shift: the source part moves over unchanged.
6298 if (!NeedsInterWordShift)
6299 return SrcParts[SrcIdx];
6300
6301 // Combine shifted main part with carry from previous part
6302 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6303 if (SrcIdx > 0) {
6304 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6305 Params.InvBitShift);
6306 return MIRBuilder.buildOr(TargetTy, Hi, Lo).getReg(0);
6307 }
6308 return Hi.getReg(0);
6309 }
6310
6311 case TargetOpcode::G_LSHR: {
 // Data moves from higher indices to lower; out-of-range parts are zero.
6312 unsigned SrcIdx = PartIdx + ShiftWords;
6313 if (SrcIdx >= NumParts)
6314 return Params.Zero;
6315 if (!NeedsInterWordShift)
6316 return SrcParts[SrcIdx];
6317
6318 // Combine shifted main part with carry from next part
6319 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6320 if (SrcIdx + 1 < NumParts) {
6321 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6322 Params.InvBitShift);
6323 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6324 }
6325 return Lo.getReg(0);
6326 }
6327
6328 case TargetOpcode::G_ASHR: {
6329 // Like LSHR but preserves sign bit
 // Out-of-range parts are filled with the broadcast sign (Params.SignBit).
6330 unsigned SrcIdx = PartIdx + ShiftWords;
6331 if (SrcIdx >= NumParts)
6332 return Params.SignBit;
6333 if (!NeedsInterWordShift)
6334 return SrcParts[SrcIdx];
6335
6336 // Only the original MSB part uses arithmetic shift to preserve sign. All
6337 // other parts use logical shift since they're just moving data bits.
6338 auto Lo =
6339 (SrcIdx == NumParts - 1)
6340 ? MIRBuilder.buildAShr(TargetTy, SrcParts[SrcIdx], Params.BitShift)
6341 : MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6342 Register HiSrc =
6343 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.SignBit;
6344 auto Hi = MIRBuilder.buildShl(TargetTy, HiSrc, Params.InvBitShift);
6345 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6346 }
6347
6348 default:
6349 llvm_unreachable("not a shift");
6350 }
6351 }
6352
// Build one output part of a multi-part shift with a *variable* (runtime)
// bit-shift amount: shift MainOperand by ShiftAmt, and if a CarryOperand is
// given, OR in its bits shifted the opposite way by (width - ShiftAmt). A
// select guards the ShiftAmt == 0 case, where the inverse shift amount
// would equal the full width (poison for the individual shift).
6354 Register MainOperand,
6355 Register ShiftAmt,
6356 LLT TargetTy,
6357 Register CarryOperand) {
6358 // This helper generates a single output part for variable shifts by combining
6359 // the main operand (shifted by BitShift) with carry bits from an adjacent
6360 // part.
6361
6362 // For G_ASHR, individual parts don't have their own sign bit, only the
6363 // complete value does. So we use LSHR for the main operand shift in ASHR
6364 // context.
6365 unsigned MainOpcode = (Opcode == TargetOpcode::G_ASHR)
6366 ? static_cast<unsigned>(TargetOpcode::G_LSHR)
6367 : Opcode;
6368
6369 // Perform the primary shift on the main operand
6370 Register MainShifted =
6371 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6372 .getReg(0);
6373
6374 // No carry operand available
6375 if (!CarryOperand.isValid())
6376 return MainShifted;
6377
6378 // If BitShift is 0 (word-aligned shift), no inter-word bit movement occurs,
6379 // so carry bits aren't needed.
6380 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6381 auto ZeroConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6382 LLT BoolTy = LLT::scalar(1);
6383 auto IsZeroBitShift =
6384 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, ShiftAmt, ZeroConst);
6385
6386 // Extract bits from the adjacent part that will "carry over" into this part.
6387 // The carry direction is opposite to the main shift direction, so we can
6388 // align the two shifted values before combining them with OR.
6389
6390 // Determine the carry shift opcode (opposite direction)
6391 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6392 : TargetOpcode::G_SHL;
6393
6394 // Calculate inverse shift amount: BitWidth - ShiftAmt
6395 auto TargetBitsConst =
6396 MIRBuilder.buildConstant(ShiftAmtTy, TargetTy.getScalarSizeInBits());
6397 auto InvShiftAmt = MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6398
6399 // Shift the carry operand
6400 Register CarryBits =
6402 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6403 .getReg(0);
6404
6405 // If BitShift is 0, don't include carry bits (InvShiftAmt would equal
6406 // TargetBits which would be poison for the individual carry shift operation).
6407 auto ZeroReg = MIRBuilder.buildConstant(TargetTy, 0);
6408 Register SafeCarryBits =
6409 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6410 .getReg(0);
6411
6412 // Combine the main shifted part with the carry bits
6413 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6414 }
6415
// Multi-way narrowing of a wide shift with a compile-time-constant amount:
// split the source into NumParts TargetTy parts, decompose the amount into
// WordShift (whole parts) + BitShift (remaining bits), and compute each
// destination part directly via buildConstantShiftPart — no select chains.
6418 const APInt &Amt,
6419 LLT TargetTy,
6420 LLT ShiftAmtTy) {
6421 // Any wide shift can be decomposed into WordShift + BitShift components.
6422 // When shift amount is known constant, directly compute the decomposition
6423 // values and generate constant registers.
6424 Register DstReg = MI.getOperand(0).getReg();
6425 Register SrcReg = MI.getOperand(1).getReg();
6426 LLT DstTy = MRI.getType(DstReg);
6427
6428 const unsigned DstBits = DstTy.getScalarSizeInBits();
6429 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6430 const unsigned NumParts = DstBits / TargetBits;
6431
6432 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6433
6434 // When the shift amount is known at compile time, we just calculate which
6435 // source parts contribute to each output part.
6436
6437 SmallVector<Register, 8> SrcParts;
6438 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6439
6440 if (Amt.isZero()) {
6441 // No shift needed, just copy
6442 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6443 MI.eraseFromParent();
6444 return Legalized;
6445 }
6446
6447 ShiftParams Params;
6448 const unsigned ShiftWords = Amt.getZExtValue() / TargetBits;
6449 const unsigned ShiftBits = Amt.getZExtValue() % TargetBits;
6450
6451 // Generate constants and values needed by all shift types
6452 Params.WordShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftWords).getReg(0);
6453 Params.BitShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftBits).getReg(0);
6454 Params.InvBitShift =
6455 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6456 Params.Zero = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6457
6458 // For ASHR, we need the sign-extended value to fill shifted-out positions
 // (arithmetic shift of the most-significant part by TargetBits-1 broadcasts
 // its sign bit across the whole part).
6459 if (MI.getOpcode() == TargetOpcode::G_ASHR)
6460 Params.SignBit =
6462 .buildAShr(TargetTy, SrcParts[SrcParts.size() - 1],
6463 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6464 .getReg(0)
6465
6466 SmallVector<Register, 8> DstParts(NumParts);
6467 for (unsigned I = 0; I < NumParts; ++I)
6468 DstParts[I] = buildConstantShiftPart(MI.getOpcode(), I, NumParts, SrcParts,
6469 Params, TargetTy, ShiftAmtTy);
6470
6471 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6472 MI.eraseFromParent();
6473 return Legalized;
6474 }
6475
6478 Register DstReg = MI.getOperand(0).getReg();
6479 Register SrcReg = MI.getOperand(1).getReg();
6480 Register AmtReg = MI.getOperand(2).getReg();
6481 LLT DstTy = MRI.getType(DstReg);
6482 LLT ShiftAmtTy = MRI.getType(AmtReg);
6483
6484 const unsigned DstBits = DstTy.getScalarSizeInBits();
6485 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6486 const unsigned NumParts = DstBits / TargetBits;
6487
6488 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6489 assert(isPowerOf2_32(TargetBits) && "Target bit width must be power of 2");
6490
6491 // If the shift amount is known at compile time, we can use direct indexing
6492 // instead of generating select chains in the general case.
6493 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI))
6494 return narrowScalarShiftByConstantMultiway(MI, VRegAndVal->Value, TargetTy,
6495 ShiftAmtTy);
6496
6497 // For runtime-variable shift amounts, we must generate a more complex
6498 // sequence that handles all possible shift values using select chains.
6499
6500 // Split the input into target-sized pieces
6501 SmallVector<Register, 8> SrcParts;
6502 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6503
6504 // Shifting by zero should be a no-op.
6505 auto ZeroAmtConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6506 LLT BoolTy = LLT::scalar(1);
6507 auto IsZeroShift =
6508 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, AmtReg, ZeroAmtConst);
6509
6510 // Any wide shift can be decomposed into two components:
6511 // 1. WordShift: number of complete target-sized words to shift
6512 // 2. BitShift: number of bits to shift within each word
6513 //
6514 // Example: 128-bit >> 50 with 32-bit target:
6515 // WordShift = 50 / 32 = 1 (shift right by 1 complete word)
6516 // BitShift = 50 % 32 = 18 (shift each word right by 18 bits)
6517 unsigned TargetBitsLog2 = Log2_32(TargetBits);
6518 auto TargetBitsLog2Const =
6519 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6520 auto TargetBitsMask = MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6521
6522 Register WordShift =
6523 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6524 Register BitShift =
6525 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6526
6527 // Fill values:
6528 // - SHL/LSHR: fill with zeros
6529 // - ASHR: fill with sign-extended MSB
6530 Register ZeroReg = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6531
6532 Register FillValue;
6533 if (MI.getOpcode() == TargetOpcode::G_ASHR) {
6534 auto TargetBitsMinusOneConst =
6535 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6536 FillValue = MIRBuilder
6537 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6538 TargetBitsMinusOneConst)
6539 .getReg(0);
6540 } else {
6541 FillValue = ZeroReg;
6542 }
6543
6544 SmallVector<Register, 8> DstParts(NumParts);
6545
6546 // For each output part, generate a select chain that chooses the correct
6547 // result based on the runtime WordShift value. This handles all possible
6548 // word shift amounts by pre-calculating what each would produce.
6549 for (unsigned I = 0; I < NumParts; ++I) {
6550 // Initialize with appropriate default value for this shift type
6551 Register InBoundsResult = FillValue;
6552
6553 // clang-format off
6554 // Build a branchless select chain by pre-computing results for all possible
6555 // WordShift values (0 to NumParts-1). Each iteration nests a new select:
6556 //
6557 // K=0: select(WordShift==0, result0, FillValue)
6558 // K=1: select(WordShift==1, result1, select(WordShift==0, result0, FillValue))
6559 // K=2: select(WordShift==2, result2, select(WordShift==1, result1, select(...)))
6560 // clang-format on
6561 for (unsigned K = 0; K < NumParts; ++K) {
6562 auto WordShiftKConst = MIRBuilder.buildConstant(ShiftAmtTy, K);
6563 auto IsWordShiftK = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy,
6564 WordShift, WordShiftKConst);
6565
6566 // Calculate source indices for this word shift
6567 //
6568 // For 4-part 128-bit value with K=1 word shift:
6569 // SHL: [3][2][1][0] << K => [2][1][0][Z]
6570 // -> (MainIdx = I-K, CarryIdx = I-K-1)
6571 // LSHR: [3][2][1][0] >> K => [Z][3][2][1]
6572 // -> (MainIdx = I+K, CarryIdx = I+K+1)
6573 int MainSrcIdx;
6574 int CarrySrcIdx; // Index for the word that provides the carried-in bits.
6575
6576 switch (MI.getOpcode()) {
6577 case TargetOpcode::G_SHL:
6578 MainSrcIdx = (int)I - (int)K;
6579 CarrySrcIdx = MainSrcIdx - 1;
6580 break;
6581 case TargetOpcode::G_LSHR:
6582 case TargetOpcode::G_ASHR:
6583 MainSrcIdx = (int)I + (int)K;
6584 CarrySrcIdx = MainSrcIdx + 1;
6585 break;
6586 default:
6587 llvm_unreachable("Not a shift");
6588 }
6589
6590 // Check bounds and build the result for this word shift
6591 Register ResultForK;
6592 if (MainSrcIdx >= 0 && MainSrcIdx < (int)NumParts) {
6593 Register MainOp = SrcParts[MainSrcIdx];
6594 Register CarryOp;
6595
6596 // Determine carry operand with bounds checking
6597 if (CarrySrcIdx >= 0 && CarrySrcIdx < (int)NumParts)
6598 CarryOp = SrcParts[CarrySrcIdx];
6599 else if (MI.getOpcode() == TargetOpcode::G_ASHR &&
6600 CarrySrcIdx >= (int)NumParts)
6601 CarryOp = FillValue; // Use sign extension
6602
6603 ResultForK = buildVariableShiftPart(MI.getOpcode(), MainOp, BitShift,
6604 TargetTy, CarryOp);
6605 } else {
6606 // Out of bounds - use fill value for this k
6607 ResultForK = FillValue;
6608 }
6609
6610 // Select this result if WordShift equals k
6611 InBoundsResult =
6613 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6614 .getReg(0);
6615 }
6616
6617 // Handle zero-shift special case: if shift is 0, use original input
6618 DstParts[I] =
6620 .buildSelect(TargetTy, IsZeroShift, SrcParts[I], InBoundsResult)
6621 .getReg(0);
6622 }
6623
6624 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6625 MI.eraseFromParent();
6626 return Legalized;
6627}
6628
6631 LLT MoreTy) {
6632 assert(TypeIdx == 0 && "Expecting only Idx 0");
6633
6634 Observer.changingInstr(MI);
6635 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
6636 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
6637 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
6638 moreElementsVectorSrc(MI, MoreTy, I);
6639 }
6640
6641 MachineBasicBlock &MBB = *MI.getParent();
6642 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
6643 moreElementsVectorDst(MI, MoreTy, 0);
6644 Observer.changedInstr(MI);
6645 return Legalized;
6646}
6647
6648MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
6649 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
6650 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
6651
6652 switch (Opcode) {
6653 default:
6655 "getNeutralElementForVecReduce called with invalid opcode!");
6656 case TargetOpcode::G_VECREDUCE_ADD:
6657 case TargetOpcode::G_VECREDUCE_OR:
6658 case TargetOpcode::G_VECREDUCE_XOR:
6659 case TargetOpcode::G_VECREDUCE_UMAX:
6660 return MIRBuilder.buildConstant(Ty, 0);
6661 case TargetOpcode::G_VECREDUCE_MUL:
6662 return MIRBuilder.buildConstant(Ty, 1);
6663 case TargetOpcode::G_VECREDUCE_AND:
6664 case TargetOpcode::G_VECREDUCE_UMIN:
6666 Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
6667 case TargetOpcode::G_VECREDUCE_SMAX:
6669 Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
6670 case TargetOpcode::G_VECREDUCE_SMIN:
6672 Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
6673 case TargetOpcode::G_VECREDUCE_FADD:
6674 return MIRBuilder.buildFConstant(Ty, -0.0);
6675 case TargetOpcode::G_VECREDUCE_FMUL:
6676 return MIRBuilder.buildFConstant(Ty, 1.0);
6677 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6678 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6679 assert(false && "getNeutralElementForVecReduce unimplemented for "
6680 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6681 }
6682 llvm_unreachable("switch expected to return!");
6683}
6684
6687 LLT MoreTy) {
6688 unsigned Opc = MI.getOpcode();
6689 switch (Opc) {
6690 case TargetOpcode::G_IMPLICIT_DEF:
6691 case TargetOpcode::G_LOAD: {
6692 if (TypeIdx != 0)
6693 return UnableToLegalize;
6694 Observer.changingInstr(MI);
6695 moreElementsVectorDst(MI, MoreTy, 0);
6696 Observer.changedInstr(MI);
6697 return Legalized;
6698 }
6699 case TargetOpcode::G_STORE:
6700 if (TypeIdx != 0)
6701 return UnableToLegalize;
6702 Observer.changingInstr(MI);
6703 moreElementsVectorSrc(MI, MoreTy, 0);
6704 Observer.changedInstr(MI);
6705 return Legalized;
6706 case TargetOpcode::G_AND:
6707 case TargetOpcode::G_OR:
6708 case TargetOpcode::G_XOR:
6709 case TargetOpcode::G_ADD:
6710 case TargetOpcode::G_SUB:
6711 case TargetOpcode::G_MUL:
6712 case TargetOpcode::G_FADD:
6713 case TargetOpcode::G_FSUB:
6714 case TargetOpcode::G_FMUL:
6715 case TargetOpcode::G_FDIV:
6716 case TargetOpcode::G_FCOPYSIGN:
6717 case TargetOpcode::G_UADDSAT:
6718 case TargetOpcode::G_USUBSAT:
6719 case TargetOpcode::G_SADDSAT:
6720 case TargetOpcode::G_SSUBSAT:
6721 case TargetOpcode::G_SMIN:
6722 case TargetOpcode::G_SMAX:
6723 case TargetOpcode::G_UMIN:
6724 case TargetOpcode::G_UMAX:
6725 case TargetOpcode::G_FMINNUM:
6726 case TargetOpcode::G_FMAXNUM:
6727 case TargetOpcode::G_FMINNUM_IEEE:
6728 case TargetOpcode::G_FMAXNUM_IEEE:
6729 case TargetOpcode::G_FMINIMUM:
6730 case TargetOpcode::G_FMAXIMUM:
6731 case TargetOpcode::G_FMINIMUMNUM:
6732 case TargetOpcode::G_FMAXIMUMNUM:
6733 case TargetOpcode::G_STRICT_FADD:
6734 case TargetOpcode::G_STRICT_FSUB:
6735 case TargetOpcode::G_STRICT_FMUL: {
6736 Observer.changingInstr(MI);
6737 moreElementsVectorSrc(MI, MoreTy, 1);
6738 moreElementsVectorSrc(MI, MoreTy, 2);
6739 moreElementsVectorDst(MI, MoreTy, 0);
6740 Observer.changedInstr(MI);
6741 return Legalized;
6742 }
6743 case TargetOpcode::G_SHL:
6744 case TargetOpcode::G_ASHR:
6745 case TargetOpcode::G_LSHR: {
6746 Observer.changingInstr(MI);
6747 moreElementsVectorSrc(MI, MoreTy, 1);
6748 // The shift operand may have a different scalar type from the source and
6749 // destination operands.
6750 LLT ShiftMoreTy = MoreTy.changeElementType(
6751 MRI.getType(MI.getOperand(2).getReg()).getElementType());
6752 moreElementsVectorSrc(MI, ShiftMoreTy, 2);
6753 moreElementsVectorDst(MI, MoreTy, 0);
6754 Observer.changedInstr(MI);
6755 return Legalized;
6756 }
6757 case TargetOpcode::G_FMA:
6758 case TargetOpcode::G_STRICT_FMA:
6759 case TargetOpcode::G_FSHR:
6760 case TargetOpcode::G_FSHL: {
6761 Observer.changingInstr(MI);
6762 moreElementsVectorSrc(MI, MoreTy, 1);
6763 moreElementsVectorSrc(MI, MoreTy, 2);
6764 moreElementsVectorSrc(MI, MoreTy, 3);
6765 moreElementsVectorDst(MI, MoreTy, 0);
6766 Observer.changedInstr(MI);
6767 return Legalized;
6768 }
6769 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6770 case TargetOpcode::G_EXTRACT:
6771 if (TypeIdx != 1)
6772 return UnableToLegalize;
6773 Observer.changingInstr(MI);
6774 moreElementsVectorSrc(MI, MoreTy, 1);
6775 Observer.changedInstr(MI);
6776 return Legalized;
6777 case TargetOpcode::G_INSERT:
6778 case TargetOpcode::G_INSERT_VECTOR_ELT:
6779 case TargetOpcode::G_FREEZE:
6780 case TargetOpcode::G_FNEG:
6781 case TargetOpcode::G_FABS:
6782 case TargetOpcode::G_FSQRT:
6783 case TargetOpcode::G_FCEIL:
6784 case TargetOpcode::G_FFLOOR:
6785 case TargetOpcode::G_FNEARBYINT:
6786 case TargetOpcode::G_FRINT:
6787 case TargetOpcode::G_INTRINSIC_ROUND:
6788 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6789 case TargetOpcode::G_INTRINSIC_TRUNC:
6790 case TargetOpcode::G_BITREVERSE:
6791 case TargetOpcode::G_BSWAP:
6792 case TargetOpcode::G_FCANONICALIZE:
6793 case TargetOpcode::G_SEXT_INREG:
6794 case TargetOpcode::G_ABS:
6795 case TargetOpcode::G_CTLZ:
6796 case TargetOpcode::G_CTPOP:
6797 if (TypeIdx != 0)
6798 return UnableToLegalize;
6799 Observer.changingInstr(MI);
6800 moreElementsVectorSrc(MI, MoreTy, 1);
6801 moreElementsVectorDst(MI, MoreTy, 0);
6802 Observer.changedInstr(MI);
6803 return Legalized;
6804 case TargetOpcode::G_SELECT: {
6805 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
6806 if (TypeIdx == 1) {
6807 if (!CondTy.isScalar() ||
6808 DstTy.getElementCount() != MoreTy.getElementCount())
6809 return UnableToLegalize;
6810
6811 // This is turning a scalar select of vectors into a vector
6812 // select. Broadcast the select condition.
6813 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6814 Observer.changingInstr(MI);
6815 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6816 Observer.changedInstr(MI);
6817 return Legalized;
6818 }
6819
6820 if (CondTy.isVector())
6821 return UnableToLegalize;
6822
6823 Observer.changingInstr(MI);
6824 moreElementsVectorSrc(MI, MoreTy, 2);
6825 moreElementsVectorSrc(MI, MoreTy, 3);
6826 moreElementsVectorDst(MI, MoreTy, 0);
6827 Observer.changedInstr(MI);
6828 return Legalized;
6829 }
6830 case TargetOpcode::G_UNMERGE_VALUES:
6831 return UnableToLegalize;
6832 case TargetOpcode::G_PHI:
6833 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
6834 case TargetOpcode::G_SHUFFLE_VECTOR:
6835 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
6836 case TargetOpcode::G_BUILD_VECTOR: {
6838 for (auto Op : MI.uses()) {
6839 Elts.push_back(Op.getReg());
6840 }
6841
6842 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
6843 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
6844 }
6845
6846 MIRBuilder.buildDeleteTrailingVectorElements(
6847 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
6848 MI.eraseFromParent();
6849 return Legalized;
6850 }
6851 case TargetOpcode::G_SEXT:
6852 case TargetOpcode::G_ZEXT:
6853 case TargetOpcode::G_ANYEXT:
6854 case TargetOpcode::G_TRUNC:
6855 case TargetOpcode::G_FPTRUNC:
6856 case TargetOpcode::G_FPEXT:
6857 case TargetOpcode::G_FPTOSI:
6858 case TargetOpcode::G_FPTOUI:
6859 case TargetOpcode::G_FPTOSI_SAT:
6860 case TargetOpcode::G_FPTOUI_SAT:
6861 case TargetOpcode::G_SITOFP:
6862 case TargetOpcode::G_UITOFP: {
6863 Observer.changingInstr(MI);
6864 LLT SrcExtTy;
6865 LLT DstExtTy;
6866 if (TypeIdx == 0) {
6867 DstExtTy = MoreTy;
6868 SrcExtTy = MoreTy.changeElementType(
6869 MRI.getType(MI.getOperand(1).getReg()).getElementType());
6870 } else {
6871 DstExtTy = MoreTy.changeElementType(
6872 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6873 SrcExtTy = MoreTy;
6874 }
6875 moreElementsVectorSrc(MI, SrcExtTy, 1);
6876 moreElementsVectorDst(MI, DstExtTy, 0);
6877 Observer.changedInstr(MI);
6878 return Legalized;
6879 }
6880 case TargetOpcode::G_ICMP:
6881 case TargetOpcode::G_FCMP: {
6882 if (TypeIdx != 1)
6883 return UnableToLegalize;
6884
6885 Observer.changingInstr(MI);
6886 moreElementsVectorSrc(MI, MoreTy, 2);
6887 moreElementsVectorSrc(MI, MoreTy, 3);
6888 LLT CondTy = MoreTy.changeVectorElementType(
6889 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6890 moreElementsVectorDst(MI, CondTy, 0);
6891 Observer.changedInstr(MI);
6892 return Legalized;
6893 }
6894 case TargetOpcode::G_BITCAST: {
6895 if (TypeIdx != 0)
6896 return UnableToLegalize;
6897
6898 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
6899 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6900
6901 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
6902 if (coefficient % DstTy.getNumElements() != 0)
6903 return UnableToLegalize;
6904
6905 coefficient = coefficient / DstTy.getNumElements();
6906
6907 LLT NewTy = SrcTy.changeElementCount(
6908 ElementCount::get(coefficient, MoreTy.isScalable()));
6909 Observer.changingInstr(MI);
6910 moreElementsVectorSrc(MI, NewTy, 1);
6911 moreElementsVectorDst(MI, MoreTy, 0);
6912 Observer.changedInstr(MI);
6913 return Legalized;
6914 }
6915 case TargetOpcode::G_VECREDUCE_FADD:
6916 case TargetOpcode::G_VECREDUCE_FMUL:
6917 case TargetOpcode::G_VECREDUCE_ADD:
6918 case TargetOpcode::G_VECREDUCE_MUL:
6919 case TargetOpcode::G_VECREDUCE_AND:
6920 case TargetOpcode::G_VECREDUCE_OR:
6921 case TargetOpcode::G_VECREDUCE_XOR:
6922 case TargetOpcode::G_VECREDUCE_SMAX:
6923 case TargetOpcode::G_VECREDUCE_SMIN:
6924 case TargetOpcode::G_VECREDUCE_UMAX:
6925 case TargetOpcode::G_VECREDUCE_UMIN: {
6926 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
6927 MachineOperand &MO = MI.getOperand(1);
6928 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6929 auto NeutralElement = getNeutralElementForVecReduce(
6930 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
6931
6932 LLT IdxTy(TLI.getVectorIdxLLT(MIRBuilder.getDataLayout()));
6933 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
6934 i != e; i++) {
6935 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
6936 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6937 NeutralElement, Idx);
6938 }
6939
6940 Observer.changingInstr(MI);
6941 MO.setReg(NewVec.getReg(0));
6942 Observer.changedInstr(MI);
6943 return Legalized;
6944 }
6945
6946 default:
6947 return UnableToLegalize;
6948 }
6949}
6950
6953 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6954 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6955 unsigned MaskNumElts = Mask.size();
6956 unsigned SrcNumElts = SrcTy.getNumElements();
6957 LLT DestEltTy = DstTy.getElementType();
6958
6959 if (MaskNumElts == SrcNumElts)
6960 return Legalized;
6961
6962 if (MaskNumElts < SrcNumElts) {
6963 // Extend mask to match new destination vector size with
6964 // undef values.
6965 SmallVector<int, 16> NewMask(SrcNumElts, -1);
6966 llvm::copy(Mask, NewMask.begin());
6967
6968 moreElementsVectorDst(MI, SrcTy, 0);
6969 MIRBuilder.setInstrAndDebugLoc(MI);
6970 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6971 MI.getOperand(1).getReg(),
6972 MI.getOperand(2).getReg(), NewMask);
6973 MI.eraseFromParent();
6974
6975 return Legalized;
6976 }
6977
6978 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
6979 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6980 LLT PaddedTy =
6981 DstTy.changeVectorElementCount(ElementCount::getFixed(PaddedMaskNumElts));
6982
6983 // Create new source vectors by concatenating the initial
6984 // source vectors with undefined vectors of the same size.
6985 auto Undef = MIRBuilder.buildUndef(SrcTy);
6986 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
6987 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
6988 MOps1[0] = MI.getOperand(1).getReg();
6989 MOps2[0] = MI.getOperand(2).getReg();
6990
6991 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
6992 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
6993
6994 // Readjust mask for new input vector length.
6995 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
6996 for (unsigned I = 0; I != MaskNumElts; ++I) {
6997 int Idx = Mask[I];
6998 if (Idx >= static_cast<int>(SrcNumElts))
6999 Idx += PaddedMaskNumElts - SrcNumElts;
7000 MappedOps[I] = Idx;
7001 }
7002
7003 // If we got more elements than required, extract subvector.
7004 if (MaskNumElts != PaddedMaskNumElts) {
7005 auto Shuffle =
7006 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
7007
7008 SmallVector<Register, 16> Elts(MaskNumElts);
7009 for (unsigned I = 0; I < MaskNumElts; ++I) {
7010 Elts[I] =
7011 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
7012 .getReg(0);
7013 }
7014 MIRBuilder.buildBuildVector(DstReg, Elts);
7015 } else {
7016 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
7017 }
7018
7019 MI.eraseFromParent();
7021}
7022
7025 unsigned int TypeIdx, LLT MoreTy) {
7026 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
7027 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
7028 unsigned NumElts = DstTy.getNumElements();
7029 unsigned WidenNumElts = MoreTy.getNumElements();
7030
7031 if (DstTy.isVector() && Src1Ty.isVector() &&
7032 DstTy.getNumElements() != Src1Ty.getNumElements()) {
7034 }
7035
7036 if (TypeIdx != 0)
7037 return UnableToLegalize;
7038
7039 // Expect a canonicalized shuffle.
7040 if (DstTy != Src1Ty || DstTy != Src2Ty)
7041 return UnableToLegalize;
7042
7043 moreElementsVectorSrc(MI, MoreTy, 1);
7044 moreElementsVectorSrc(MI, MoreTy, 2);
7045
7046 // Adjust mask based on new input vector length.
7047 SmallVector<int, 16> NewMask(WidenNumElts, -1);
7048 for (unsigned I = 0; I != NumElts; ++I) {
7049 int Idx = Mask[I];
7050 if (Idx < static_cast<int>(NumElts))
7051 NewMask[I] = Idx;
7052 else
7053 NewMask[I] = Idx - NumElts + WidenNumElts;
7054 }
7055 moreElementsVectorDst(MI, MoreTy, 0);
7056 MIRBuilder.setInstrAndDebugLoc(MI);
7057 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
7058 MI.getOperand(1).getReg(),
7059 MI.getOperand(2).getReg(), NewMask);
7060 MI.eraseFromParent();
7061 return Legalized;
7062}
7063
7064void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
7065 ArrayRef<Register> Src1Regs,
7066 ArrayRef<Register> Src2Regs,
7067 LLT NarrowTy) {
7069 unsigned SrcParts = Src1Regs.size();
7070 unsigned DstParts = DstRegs.size();
7071
7072 unsigned DstIdx = 0; // Low bits of the result.
7073 Register FactorSum =
7074 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
7075 DstRegs[DstIdx] = FactorSum;
7076
7077 Register CarrySumPrevDstIdx;
7079
7080 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
7081 // Collect low parts of muls for DstIdx.
7082 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
7083 i <= std::min(DstIdx, SrcParts - 1); ++i) {
7085 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
7086 Factors.push_back(Mul.getReg(0));
7087 }
7088 // Collect high parts of muls from previous DstIdx.
7089 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
7090 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
7091 MachineInstrBuilder Umulh =
7092 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
7093 Factors.push_back(Umulh.getReg(0));
7094 }
7095 // Add CarrySum from additions calculated for previous DstIdx.
7096 if (DstIdx != 1) {
7097 Factors.push_back(CarrySumPrevDstIdx);
7098 }
7099
7100 Register CarrySum;
7101 // Add all factors and accumulate all carries into CarrySum.
7102 if (DstIdx != DstParts - 1) {
7103 MachineInstrBuilder Uaddo =
7104 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
7105 FactorSum = Uaddo.getReg(0);
7106 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
7107 for (unsigned i = 2; i < Factors.size(); ++i) {
7108 MachineInstrBuilder Uaddo =
7109 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
7110 FactorSum = Uaddo.getReg(0);
7111 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
7112 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
7113 }
7114 } else {
7115 // Since value for the next index is not calculated, neither is CarrySum.
7116 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
7117 for (unsigned i = 2; i < Factors.size(); ++i)
7118 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
7119 }
7120
7121 CarrySumPrevDstIdx = CarrySum;
7122 DstRegs[DstIdx] = FactorSum;
7123 Factors.clear();
7124 }
7125}
7126
7129 LLT NarrowTy) {
7130 if (TypeIdx != 0)
7131 return UnableToLegalize;
7132
7133 Register DstReg = MI.getOperand(0).getReg();
7134 LLT DstType = MRI.getType(DstReg);
7135 // FIXME: add support for vector types
7136 if (DstType.isVector())
7137 return UnableToLegalize;
7138
7139 unsigned Opcode = MI.getOpcode();
7140 unsigned OpO, OpE, OpF;
7141 switch (Opcode) {
7142 case TargetOpcode::G_SADDO:
7143 case TargetOpcode::G_SADDE:
7144 case TargetOpcode::G_UADDO:
7145 case TargetOpcode::G_UADDE:
7146 case TargetOpcode::G_ADD:
7147 OpO = TargetOpcode::G_UADDO;
7148 OpE = TargetOpcode::G_UADDE;
7149 OpF = TargetOpcode::G_UADDE;
7150 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7151 OpF = TargetOpcode::G_SADDE;
7152 break;
7153 case TargetOpcode::G_SSUBO:
7154 case TargetOpcode::G_SSUBE:
7155 case TargetOpcode::G_USUBO:
7156 case TargetOpcode::G_USUBE:
7157 case TargetOpcode::G_SUB:
7158 OpO = TargetOpcode::G_USUBO;
7159 OpE = TargetOpcode::G_USUBE;
7160 OpF = TargetOpcode::G_USUBE;
7161 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7162 OpF = TargetOpcode::G_SSUBE;
7163 break;
7164 default:
7165 llvm_unreachable("Unexpected add/sub opcode!");
7166 }
7167
7168 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
7169 unsigned NumDefs = MI.getNumExplicitDefs();
7170 Register Src1 = MI.getOperand(NumDefs).getReg();
7171 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
7172 Register CarryDst, CarryIn;
7173 if (NumDefs == 2)
7174 CarryDst = MI.getOperand(1).getReg();
7175 if (MI.getNumOperands() == NumDefs + 3)
7176 CarryIn = MI.getOperand(NumDefs + 2).getReg();
7177
7178 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7179 LLT LeftoverTy, DummyTy;
7180 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
7181 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7182 MIRBuilder, MRI);
7183 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
7184 MRI);
7185
7186 int NarrowParts = Src1Regs.size();
7187 Src1Regs.append(Src1Left);
7188 Src2Regs.append(Src2Left);
7189 DstRegs.reserve(Src1Regs.size());
7190
7191 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
7192 Register DstReg =
7193 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7194 Register CarryOut;
7195 // Forward the final carry-out to the destination register
7196 if (i == e - 1 && CarryDst)
7197 CarryOut = CarryDst;
7198 else
7199 CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
7200
7201 if (!CarryIn) {
7202 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7203 {Src1Regs[i], Src2Regs[i]});
7204 } else if (i == e - 1) {
7205 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7206 {Src1Regs[i], Src2Regs[i], CarryIn});
7207 } else {
7208 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7209 {Src1Regs[i], Src2Regs[i], CarryIn});
7210 }
7211
7212 DstRegs.push_back(DstReg);
7213 CarryIn = CarryOut;
7214 }
7215 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
7216 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7217 ArrayRef(DstRegs).drop_front(NarrowParts));
7218
7219 MI.eraseFromParent();
7220 return Legalized;
7221}
7222
7225 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
7226
7227 LLT Ty = MRI.getType(DstReg);
7228 if (Ty.isVector())
7229 return UnableToLegalize;
7230
7231 unsigned Size = Ty.getSizeInBits();
7232 unsigned NarrowSize = NarrowTy.getSizeInBits();
7233 if (Size % NarrowSize != 0)
7234 return UnableToLegalize;
7235
7236 unsigned NumParts = Size / NarrowSize;
7237 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
7238 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7239
7240 SmallVector<Register, 2> Src1Parts, Src2Parts;
7241 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
7242 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
7243 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
7244 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7245
7246 // Take only high half of registers if this is high mul.
7247 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
7248 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7249 MI.eraseFromParent();
7250 return Legalized;
7251}
7252
7255 LLT NarrowTy) {
7256 if (TypeIdx != 0)
7257 return UnableToLegalize;
7258
7259 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
7260
7261 Register Src = MI.getOperand(1).getReg();
7262 LLT SrcTy = MRI.getType(Src);
7263
7264 // If all finite floats fit into the narrowed integer type, we can just swap
7265 // out the result type. This is practically only useful for conversions from
7266 // half to at least 16-bits, so just handle the one case.
7267 if (SrcTy.getScalarType() != LLT::scalar(16) ||
7268 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
7269 return UnableToLegalize;
7270
7271 Observer.changingInstr(MI);
7272 narrowScalarDst(MI, NarrowTy, 0,
7273 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7274 Observer.changedInstr(MI);
7275 return Legalized;
7276}
7277
7280 LLT NarrowTy) {
7281 if (TypeIdx != 1)
7282 return UnableToLegalize;
7283
7284 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7285
7286 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7287 // FIXME: add support for when SizeOp1 isn't an exact multiple of
7288 // NarrowSize.
7289 if (SizeOp1 % NarrowSize != 0)
7290 return UnableToLegalize;
7291 int NumParts = SizeOp1 / NarrowSize;
7292
7293 SmallVector<Register, 2> SrcRegs, DstRegs;
7294 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7295 MIRBuilder, MRI);
7296
7297 Register OpReg = MI.getOperand(0).getReg();
7298 uint64_t OpStart = MI.getOperand(2).getImm();
7299 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7300 for (int i = 0; i < NumParts; ++i) {
7301 unsigned SrcStart = i * NarrowSize;
7302
7303 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7304 // No part of the extract uses this subregister, ignore it.
7305 continue;
7306 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7307 // The entire subregister is extracted, forward the value.
7308 DstRegs.push_back(SrcRegs[i]);
7309 continue;
7310 }
7311
7312 // OpSegStart is where this destination segment would start in OpReg if it
7313 // extended infinitely in both directions.
7314 int64_t ExtractOffset;
7315 uint64_t SegSize;
7316 if (OpStart < SrcStart) {
7317 ExtractOffset = 0;
7318 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7319 } else {
7320 ExtractOffset = OpStart - SrcStart;
7321 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7322 }
7323
7324 Register SegReg = SrcRegs[i];
7325 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7326 // A genuine extract is needed.
7327 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7328 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7329 }
7330
7331 DstRegs.push_back(SegReg);
7332 }
7333
7334 Register DstReg = MI.getOperand(0).getReg();
7335 if (MRI.getType(DstReg).isVector())
7336 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7337 else if (DstRegs.size() > 1)
7338 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7339 else
7340 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
7341 MI.eraseFromParent();
7342 return Legalized;
7343}
7344
7347 LLT NarrowTy) {
7348 // FIXME: Don't know how to handle secondary types yet.
7349 if (TypeIdx != 0)
7350 return UnableToLegalize;
7351
7352 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
7353 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7354 LLT LeftoverTy;
7355 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7356 LeftoverRegs, MIRBuilder, MRI);
7357
7358 SrcRegs.append(LeftoverRegs);
7359
7360 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7361 Register OpReg = MI.getOperand(2).getReg();
7362 uint64_t OpStart = MI.getOperand(3).getImm();
7363 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7364 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
7365 unsigned DstStart = I * NarrowSize;
7366
7367 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7368 // The entire subregister is defined by this insert, forward the new
7369 // value.
7370 DstRegs.push_back(OpReg);
7371 continue;
7372 }
7373
7374 Register SrcReg = SrcRegs[I];
7375 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
7376 // The leftover reg is smaller than NarrowTy, so we need to extend it.
7377 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7378 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
7379 }
7380
7381 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7382 // No part of the insert affects this subregister, forward the original.
7383 DstRegs.push_back(SrcReg);
7384 continue;
7385 }
7386
7387 // OpSegStart is where this destination segment would start in OpReg if it
7388 // extended infinitely in both directions.
7389 int64_t ExtractOffset, InsertOffset;
7390 uint64_t SegSize;
7391 if (OpStart < DstStart) {
7392 InsertOffset = 0;
7393 ExtractOffset = DstStart - OpStart;
7394 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7395 } else {
7396 InsertOffset = OpStart - DstStart;
7397 ExtractOffset = 0;
7398 SegSize =
7399 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7400 }
7401
7402 Register SegReg = OpReg;
7403 if (ExtractOffset != 0 || SegSize != OpSize) {
7404 // A genuine extract is needed.
7405 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7406 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7407 }
7408
7409 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7410 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7411 DstRegs.push_back(DstReg);
7412 }
7413
7414 uint64_t WideSize = DstRegs.size() * NarrowSize;
7415 Register DstReg = MI.getOperand(0).getReg();
7416 if (WideSize > RegTy.getSizeInBits()) {
7417 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
7418 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7419 MIRBuilder.buildTrunc(DstReg, MergeReg);
7420 } else
7421 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7422
7423 MI.eraseFromParent();
7424 return Legalized;
7425}
7426
7429 LLT NarrowTy) {
7430 Register DstReg = MI.getOperand(0).getReg();
7431 LLT DstTy = MRI.getType(DstReg);
7432
7433 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
7434
7435 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7436 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
7437 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7438 LLT LeftoverTy;
7439 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7440 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
7441 return UnableToLegalize;
7442
7443 LLT Unused;
7444 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7445 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7446 llvm_unreachable("inconsistent extractParts result");
7447
7448 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7449 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
7450 {Src0Regs[I], Src1Regs[I]});
7451 DstRegs.push_back(Inst.getReg(0));
7452 }
7453
7454 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7455 auto Inst = MIRBuilder.buildInstr(
7456 MI.getOpcode(),
7457 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7458 DstLeftoverRegs.push_back(Inst.getReg(0));
7459 }
7460
7461 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7462 LeftoverTy, DstLeftoverRegs);
7463
7464 MI.eraseFromParent();
7465 return Legalized;
7466}
7467
// Narrow an extension-style instruction on its result (TypeIdx 0) by building
// the result from GCD-sized pieces of the source and re-merging to the LCM
// type. Scalar destinations only. NOTE(review): signature line and the
// declaration of `Parts` (original line 7480) are not visible in this extract.
7470 LLT NarrowTy) {
7471 if (TypeIdx != 0)
7472 return UnableToLegalize;
7473
7474 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7475
7476 LLT DstTy = MRI.getType(DstReg);
7477 if (DstTy.isVector())
7478 return UnableToLegalize;
7479
7481 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7482 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
7483 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7484
7485 MI.eraseFromParent();
7486 return Legalized;
7487}
7488
// Narrow a select with a scalar condition: split both value operands into
// NarrowTy pieces plus leftovers, emit one select per piece reusing the same
// condition register, and re-merge. Vector conditions are not handled (TODO).
// NOTE(review): signature line not visible in this extract.
7491 LLT NarrowTy) {
7492 if (TypeIdx != 0)
7493 return UnableToLegalize;
7494
7495 Register CondReg = MI.getOperand(1).getReg();
7496 LLT CondTy = MRI.getType(CondReg);
7497 if (CondTy.isVector()) // TODO: Handle vselect
7498 return UnableToLegalize;
7499
7500 Register DstReg = MI.getOperand(0).getReg();
7501 LLT DstTy = MRI.getType(DstReg);
7502
7503 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7504 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7505 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
7506 LLT LeftoverTy;
7507 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7508 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7509 return UnableToLegalize;
7510
// The two value operands share a type, so the second split cannot fail if the
// first succeeded.
7511 LLT Unused;
7512 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7513 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
7514 llvm_unreachable("inconsistent extractParts result");
7515
7516 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7517 auto Select = MIRBuilder.buildSelect(NarrowTy,
7518 CondReg, Src1Regs[I], Src2Regs[I]);
7519 DstRegs.push_back(Select.getReg(0));
7520 }
7521
7522 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7523 auto Select = MIRBuilder.buildSelect(
7524 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
7525 DstLeftoverRegs.push_back(Select.getReg(0));
7526 }
7527
7528 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7529 LeftoverTy, DstLeftoverRegs);
7530
7531 MI.eraseFromParent();
7532 return Legalized;
7533}
7534
// Narrow G_CTLZ / G_CTLZ_ZERO_UNDEF on the source operand (TypeIdx 1) for the
// exact half-width case: unmerge into (Lo, Hi) and select between ctlz(Hi)
// and NarrowSize + ctlz(Lo) depending on whether Hi is zero.
// NOTE(review): signature line and the `B` builder alias declaration
// (original line 7547) are not visible in this extract.
7537 LLT NarrowTy) {
7538 if (TypeIdx != 1)
7539 return UnableToLegalize;
7540
7541 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7542 unsigned NarrowSize = NarrowTy.getSizeInBits();
7543
7544 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7545 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
7546
7548 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7549 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
7550 auto C_0 = B.buildConstant(NarrowTy, 0);
7551 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7552 UnmergeSrc.getReg(1), C_0);
// Only the low-half count needs the zero-defined form when the original was
// plain G_CTLZ; Hi is known nonzero on the path that uses HiCTLZ, so the
// zero-undef variant is always safe there.
7553 auto LoCTLZ = IsUndef ?
7554 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
7555 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7556 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7557 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7558 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
7559 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7560
7561 MI.eraseFromParent();
7562 return Legalized;
7563 }
7564
7565 return UnableToLegalize;
7566}
7567
// Narrow G_CTTZ / G_CTTZ_ZERO_UNDEF on the source operand (TypeIdx 1), mirror
// image of the CTLZ case: select between cttz(Lo) and cttz(Hi) + NarrowSize
// depending on whether Lo is zero. NOTE(review): signature line and the `B`
// builder alias declaration (original line 7580) are not visible here.
7570 LLT NarrowTy) {
7571 if (TypeIdx != 1)
7572 return UnableToLegalize;
7573
7574 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7575 unsigned NarrowSize = NarrowTy.getSizeInBits();
7576
7577 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7578 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
7579
7581 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7582 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
7583 auto C_0 = B.buildConstant(NarrowTy, 0);
7584 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7585 UnmergeSrc.getReg(0), C_0);
// Lo is known nonzero on the LoCTTZ path, so the zero-undef variant is always
// safe there; the Hi count uses the zero-defined form unless the original
// instruction was already the zero-undef flavor.
7586 auto HiCTTZ = IsUndef ?
7587 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
7588 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7589 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7590 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7591 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
7592 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7593
7594 MI.eraseFromParent();
7595 return Legalized;
7596 }
7597
7598 return UnableToLegalize;
7599}
7600
// Narrow a count-leading-sign-bits (G_CTLS, per the lowerBitCount case below)
// on the source operand for the exact half-width case. If the high half is
// entirely sign bits, the answer is (NarrowSize - 1) plus the count of sign
// bits continuing into the low half; otherwise it is just ctls(Hi).
// NOTE(review): signature line and the `B` builder alias declaration
// (original line 7613) are not visible in this extract.
7603 LLT NarrowTy) {
7604 if (TypeIdx != 1)
7605 return UnableToLegalize;
7606
7607 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7608 unsigned NarrowSize = NarrowTy.getSizeInBits();
7609
7610 if (!SrcTy.isScalar() || SrcTy.getSizeInBits() != 2 * NarrowSize)
7611 return UnableToLegalize;
7612
7614
7615 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7616 Register Lo = UnmergeSrc.getReg(0);
7617 Register Hi = UnmergeSrc.getReg(1);
7618
// Sign is 0 or all-ones depending on Hi's sign bit; Hi == Sign iff every bit
// of Hi is a copy of the sign bit.
7619 auto ShAmt = B.buildConstant(NarrowTy, NarrowSize - 1);
7620 auto Sign = B.buildAShr(NarrowTy, Hi, ShAmt);
7621
7622 auto HiIsSign = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), Hi, Sign);
7623
7624 // Invert Lo if Hi is negative. Then count the leading zeros. If there are no
7625 // leading zeros, then the MSB of Lo is different than the MSB of Hi.
7626 // Otherwise the leading zeros represent additional sign bits of the original
7627 // value.
7628 auto LoInv = B.buildXor(DstTy, Lo, Sign);
7629 auto LoCTLZ = B.buildCTLZ(DstTy, LoInv);
7630
7631 // Add NarrowSize-1 to LoCTLZ. This is the full CTLS if Hi is all sign bits.
7632 auto C_NarrowSizeM1 = B.buildConstant(DstTy, NarrowSize - 1);
7633 auto HiIsSignCTLS = B.buildAdd(DstTy, LoCTLZ, C_NarrowSizeM1);
7634
7635 auto HiCTLS = B.buildCTLS(DstTy, Hi);
7636
7637 B.buildSelect(DstReg, HiIsSign, HiIsSignCTLS, HiCTLS);
7638
7639 MI.eraseFromParent();
7640 return Legalized;
7641}
7642
// Narrow G_CTPOP on the source operand for the exact half-width case: the
// population count of a concatenation is simply the sum of the halves'
// counts. NOTE(review): signature line not visible in this extract.
7645 LLT NarrowTy) {
7646 if (TypeIdx != 1)
7647 return UnableToLegalize;
7648
7649 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7650 unsigned NarrowSize = NarrowTy.getSizeInBits();
7651
7652 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7653 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
7654
7655 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7656 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7657 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7658
7659 MI.eraseFromParent();
7660 return Legalized;
7661 }
7662
7663 return UnableToLegalize;
7664}
7665
// Narrow the integer exponent operand (operand 2, TypeIdx 1) of an
// fldexp-style instruction: signed-clamp it to the narrow type's range with
// smax/smin, truncate, and rewrite the operand in place (no new instruction
// replaces MI -- it is modified under Observer notifications).
// NOTE(review): signature line and the `B` builder alias declaration
// (original line 7672) are not visible in this extract.
7668 LLT NarrowTy) {
7669 if (TypeIdx != 1)
7670 return UnableToLegalize;
7671
7673 Register ExpReg = MI.getOperand(2).getReg();
7674 LLT ExpTy = MRI.getType(ExpReg);
7675
7676 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
7677
7678 // Clamp the exponent to the range of the target type.
7679 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
7680 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
7681 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
7682 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
7683
7684 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
7685 Observer.changingInstr(MI);
7686 MI.getOperand(2).setReg(Trunc.getReg(0));
7687 Observer.changedInstr(MI);
7688 return Legalized;
7689}
7690
// Lower bit-counting operations (CTLZ/CTTZ and their ZERO_UNDEF variants,
// CTPOP, CTLS) into simpler operations, preferring forms the target supports
// (Legal/Libcall/Custom per the isSupported lambda). NOTE(review): the
// function signature line is not visible in this extract; this is the
// lowerBitCount dispatcher judging by the opcode cases.
7693 unsigned Opc = MI.getOpcode();
7694 const auto &TII = MIRBuilder.getTII();
7695 auto isSupported = [this](const LegalityQuery &Q) {
7696 auto QAction = LI.getAction(Q).Action;
7697 return QAction == Legal || QAction == Libcall || QAction == Custom;
7698 };
7699 switch (Opc) {
7700 default:
7701 return UnableToLegalize;
7702 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7703 // This trivially expands to CTLZ.
7704 Observer.changingInstr(MI);
7705 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
7706 Observer.changedInstr(MI);
7707 return Legalized;
7708 }
7709 case TargetOpcode::G_CTLZ: {
7710 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7711 unsigned Len = SrcTy.getScalarSizeInBits();
7712
7713 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7714 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
7715 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7716 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
7717 auto ICmp = MIRBuilder.buildICmp(
7718 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
7719 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7720 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7721 MI.eraseFromParent();
7722 return Legalized;
7723 }
7724 // for now, we do this:
7725 // NewLen = NextPowerOf2(Len);
7726 // x = x | (x >> 1);
7727 // x = x | (x >> 2);
7728 // ...
7729 // x = x | (x >>16);
7730 // x = x | (x >>32); // for 64-bit input
7731 // Upto NewLen/2
7732 // return Len - popcount(x);
7733 //
7734 // Ref: "Hacker's Delight" by Henry Warren
7735 Register Op = SrcReg;
7736 unsigned NewLen = PowerOf2Ceil(Len);
7737 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7738 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7739 auto MIBOp = MIRBuilder.buildOr(
7740 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
7741 Op = MIBOp.getReg(0);
7742 }
7743 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
7744 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
7745 MIBPop);
7746 MI.eraseFromParent();
7747 return Legalized;
7748 }
7749 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7750 // This trivially expands to CTTZ.
7751 Observer.changingInstr(MI);
7752 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
7753 Observer.changedInstr(MI);
7754 return Legalized;
7755 }
7756 case TargetOpcode::G_CTTZ: {
7757 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7758
7759 unsigned Len = SrcTy.getScalarSizeInBits();
7760 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7761 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
7762 // zero.
7763 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7764 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
7765 auto ICmp = MIRBuilder.buildICmp(
7766 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
7767 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7768 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7769 MI.eraseFromParent();
7770 return Legalized;
7771 }
7772 // for now, we use: { return popcount(~x & (x - 1)); }
7773 // unless the target has ctlz but not ctpop, in which case we use:
7774 // { return 32 - nlz(~x & (x-1)); }
7775 // Ref: "Hacker's Delight" by Henry Warren
7776 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
7777 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7778 auto MIBTmp = MIRBuilder.buildAnd(
7779 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7780 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7781 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7782 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
7783 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
7784 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
7785 MI.eraseFromParent();
7786 return Legalized;
7787 }
// Reuse MI itself as the CTPOP: swap the descriptor and point the source at
// the ~x & (x - 1) temporary.
7788 Observer.changingInstr(MI);
7789 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
7790 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7791 Observer.changedInstr(MI);
7792 return Legalized;
7793 }
7794 case TargetOpcode::G_CTPOP: {
7795 Register SrcReg = MI.getOperand(1).getReg();
7796 LLT Ty = MRI.getType(SrcReg);
7797 unsigned Size = Ty.getScalarSizeInBits();
7799
7800 // Bail out on irregular type lengths.
7801 if (Size > 128 || Size % 8 != 0)
7802 return UnableToLegalize;
7803
7804 // Count set bits in blocks of 2 bits. Default approach would be
7805 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
7806 // We use following formula instead:
7807 // B2Count = val - { (val >> 1) & 0x55555555 }
7808 // since it gives same result in blocks of 2 with one instruction less.
7809 auto C_1 = B.buildConstant(Ty, 1);
7810 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
7811 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
7812 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
7813 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7814 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
7815
7816 // In order to get count in blocks of 4 add values from adjacent block of 2.
7817 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
7818 auto C_2 = B.buildConstant(Ty, 2);
7819 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
7820 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
7821 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
7822 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7823 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7824 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7825
7826 // For count in blocks of 8 bits we don't have to mask high 4 bits before
7827 // addition since count value sits in range {0,...,8} and 4 bits are enough
7828 // to hold such binary values. After addition high 4 bits still hold count
7829 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
7830 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
7831 auto C_4 = B.buildConstant(Ty, 4);
7832 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
7833 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
7834 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
7835 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
7836 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7837
7838 assert(Size <= 128 && "Scalar size is too large for CTPOP lower algorithm");
7839
7840 // Avoid the multiply when shift-add is cheaper.
7841 if (Size == 16 && !Ty.isVector()) {
7842 // v = (v + (v >> 8)) & 0xFF;
7843 auto C_8 = B.buildConstant(Ty, 8);
7844 auto HighSum = B.buildLShr(Ty, B8Count, C_8);
7845 auto Res = B.buildAdd(Ty, B8Count, HighSum);
7846 B.buildAnd(MI.getOperand(0).getReg(), Res, B.buildConstant(Ty, 0xFF));
7847 MI.eraseFromParent();
7848 return Legalized;
7849 }
7850
7851 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
7852 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
7853 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
7854
7855 // Shift count result from 8 high bits to low bits.
7856 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
7857
7858 auto IsMulSupported = [this](const LLT Ty) {
7859 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7860 return Action == Legal || Action == WidenScalar || Action == Custom;
7861 };
7862 if (IsMulSupported(Ty)) {
7863 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
7864 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7865 } else {
// No usable multiply: emulate the 0x01…01 multiply with a shift-add ladder,
// which accumulates all byte counts into the top byte just like the mul.
7866 auto ResTmp = B8Count;
7867 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7868 auto ShiftC = B.buildConstant(Ty, Shift);
7869 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
7870 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
7871 }
7872 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7873 }
7874 MI.eraseFromParent();
7875 return Legalized;
7876 }
7877 case TargetOpcode::G_CTLS: {
7878 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7879
7880 // ctls(x) -> ctlz(x ^ (x >> (N - 1))) - 1
7881 auto SignIdxC =
7882 MIRBuilder.buildConstant(SrcTy, SrcTy.getScalarSizeInBits() - 1);
7883 auto OneC = MIRBuilder.buildConstant(DstTy, 1);
7884
7885 auto Shr = MIRBuilder.buildAShr(SrcTy, SrcReg, SignIdxC);
7886
7887 auto Xor = MIRBuilder.buildXor(SrcTy, SrcReg, Shr);
7888 auto Ctlz = MIRBuilder.buildCTLZ(DstTy, Xor);
7889
7890 MIRBuilder.buildSub(DstReg, Ctlz, OneC);
7891 MI.eraseFromParent();
7892 return Legalized;
7893 }
7894 }
7895}
7896
7897// Check that (every element of) Reg is undef or not an exact multiple of BW.
// Used by the funnel-shift lowerings below to decide whether the shift amount
// can be proven nonzero mod the bit width. NOTE(review): the first signature
// line and the line declaring `CI` (original line 7904, presumably a
// dyn_cast<ConstantInt> of C) are not visible in this extract.
7899 Register Reg, unsigned BW) {
7900 return matchUnaryPredicate(
7901 MRI, Reg,
7902 [=](const Constant *C) {
7903 // Null constant here means an undef.
7905 return !CI || CI->getValue().urem(BW) != 0;
7906 },
7907 /*AllowUndefs*/ true);
7908}
7909
// Lower a funnel shift by rewriting it as the opposite-direction funnel
// shift. Requires a power-of-two bit width so that negating/inverting the
// shift amount is exact mod BW. NOTE(review): signature line not visible in
// this extract.
7912 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7913 LLT Ty = MRI.getType(Dst);
7914 LLT ShTy = MRI.getType(Z);
7915
7916 unsigned BW = Ty.getScalarSizeInBits();
7917
7918 if (!isPowerOf2_32(BW))
7919 return UnableToLegalize;
7920
7921 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7922 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7923
7924 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7925 // fshl X, Y, Z -> fshr X, Y, -Z
7926 // fshr X, Y, Z -> fshl X, Y, -Z
// NOTE(review): the sub is built with result type Ty while Zero/Z have the
// shift-amount type ShTy -- confirm against upstream whether these types are
// expected to coincide here.
7927 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
7928 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7929 } else {
7930 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7931 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7932 auto One = MIRBuilder.buildConstant(ShTy, 1);
7933 if (IsFSHL) {
7934 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7935 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
7936 } else {
7937 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7938 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
7939 }
7940
7941 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
7942 }
7943
7944 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
7945 MI.eraseFromParent();
7946 return Legalized;
7947}
7948
// Lower a funnel shift into two ordinary shifts plus an OR, taking care that
// no individual shift amount can equal the bit width (which would be a
// poison/undefined shift). NOTE(review): signature line not visible in this
// extract.
7951 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7952 LLT Ty = MRI.getType(Dst);
7953 LLT ShTy = MRI.getType(Z);
7954
7955 const unsigned BW = Ty.getScalarSizeInBits();
7956 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7957
7958 Register ShX, ShY;
7959 Register ShAmt, InvShAmt;
7960
7961 // FIXME: Emit optimized urem by constant instead of letting it expand later.
7962 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7963 // fshl: X << C | Y >> (BW - C)
7964 // fshr: X << (BW - C) | Y >> C
7965 // where C = Z % BW is not zero
7966 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7967 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7968 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
7969 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
7970 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
7971 } else {
7972 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7973 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7974 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
7975 if (isPowerOf2_32(BW)) {
7976 // Z % BW -> Z & (BW - 1)
7977 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
7978 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7979 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
7980 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
7981 } else {
7982 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7983 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7984 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
7985 }
7986
// The extra fixed shift by 1 keeps each variable shift amount strictly below
// BW even when Z % BW == 0.
7987 auto One = MIRBuilder.buildConstant(ShTy, 1);
7988 if (IsFSHL) {
7989 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
7990 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
7991 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
7992 } else {
7993 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
7994 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
7995 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
7996 }
7997 }
7998
// The two halves cover disjoint bit ranges, so the OR can carry the disjoint
// flag.
7999 MIRBuilder.buildOr(Dst, ShX, ShY, MachineInstr::Disjoint);
8000 MI.eraseFromParent();
8001 return Legalized;
8002}
8003
// Top-level funnel-shift lowering: prefer rewriting as the reverse-direction
// funnel shift unless that opcode would itself be lowered, in which case (or
// if the inverse rewrite fails, e.g. non-power-of-two width) fall back to
// plain shifts. NOTE(review): signature line not visible in this extract.
8006 // These operations approximately do the following (while avoiding undefined
8007 // shifts by BW):
8008 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
8009 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
8010 Register Dst = MI.getOperand(0).getReg();
8011 LLT Ty = MRI.getType(Dst);
8012 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
8013
8014 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
8015 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
8016
8017 // TODO: Use smarter heuristic that accounts for vector legalization.
8018 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
8019 return lowerFunnelShiftAsShifts(MI);
8020
8021 // This only works for powers of 2, fallback to shifts if it fails.
8022 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
8023 if (Result == UnableToLegalize)
8024 return lowerFunnelShiftAsShifts(MI);
8025 return Result;
8026}
8027
// Lower an extension whose per-element step is more than a doubling by
// inserting an intermediate extend to double the element size, unmerging the
// intermediate vector in half, extending each half to the final element size,
// and re-merging. All sizes must be powers of two. NOTE(review): signature
// line and the divideCoefficientBy continuation lines (original 8050, 8055)
// are not visible in this extract.
8029 auto [Dst, Src] = MI.getFirst2Regs();
8030 LLT DstTy = MRI.getType(Dst);
8031 LLT SrcTy = MRI.getType(Src);
8032
8033 uint32_t DstTySize = DstTy.getSizeInBits();
8034 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
8035 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
8036
8037 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
8038 !isPowerOf2_32(SrcTyScalarSize))
8039 return UnableToLegalize;
8040
8041 // The step between extend is too large, split it by creating an intermediate
8042 // extend instruction
8043 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
8044 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
8045 // If the destination type is illegal, split it into multiple statements
8046 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
8047 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
8048 // Unmerge the vector
8049 LLT EltTy = MidTy.changeElementCount(
8051 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
8052
8053 // ZExt the vectors
8054 LLT ZExtResTy = DstTy.changeElementCount(
8056 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
8057 {UnmergeSrc.getReg(0)});
8058 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
8059 {UnmergeSrc.getReg(1)});
8060
8061 // Merge the ending vectors
8062 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
8063
8064 MI.eraseFromParent();
8065 return Legalized;
8066 }
8067 return UnableToLegalize;
8068}
8069
// Lower a vector G_TRUNC whose element-size step is too large by splitting
// the source in half, truncating each half to an intermediate element size
// (at most a halving per step), concatenating, and truncating/copying to the
// final type. Requires power-of-two element counts and scalar sizes.
// NOTE(review): signature line and part of the enabling condition (original
// line 8089) are not visible in this extract.
8071 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
8072 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
8073 // Similar to how operand splitting is done in SelectiondDAG, we can handle
8074 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
8075 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
8076 // %lo16(<4 x s16>) = G_TRUNC %inlo
8077 // %hi16(<4 x s16>) = G_TRUNC %inhi
8078 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
8079 // %res(<8 x s8>) = G_TRUNC %in16
8080
8081 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
8082
8083 Register DstReg = MI.getOperand(0).getReg();
8084 Register SrcReg = MI.getOperand(1).getReg();
8085 LLT DstTy = MRI.getType(DstReg);
8086 LLT SrcTy = MRI.getType(SrcReg);
8087
8088 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
8090 isPowerOf2_32(SrcTy.getNumElements()) &&
8091 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
8092 // Split input type.
8093 LLT SplitSrcTy = SrcTy.changeElementCount(
8094 SrcTy.getElementCount().divideCoefficientBy(2));
8095
8096 // First, split the source into two smaller vectors.
8097 SmallVector<Register, 2> SplitSrcs;
8098 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
8099
8100 // Truncate the splits into intermediate narrower elements.
8101 LLT InterTy;
8102 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
8103 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
8104 else
8105 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
8106 for (Register &Src : SplitSrcs)
8107 Src = MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
8108
8109 // Combine the new truncates into one vector
8110 auto Merge = MIRBuilder.buildMergeLikeInstr(
8111 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
8112
8113 // Truncate the new vector to the final result type
// If the intermediate element size already equals the destination's, a plain
// copy suffices; otherwise another (recursively legalized) trunc is emitted.
8114 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
8115 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
8116 else
8117 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
8118
8119 MI.eraseFromParent();
8120
8121 return Legalized;
8122 }
8123 return UnableToLegalize;
8124}
8125
// Lower a rotate by emitting the opposite-direction rotate with a negated
// amount (rotl x, c == rotr x, -c for power-of-two widths; the caller below
// checks that precondition). NOTE(review): signature line not visible in
// this extract.
8128 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
8129 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
8130 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
8131 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8132 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
8133 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
8134 MI.eraseFromParent();
8135 return Legalized;
8136}
8137
// Lower G_ROTL/G_ROTR, trying progressively cheaper strategies: the reverse
// rotate, then a funnel shift (same or reverse direction), then plain
// shift+or expansions that keep every shift amount strictly below the bit
// width. NOTE(review): signature line not visible in this extract.
8139 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
8140
8141 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
8142 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
8143
8144 MIRBuilder.setInstrAndDebugLoc(MI);
8145
8146 // If a rotate in the other direction is supported, use it.
8147 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8148 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
8149 isPowerOf2_32(EltSizeInBits))
8150 return lowerRotateWithReverseRotate(MI);
8151
8152 // If a funnel shift is supported, use it.
// A rotate is a funnel shift with both data operands equal (note {R2, R2, R3}
// below).
8153 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8154 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8155 bool IsFShLegal = false;
8156 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
8157 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
8158 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
8159 Register R3) {
8160 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
8161 MI.eraseFromParent();
8162 return Legalized;
8163 };
8164 // If a funnel shift in the other direction is supported, use it.
8165 if (IsFShLegal) {
8166 return buildFunnelShift(FShOpc, Dst, Src, Amt);
8167 } else if (isPowerOf2_32(EltSizeInBits)) {
8168 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
8169 return buildFunnelShift(RevFsh, Dst, Src, Amt);
8170 }
8171 }
8172
8173 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
8174 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
8175 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
8176 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
8177 Register ShVal;
8178 Register RevShiftVal;
8179 if (isPowerOf2_32(EltSizeInBits)) {
8180 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8181 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8182 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
8183 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
8184 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8185 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
8186 RevShiftVal =
8187 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
8188 } else {
8189 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8190 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
// The fixed shift by 1 keeps the variable reverse-shift amount strictly
// below the bit width even when c % w == 0.
8191 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
8192 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
8193 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8194 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
8195 auto One = MIRBuilder.buildConstant(AmtTy, 1);
8196 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
8197 RevShiftVal =
8198 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
8199 }
8200 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal, MachineInstr::Disjoint);
8201 MI.eraseFromParent();
8202 return Legalized;
8203}
8204
8205// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
8206// representation.
// Builds the float directly: normalize with CTLZ, derive the biased exponent
// (127 + 63 - lz), pack exponent and the top 23 mantissa bits, then add a
// round-to-nearest-even correction computed from the discarded low 40 bits.
// NOTE(review): signature line not visible in this extract.
8209 auto [Dst, Src] = MI.getFirst2Regs();
8210 const LLT S64 = LLT::scalar(64);
8211 const LLT S32 = LLT::scalar(32);
8212 const LLT S1 = LLT::scalar(1);
8213
8214 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8215
8216 // unsigned cul2f(ulong u) {
8217 // uint lz = clz(u);
8218 // uint e = (u != 0) ? 127U + 63U - lz : 0;
8219 // u = (u << lz) & 0x7fffffffffffffffUL;
8220 // ulong t = u & 0xffffffffffUL;
8221 // uint v = (e << 23) | (uint)(u >> 40);
8222 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
8223 // return as_float(v + r);
8224 // }
8225
8226 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
8227 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
8228
// CTLZ_ZERO_UNDEF is fine here: when Src == 0 the select below forces the
// exponent to 0 regardless of LZ.
8229 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
8230
8231 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
8232 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
8233
8234 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
8235 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
8236
// Shift the leading 1 to bit 63, then mask it off -- it is implicit in the
// IEEE encoding.
8237 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
8238 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
8239
8240 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
8241
8242 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
8243 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
8244
8245 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
8246 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
8247 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
8248
// Round to nearest, ties to even: bump by 1 above the halfway point, and at
// exactly halfway bump only if the current mantissa LSB is set.
8249 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
8250 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
8251 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
8252 auto One = MIRBuilder.buildConstant(S32, 1);
8253
8254 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
8255 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
8256 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
8257 MIRBuilder.buildAdd(Dst, V, R);
8258
8259 MI.eraseFromParent();
8260 return Legalized;
8261}
8262
8263 // Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
8264 // operations and G_SITOFP
8267 auto [Dst, Src] = MI.getFirst2Regs();
8268 const LLT S64 = LLT::scalar(64);
8269 const LLT S32 = LLT::scalar(32);
8270 const LLT S1 = LLT::scalar(1);
8271
8272 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8273
8274 // For i64 < INT_MAX we simply reuse SITOFP.
8275 // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
8276 // saved before division, convert to float by SITOFP, multiply the result
8277 // by 2.
8278 auto One = MIRBuilder.buildConstant(S64, 1);
8279 auto Zero = MIRBuilder.buildConstant(S64, 0);
8280 // Result if Src < INT_MAX
8281 auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
8282 // Result if Src >= INT_MAX
// Halving keeps the value in signed-positive range for SITOFP; ORing the
// saved low bit back in preserves correct rounding of the dropped bit.
8283 auto Halved = MIRBuilder.buildLShr(S64, Src, One);
8284 auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
8285 auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
8286 auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
// FAdd with itself doubles the value, undoing the halving above.
8287 auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
8288 // Check if the original value is larger than INT_MAX by comparing with
8289 // zero to pick one of the two conversions.
// A set sign bit (Src s< 0) means the unsigned value is >= 2^63.
8290 auto IsLarge =
8291 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
8292 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8293
8294 MI.eraseFromParent();
8295 return Legalized;
8296}
8297
8298 // Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
8299 // IEEE double representation.
8302 auto [Dst, Src] = MI.getFirst2Regs();
8303 const LLT S64 = LLT::scalar(64);
8304 const LLT S32 = LLT::scalar(32);
8305
8306 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
8307
8308 // We create double value from 32 bit parts with 32 exponent difference.
8309 // Note that + and - are float operations that adjust the implicit leading
8310 // one, the bases 2^52 and 2^84 are for illustrative purposes.
8311 //
8312 // X = 2^52 * 1.0...LowBits
8313 // Y = 2^84 * 1.0...HighBits
8314 // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
8315 // = - 2^52 * 1.0...HighBits
8316 // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
// 0x43300000_00000000 is the bit pattern of 2^52; 0x45300000_00000000 of
// 2^84. The FP constant below is 2^84 + 2^52, so one FSub removes both
// implicit-one contributions at once.
8317 auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
8318 auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
8319 auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
8320 auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
8321 auto HalfWidth = MIRBuilder.buildConstant(S64, 32);
8322
// OR the 32-bit halves into the mantissas of the 2^52 / 2^84 templates.
8323 auto LowBits = MIRBuilder.buildTrunc(S32, Src);
8324 LowBits = MIRBuilder.buildZExt(S64, LowBits);
8325 auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
8326 auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
8327 auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
8328 auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
8329 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
8330
8331 MI.eraseFromParent();
8332 return Legalized;
8333}
8334
8335 /// i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16. We cannot
8336 /// convert fpround f64->f16 without double-rounding, so we manually perform the
8337 /// lowering here where we know it is valid.
8340 LLT SrcTy, MachineIRBuilder &MIRBuilder) {
// First convert to a float type as wide as the source integer so the
// int->fp step is a single conversion, then narrow in two fptrunc steps
// (->f32, then ->dst); this sequence is known valid per the comment above.
8341 auto DstFpTy =
8342 SrcTy.changeElementType(LLT::floatIEEE(SrcTy.getScalarSizeInBits()));
// Preserve signedness of the original opcode when converting.
8343 auto M1 = MI.getOpcode() == TargetOpcode::G_UITOFP
8344 ? MIRBuilder.buildUITOFP(DstFpTy, Src)
8345 : MIRBuilder.buildSITOFP(DstFpTy, Src);
8346 LLT F32Ty = DstFpTy.changeElementSize(32);
8347 auto M2 = MIRBuilder.buildFPTrunc(F32Ty, M1);
8348 MIRBuilder.buildFPTrunc(Dst, M2);
8349 MI.eraseFromParent();
8351}
8352
8354 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8355
8356 if (SrcTy == LLT::scalar(1)) {
8357 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
8358 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8359 MIRBuilder.buildSelect(Dst, Src, True, False);
8360 MI.eraseFromParent();
8361 return Legalized;
8362 }
8363
8364 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8365 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8366
8367 if (SrcTy != LLT::scalar(64))
8368 return UnableToLegalize;
8369
8370 if (DstTy == LLT::scalar(32))
8371 // TODO: SelectionDAG has several alternative expansions to port which may
8372 // be more reasonable depending on the available instructions. We also need
8373 // a more advanced mechanism to choose an optimal version depending on
8374 // target features such as sitofp or CTLZ availability.
8376
8377 if (DstTy == LLT::scalar(64))
8379
8380 return UnableToLegalize;
8381}
8382
// Lower G_SITOFP: i1 becomes a -1.0/0.0 select (signed i1 true is -1);
// i64->f16 uses the shared helper; i64->f32 uses the abs+negate trick below.
8384 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8385
8386 const LLT I64 = LLT::integer(64);
8387 const LLT I32 = LLT::integer(32);
8388 const LLT I1 = LLT::integer(1);
8389
8390 if (SrcTy == I1) {
8391 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
8392 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8393 MIRBuilder.buildSelect(Dst, Src, True, False);
8394 MI.eraseFromParent();
8395 return Legalized;
8396 }
8397
8398 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8399 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8400
8401 if (SrcTy != I64)
8402 return UnableToLegalize;
8403
8404 if (DstTy.getScalarSizeInBits() == 32) {
8405 // signed cl2f(long l) {
8406 // long s = l >> 63;
8407 // float r = cul2f((l + s) ^ s);
8408 // return s ? -r : r;
8409 // }
8410 Register L = Src;
// S is 0 for non-negative inputs, all-ones for negative inputs.
8411 auto SignBit = MIRBuilder.buildConstant(I64, 63);
8412 auto S = MIRBuilder.buildAShr(I64, L, SignBit);
8413
// (l + s) ^ s is the branchless two's-complement absolute value.
8414 auto LPlusS = MIRBuilder.buildAdd(I64, L, S);
8415 auto Xor = MIRBuilder.buildXor(I64, LPlusS, S);
8416 auto R = MIRBuilder.buildUITOFP(I32, Xor);
8417
// Re-apply the sign by negating when the input was negative.
8418 auto RNeg = MIRBuilder.buildFNeg(I32, R);
8419 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, I1, S,
8420 MIRBuilder.buildConstant(I64, 0));
8421 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8422 MI.eraseFromParent();
8423 return Legalized;
8424 }
8425
8426 return UnableToLegalize;
8427}
8428
// Lower G_FPTOUI in terms of G_FPTOSI for s32/s64 source and destination
// types, splitting the range at 2^(DstBits-1).
8430 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8431 const LLT S64 = LLT::scalar(64);
8432 const LLT S32 = LLT::scalar(32);
8433
8434 if (SrcTy != S64 && SrcTy != S32)
8435 return UnableToLegalize;
8436 if (DstTy != S32 && DstTy != S64)
8437 return UnableToLegalize;
8438
8439 // FPTOSI gives same result as FPTOUI for positive signed integers.
8440 // FPTOUI needs to deal with fp values that convert to unsigned integers
8441 // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
8442
// 2^Exp as an integer is exactly the sign-bit mask of the destination width.
8443 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
8444 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
8446 APInt::getZero(SrcTy.getSizeInBits()));
8447 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
8448
// Fast path result, correct whenever the value fits in the signed range.
8449 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
8450
8451 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
8452 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
8453 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
8454 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
8455 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
8456 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
// XOR with the sign-bit constant sets the top bit (low bits are < 2^Exp).
8457 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
8458
8459 const LLT S1 = LLT::scalar(1);
8460
// ULT (unordered-or-less-than) picks the fast path for NaN inputs too.
8461 MachineInstrBuilder FCMP =
8462 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
8463 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8464
8465 MI.eraseFromParent();
8466 return Legalized;
8467}
8468
// Lower G_FPTOSI by decoding the IEEE-754 single-precision representation
// with integer arithmetic (compiler-rt fixsfdi algorithm).
8470 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8471 const LLT S64 = LLT::scalar(64);
8472 const LLT S32 = LLT::scalar(32);
8473
8474 // FIXME: Only f32 to i64 conversions are supported.
8475 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
8476 return UnableToLegalize;
8477
8478 // Expand f32 -> i64 conversion
8479 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8480 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8481
8482 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8483
// Extract the biased 8-bit exponent field (bits 23..30 of an f32).
8484 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
8485 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
8486
8487 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
8488 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
8489
// Sign becomes an all-zeros/all-ones mask via arithmetic shift, then is
// sign-extended to the destination width for the xor/sub trick below.
8490 auto SignMask = MIRBuilder.buildConstant(SrcTy,
8491 APInt::getSignMask(SrcEltBits));
8492 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
8493 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
8494 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
8495 Sign = MIRBuilder.buildSExt(DstTy, Sign);
8496
// Recover the full significand: 23 mantissa bits plus the implicit one
// (0x00800000).
8497 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
8498 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
8499 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
8500
8501 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
8502 R = MIRBuilder.buildZExt(DstTy, R);
8503
// Unbias the exponent (bias 127) and shift the significand left or right
// depending on whether the exponent exceeds the mantissa width.
8504 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
8505 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
8506 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
8507 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
8508
8509 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
8510 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
8511
8512 const LLT S1 = LLT::scalar(1);
8513 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
8514 S1, Exponent, ExponentLoBit);
8515
8516 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
8517
// (r ^ sign) - sign negates the magnitude when the sign mask is all-ones.
8518 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
8519 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
8520
// Values with a negative unbiased exponent have magnitude < 1 -> result 0.
8521 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
8522
8523 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
8524 S1, Exponent, ZeroSrcTy);
8525
8526 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
8527 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
8528
8529 MI.eraseFromParent();
8530 return Legalized;
8531}
8532
// Lower saturating fp->int (G_FPTOSI_SAT / G_FPTOUI_SAT): clamp to the
// destination's integer range, mapping NaN to zero in the signed case.
8535 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8536
8537 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8538 unsigned SatWidth = DstTy.getScalarSizeInBits();
8539
8540 // Determine minimum and maximum integer values and their corresponding
8541 // floating-point values.
8542 APInt MinInt, MaxInt;
8543 if (IsSigned) {
8544 MinInt = APInt::getSignedMinValue(SatWidth);
8545 MaxInt = APInt::getSignedMaxValue(SatWidth);
8546 } else {
8547 MinInt = APInt::getMinValue(SatWidth);
8548 MaxInt = APInt::getMaxValue(SatWidth);
8549 }
8550
8551 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8552 APFloat MinFloat(Semantics);
8553 APFloat MaxFloat(Semantics);
8554
// Round toward zero so an inexact bound still over/under-shoots safely.
8555 APFloat::opStatus MinStatus =
8556 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
8557 APFloat::opStatus MaxStatus =
8558 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
8559 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
8560 !(MaxStatus & APFloat::opStatus::opInexact);
8561
8562 // If the integer bounds are exactly representable as floats, emit a
8563 // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
8564 // and selects.
8565 if (AreExactFloatBounds) {
8566 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
8567 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
8568 auto MaxP =
8569 MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, LLT::integer(1), Src, MaxC);
8570 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8571 // Clamp by MaxFloat from above. NaN cannot occur.
8572 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8573 auto MinP = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, LLT::integer(1), Max,
8575 auto Min =
8576 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
8577 // Convert clamped value to integer. In the unsigned case we're done,
8578 // because we mapped NaN to MinFloat, which will cast to zero.
8579 if (!IsSigned) {
8580 MIRBuilder.buildFPTOUI(Dst, Min);
8581 MI.eraseFromParent();
8582 return Legalized;
8583 }
8584
8585 // Otherwise, select 0 if Src is NaN.
8586 auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
8587 auto IsZero =
8588 MIRBuilder.buildFCmp(CmpInst::FCMP_UNO, LLT::integer(1), Src, Src);
8589 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
8590 FpToInt);
8591 MI.eraseFromParent();
8592 return Legalized;
8593 }
8594
8595 // Result of direct conversion. The assumption here is that the operation is
8596 // non-trapping and it's fine to apply it to an out-of-range value if we
8597 // select it away later.
8598 auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
8599 : MIRBuilder.buildFPTOUI(DstTy, Src);
8600
8601 // If Src ULT MinFloat, select MinInt. In particular, this also selects
8602 // MinInt if Src is NaN.
8603 auto ULT = MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, LLT::integer(1), Src,
8604 MIRBuilder.buildFConstant(SrcTy, MinFloat));
8605 auto Max = MIRBuilder.buildSelect(
8606 DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8607 // If Src OGT MaxFloat, select MaxInt.
8608 auto OGT = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, LLT::integer(1), Src,
8609 MIRBuilder.buildFConstant(SrcTy, MaxFloat));
8610
8611 // In the unsigned case we are done, because we mapped NaN to MinInt, which
8612 // is already zero.
8613 if (!IsSigned) {
8614 MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
8615 Max);
8616 MI.eraseFromParent();
8617 return Legalized;
8618 }
8619
8620 // Otherwise, select 0 if Src is NaN.
8621 auto Min = MIRBuilder.buildSelect(
8622 DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8623 auto IsZero =
8624 MIRBuilder.buildFCmp(CmpInst::FCMP_UNO, LLT::integer(1), Src, Src);
8625 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
8626 MI.eraseFromParent();
8627 return Legalized;
8628}
8629
8630 // f64 -> f16 conversion using round-to-nearest-even rounding mode.
8633 const LLT S1 = LLT::scalar(1);
8634 const LLT S32 = LLT::scalar(32);
8635
8636 auto [Dst, Src] = MI.getFirst2Regs();
8637 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
8638 MRI.getType(Src).getScalarType() == LLT::scalar(64));
8639
8640 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
8641 return UnableToLegalize;
8642
// With the afn (approximate) flag, a two-step f64->f32->f16 truncation is
// acceptable even though it can double-round.
8643 if (MI.getFlag(MachineInstr::FmAfn)) {
8644 unsigned Flags = MI.getFlags();
8645 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
8646 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
8647 MI.eraseFromParent();
8648 return Legalized;
8649 }
8650
8651 const unsigned ExpMask = 0x7ff;
8652 const unsigned ExpBiasf64 = 1023;
8653 const unsigned ExpBiasf16 = 15;
8654
// Split the f64 bits into low (U) and high (UH) 32-bit halves.
8655 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
8656 Register U = Unmerge.getReg(0);
8657 Register UH = Unmerge.getReg(1);
8658
// E = biased f64 exponent (bits 52..62, i.e. bits 20..30 of the high half).
8659 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
8660 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
8661
8662 // Subtract the fp64 exponent bias (1023) to get the real exponent and
8663 // add the f16 bias (15) to get the biased exponent for the f16 format.
8664 E = MIRBuilder.buildAdd(
8665 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
8666
// M = top mantissa bits, positioned for the f16 significand (with one extra
// rounding bit); the low 0xffe mask keeps an even bit pattern.
8667 auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
8668 M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
8669
// Collapse all discarded low mantissa bits into a single sticky bit.
8670 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
8671 MIRBuilder.buildConstant(S32, 0x1ff));
8672 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
8673
8674 auto Zero = MIRBuilder.buildConstant(S32, 0);
8675 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
8676 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
8677 M = MIRBuilder.buildOr(S32, M, Lo40Set);
8678
8679 // (M != 0 ? 0x0200 : 0) | 0x7c00;
// I is the Inf/NaN encoding: quiet bit set when the mantissa is nonzero.
8680 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
8681 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
8682 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
8683
8684 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
8685 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
8686
8687 // N = M | (E << 12);
8688 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
8689 auto N = MIRBuilder.buildOr(S32, M, EShl12);
8690
8691 // B = clamp(1-E, 0, 13);
// B is the extra shift needed to form an f16 denormal when E < 1.
8692 auto One = MIRBuilder.buildConstant(S32, 1);
8693 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
8694 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
8695 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
8696
8697 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
8698 MIRBuilder.buildConstant(S32, 0x1000));
8699
// D is the denormal significand; the shift-back comparison folds any bits
// lost to the denormal shift into a sticky bit.
8700 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
8701 auto D0 = MIRBuilder.buildShl(S32, D, B);
8702
8703 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
8704 D0, SigSetHigh);
8705 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
8706 D = MIRBuilder.buildOr(S32, D, D1);
8707
// Choose the denormal encoding (D) when E < 1, else the normal one (N).
8708 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
8709 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
8710
// Round to nearest even using the low three bits (guard/round/sticky).
8711 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
8712 V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
8713
8714 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
8715 MIRBuilder.buildConstant(S32, 3));
8716 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
8717
8718 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
8719 MIRBuilder.buildConstant(S32, 5));
8720 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
8721
8722 V1 = MIRBuilder.buildOr(S32, V0, V1);
8723 V = MIRBuilder.buildAdd(S32, V, V1);
8724
// Overflow: exponents above 30 saturate to f16 infinity (0x7c00).
8725 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
8726 E, MIRBuilder.buildConstant(S32, 30));
8727 V = MIRBuilder.buildSelect(S32, CmpEGt30,
8728 MIRBuilder.buildConstant(S32, 0x7c00), V);
8729
// E == 1039 (= 2047 - 1023 + 15) marks an f64 Inf/NaN input: use I.
8730 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
8731 E, MIRBuilder.buildConstant(S32, 1039));
8732 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
8733
8734 // Extract the sign bit.
8735 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
8736 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
8737
8738 // Insert the sign bit
8739 V = MIRBuilder.buildOr(S32, Sign, V);
8740
8741 MIRBuilder.buildTrunc(Dst, V);
8742 MI.eraseFromParent();
8743 return Legalized;
8744}
8745
// Lower G_FPTRUNC; only the f64 -> f16 case is expanded here (via the
// round-to-nearest-even bit-manipulation path above).
8748 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
8749 const LLT S64 = LLT::scalar(64);
8750 const LLT S16 = LLT::scalar(16);
8751
8752 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
8754
8755 return UnableToLegalize;
8756}
8757
// Lower powi-style ops: convert the (signed) integer exponent to FP with
// G_SITOFP and emit a regular G_FPOW, preserving the instruction's flags.
8759 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8760 LLT Ty = MRI.getType(Dst);
8761
8762 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
8763 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
8764 MI.eraseFromParent();
8765 return Legalized;
8766}
8767
// Lower a modf-style split: DstInt gets trunc(Src), DstFrac gets the
// fractional remainder with Src's sign.
8769 auto [DstFrac, DstInt, Src] = MI.getFirst3Regs();
8770 LLT Ty = MRI.getType(Src);
8771 auto Flags = MI.getFlags();
8772
8773 auto IntPart = MIRBuilder.buildIntrinsicTrunc(Ty, Src, Flags);
8774 auto FracPart = MIRBuilder.buildFSub(Ty, Src, IntPart, Flags);
8775
8776 Register FracToUse;
8777 if (MI.getFlag(MachineInstr::FmNoInfs)) {
// With ninf, infinities cannot occur, so the raw subtraction is enough.
8778 FracToUse = FracPart.getReg(0);
8779 } else {
// inf - inf would be NaN; force the fractional part of +/-inf to zero
// (the copysign below restores the correct sign).
8780 auto Abs = MIRBuilder.buildFAbs(Ty, Src, Flags);
8781 const fltSemantics &Semantics = getFltSemanticForLLT(Ty.getScalarType());
8782 auto Inf = MIRBuilder.buildFConstant(Ty, APFloat::getInf(Semantics));
8783 auto IsInf = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ,
8784 Ty.changeElementSize(1), Abs, Inf);
8785 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8786 auto Select = MIRBuilder.buildSelect(Ty, IsInf, Zero, FracPart);
8787 FracToUse = Select.getReg(0);
8788 }
8789
8790 MIRBuilder.buildFCopysign(DstFrac, FracToUse, Src, Flags);
8791 MIRBuilder.buildCopy(DstInt, IntPart.getReg(0));
8792
8793 MI.eraseFromParent();
8794 return Legalized;
8795}
8796
// Map an integer min/max opcode to the icmp predicate that is true when the
// first operand should be selected.
8798 switch (Opc) {
8799 case TargetOpcode::G_SMIN:
8800 return CmpInst::ICMP_SLT;
8801 case TargetOpcode::G_SMAX:
8802 return CmpInst::ICMP_SGT;
8803 case TargetOpcode::G_UMIN:
8804 return CmpInst::ICMP_ULT;
8805 case TargetOpcode::G_UMAX:
8806 return CmpInst::ICMP_UGT;
8807 default:
8808 llvm_unreachable("not in integer min/max");
8809 }
8810}
8811
// Lower integer min/max to icmp + select, using an i1 (per-element for
// vectors) condition type derived from the destination.
8813 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8814
8815 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
8816 LLT CmpType = MRI.getType(Dst).changeElementType(LLT::integer(1));
8817
8818 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8819 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8820
8821 MI.eraseFromParent();
8822 return Legalized;
8823}
8824
// Lower a three-way signed/unsigned compare (scmp/ucmp): result is -1, 0 or
// +1 depending on LHS <=> RHS, built either from selects or from extended
// boolean subtraction depending on target preference.
8827 GSUCmp *Cmp = cast<GSUCmp>(&MI);
8828
8829 Register Dst = Cmp->getReg(0);
8830 LLT DstTy = MRI.getType(Dst);
8831 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8832 LLT CmpTy = DstTy.changeElementSize(1);
8833
// Predicates follow the compare's signedness (slt/sgt vs ult/ugt).
8834 CmpInst::Predicate LTPredicate = Cmp->isSigned()
8837 CmpInst::Predicate GTPredicate = Cmp->isSigned()
8840
8841 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
8842 auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8843 Cmp->getRHSReg());
8844 auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8845 Cmp->getRHSReg());
8846
8847 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
8848 auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
8849 if (TLI.preferSelectsOverBooleanArithmetic(
8850 getApproximateEVTForLLT(SrcTy, Ctx)) ||
// Select form: (IsLT ? -1 : (IsGT ? 1 : 0)).
8852 auto One = MIRBuilder.buildConstant(DstTy, 1);
8853 auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8854
8855 auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
8856 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8857 } else {
8859 std::swap(IsGT, IsLT);
8860 // Extend boolean results to DstTy, which is at least i2, before subtracting
8861 // them.
8862 unsigned BoolExtOp =
8863 MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
8864 IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8865 IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8866 MIRBuilder.buildSub(Dst, IsGT, IsLT);
8867 }
8868
8869 MI.eraseFromParent();
8870 return Legalized;
8871}
8872
// Lower G_FCOPYSIGN with integer bit operations: clear the sign bit of
// Src0, extract the sign bit of Src1 (aligning widths if they differ), and
// OR the two together.
8875 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
8876 const int Src0Size = Src0Ty.getScalarSizeInBits();
8877 const int Src1Size = Src1Ty.getScalarSizeInBits();
8878
8879 LLT DstIntTy =
8880 DstTy.changeElementType(LLT::integer(DstTy.getScalarSizeInBits()));
8881 LLT Src0IntTy = Src0Ty.changeElementType(LLT::integer(Src0Size));
8882 LLT Src1IntTy = Src1Ty.changeElementType(LLT::integer(Src1Size));
8883
8884 Register Src0Int = Src0;
8885 Register Src1Int = Src1;
8886
// Bitcast FP-typed operands to same-width integers for the bit twiddling.
8887 if (!(Src0Ty.getScalarType().isAnyScalar() ||
8888 Src0Ty.getScalarType().isInteger()))
8889 Src0Int = MIRBuilder.buildBitcast(Src0IntTy, Src0).getReg(0);
8890
8891 if (!(Src1Ty.getScalarType().isAnyScalar() ||
8892 Src1Ty.getScalarType().isInteger()))
8893 Src1Int = MIRBuilder.buildBitcast(Src1IntTy, Src1).getReg(0);
8894
8895 auto SignBitMask =
8896 MIRBuilder.buildConstant(Src0IntTy, APInt::getSignMask(Src0Size));
8897
8898 auto NotSignBitMask = MIRBuilder.buildConstant(
8899 Src0IntTy, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
8900
8901 Register And0 =
8902 MIRBuilder.buildAnd(Src0IntTy, Src0Int, NotSignBitMask).getReg(0);
8903 Register And1;
8904 if (Src0Ty == Src1Ty) {
8905 And1 = MIRBuilder.buildAnd(Src1IntTy, Src1Int, SignBitMask).getReg(0);
8906 } else if (Src0Size > Src1Size) {
// Widen Src1 and move its sign bit up into Src0's sign-bit position.
8907 auto ShiftAmt = MIRBuilder.buildConstant(Src0IntTy, Src0Size - Src1Size);
8908 auto Zext = MIRBuilder.buildZExt(Src0IntTy, Src1Int);
8909 auto Shift = MIRBuilder.buildShl(Src0IntTy, Zext, ShiftAmt);
8910 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
8911 } else {
// Shift Src1's sign bit down, then truncate to Src0's width.
8912 auto ShiftAmt = MIRBuilder.buildConstant(Src1IntTy, Src1Size - Src0Size);
8913 auto Shift = MIRBuilder.buildLShr(Src1IntTy, Src1Int, ShiftAmt);
8914 auto Trunc = MIRBuilder.buildTrunc(Src0IntTy, Shift);
8915 And1 = MIRBuilder.buildAnd(Src0IntTy, Trunc, SignBitMask).getReg(0);
8916 }
8917
8918 // Be careful about setting nsz/nnan/ninf on every instruction, since the
8919 // constants are a nan and -0.0, but the final result should preserve
8920 // everything.
8921 unsigned Flags = MI.getFlags();
8922
8923 // We masked the sign bit and the not-sign bit, so these are disjoint.
8924 Flags |= MachineInstr::Disjoint;
8925
8926 if (DstTy == DstIntTy)
8927 MIRBuilder.buildOr(Dst, And0, And1, Flags).getReg(0);
8928 else {
8929 Register NewDst = MIRBuilder.buildOr(DstIntTy, And0, And1, Flags).getReg(0);
8930 MIRBuilder.buildBitcast(Dst, NewDst);
8931 }
8932
8933 MI.eraseFromParent();
8934 return Legalized;
8935}
8936
8939 // FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have
8940 // identical handling. fminimumnum/fmaximumnum also need a path that do not
8941 // depend on fminnum/fmaxnum.
8942
// Rewrite each min/max flavor in terms of a related opcode, inserting
// canonicalizes to quiet signaling NaNs where required.
8943 unsigned NewOp;
8944 switch (MI.getOpcode()) {
8945 case TargetOpcode::G_FMINNUM:
8946 NewOp = TargetOpcode::G_FMINNUM_IEEE;
8947 break;
8948 case TargetOpcode::G_FMINIMUMNUM:
8949 NewOp = TargetOpcode::G_FMINNUM;
8950 break;
8951 case TargetOpcode::G_FMAXNUM:
8952 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
8953 break;
8954 case TargetOpcode::G_FMAXIMUMNUM:
8955 NewOp = TargetOpcode::G_FMAXNUM;
8956 break;
8957 default:
8958 llvm_unreachable("unexpected min/max opcode");
8959 }
8960
8961 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8962 LLT Ty = MRI.getType(Dst);
8963
8964 if (!MI.getFlag(MachineInstr::FmNoNans)) {
8965 // Insert canonicalizes if it's possible we need to quiet to get correct
8966 // sNaN behavior.
8967
8968 // Note this must be done here, and not as an optimization combine in the
8969 // absence of a dedicate quiet-snan instruction as we're using an
8970 // omni-purpose G_FCANONICALIZE.
8971 if (!isKnownNeverSNaN(Src0, MRI))
8972 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
8973
8974 if (!isKnownNeverSNaN(Src1, MRI))
8975 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
8976 }
8977
8978 // If there are no nans, it's safe to simply replace this with the non-IEEE
8979 // version.
8980 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
8981 MI.eraseFromParent();
8982 return Legalized;
8983}
8984
// Lower G_FMINIMUM/G_FMAXIMUM (IEEE-754-2019 semantics: NaN propagates,
// -0.0 < +0.0) on top of whichever min/max form the target supports, or a
// plain fcmp+select, patching up NaN and signed-zero behavior afterwards.
8987 unsigned Opc = MI.getOpcode();
8988 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8989 LLT Ty = MRI.getType(Dst);
8990 LLT CmpTy = Ty.changeElementSize(1);
8991
8992 bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM);
8993 unsigned OpcIeee =
8994 IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
8995 unsigned OpcNonIeee =
8996 IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
8997 bool MinMaxMustRespectOrderedZero = false;
8998 Register Res;
8999
9000 // IEEE variants don't need canonicalization
9001 if (LI.isLegalOrCustom({OpcIeee, Ty})) {
9002 Res = MIRBuilder.buildInstr(OpcIeee, {Ty}, {Src0, Src1}).getReg(0);
// The IEEE variants already order -0.0 before +0.0, so the signed-zero
// fixup below can be skipped.
9003 MinMaxMustRespectOrderedZero = true;
9004 } else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
9005 Res = MIRBuilder.buildInstr(OpcNonIeee, {Ty}, {Src0, Src1}).getReg(0);
9006 } else {
9007 auto Compare = MIRBuilder.buildFCmp(
9008 IsMax ? CmpInst::FCMP_OGT : CmpInst::FCMP_OLT, CmpTy, Src0, Src1);
9009 Res = MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
9010 }
9011
9012 // Propagate any NaN of both operands
9013 if (!MI.getFlag(MachineInstr::FmNoNans) &&
9014 (!isKnownNeverNaN(Src0, MRI) || !isKnownNeverNaN(Src1, MRI))) {
9015 auto IsOrdered = MIRBuilder.buildFCmp(CmpInst::FCMP_ORD, CmpTy, Src0, Src1);
9016
9017 LLT ElementTy = Ty.isScalar() ? Ty : Ty.getElementType();
9018 APFloat NaNValue = APFloat::getNaN(getFltSemanticForLLT(ElementTy));
9019 Register NaN = MIRBuilder.buildFConstant(ElementTy, NaNValue).getReg(0);
9020 if (Ty.isVector())
9021 NaN = MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);
9022
9023 Res = MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
9024 }
9025
9026 // fminimum/fmaximum requires -0.0 less than +0.0
9027 if (!MinMaxMustRespectOrderedZero && !MI.getFlag(MachineInstr::FmNsz)) {
9028 GISelValueTracking VT(MIRBuilder.getMF());
9029 KnownFPClass Src0Info = VT.computeKnownFPClass(Src0, fcZero);
9030 KnownFPClass Src1Info = VT.computeKnownFPClass(Src1, fcZero);
9031
// Only needed when both operands may be zero; when the result compares
// equal to 0.0, prefer the operand that is the "right" signed zero.
9032 if (!Src0Info.isKnownNeverZero() && !Src1Info.isKnownNeverZero()) {
9033 const unsigned Flags = MI.getFlags();
9034 Register Zero = MIRBuilder.buildFConstant(Ty, 0.0).getReg(0);
9035 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CmpTy, Res, Zero);
9036
9037 unsigned TestClass = IsMax ? fcPosZero : fcNegZero;
9038
9039 auto LHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
9040 auto LHSSelect =
9041 MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);
9042
9043 auto RHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
9044 auto RHSSelect =
9045 MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);
9046
9047 Res = MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
9048 }
9049 }
9050
9051 MIRBuilder.buildCopy(Dst, Res);
9052 MI.eraseFromParent();
9053 return Legalized;
9054}
9055
9057 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
// Note: this is the unfused form; intermediate rounding after the multiply
// is what distinguishes FMAD from FMA.
9058 Register DstReg = MI.getOperand(0).getReg();
9059 LLT Ty = MRI.getType(DstReg);
9060 unsigned Flags = MI.getFlags();
9061
9062 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
9063 Flags);
9064 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
9065 MI.eraseFromParent();
9066 return Legalized;
9067}
9068
// Lower round-to-nearest-away (llvm.round semantics) via trunc plus a
// sign-adjusted 0.0/1.0 offset.
9071 auto [DstReg, X] = MI.getFirst2Regs();
9072 const unsigned Flags = MI.getFlags();
9073 const LLT Ty = MRI.getType(DstReg);
9074 const LLT CondTy = Ty.changeElementSize(1);
9075
9076 // round(x) =>
9077 // t = trunc(x);
9078 // d = fabs(x - t);
9079 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
9080 // return t + o;
9081
9082 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
9083
9084 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
9085 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
9086
9087 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
9088 auto Cmp =
9089 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
9090
9091 // Could emit G_UITOFP instead
9092 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
9093 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
9094 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
// copysign makes the offset move away from zero in X's direction.
9095 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
9096
9097 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
9098
9099 MI.eraseFromParent();
9100 return Legalized;
9101}
9102
// Lower floor using trunc plus a conditional -1.0 adjustment for negative
// non-integral inputs, per the pseudocode comment below. The -1.0 is produced
// by sign-extending the i1 condition through G_SITOFP (true -> -1 -> -1.0).
// (Signature line not visible in this excerpt.)
9104 auto [DstReg, SrcReg] = MI.getFirst2Regs();
9105 unsigned Flags = MI.getFlags();
9106 LLT Ty = MRI.getType(DstReg);
9107 const LLT CondTy = Ty.changeElementSize(1);
9108
9109 // result = trunc(src);
9110 // if (src < 0.0 && src != result)
9111 // result += -1.0.
9112
9113 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
9114 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
9115
9116 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
9117 SrcReg, Zero, Flags);
9118 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
9119 SrcReg, Trunc, Flags);
9120 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
9121 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
9122
9123 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
9124 MI.eraseFromParent();
9125 return Legalized;
9126}
9127
// Lower a merge of scalar pieces by zero-extending each source into one wide
// integer and OR-ing it in at its bit offset. A pointer destination is
// produced via G_INTTOPTR, except for non-integral address spaces where the
// int->ptr cast is not allowed. (Signature line not visible in this excerpt.)
9130 const unsigned NumOps = MI.getNumOperands();
9131 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
9132 unsigned PartSize = Src0Ty.getSizeInBits();
9133
9134 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
9135 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
9136
9137 for (unsigned I = 2; I != NumOps; ++I) {
// Operand 0 is the def and operand 1 was consumed above, so source I lands
// at bit offset (I - 1) * PartSize.
9138 const unsigned Offset = (I - 1) * PartSize;
9139
9140 Register SrcReg = MI.getOperand(I).getReg();
9141 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
9142
// Write the final OR directly into DstReg when possible; otherwise chain
// through a fresh wide virtual register.
9143 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
9144 MRI.createGenericVirtualRegister(WideTy);
9145
9146 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
9147 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
9148 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
9149 ResultReg = NextResult;
9150 }
9151
9152 if (DstTy.isPointer()) {
9153 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
9154 DstTy.getAddressSpace())) {
9155 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
9156 return UnableToLegalize;
9157 }
9158
9159 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
9160 }
9161
9162 MI.eraseFromParent();
9163 return Legalized;
9164}
9165
// Lower an unmerge by coercing the source to a single wide integer, then
// producing each destination with a logical shift right by its bit offset
// followed by a trunc. Pointer element types are not handled yet.
// (Signature line not visible in this excerpt.)
9168 const unsigned NumDst = MI.getNumOperands() - 1;
9169 Register SrcReg = MI.getOperand(NumDst).getReg();
9170 Register Dst0Reg = MI.getOperand(0).getReg();
9171 LLT DstTy = MRI.getType(Dst0Reg);
9172 if (DstTy.isPointer())
9173 return UnableToLegalize; // TODO
9174
9175 SrcReg = coerceToScalar(SrcReg);
9176 if (!SrcReg)
9177 return UnableToLegalize;
9178
9179 // Expand scalarizing unmerge as bitcast to integer and shift.
9180 LLT IntTy = MRI.getType(SrcReg);
9181
// Destination 0 is the low bits; no shift needed.
9182 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
9183
9184 const unsigned DstSize = DstTy.getSizeInBits();
9185 unsigned Offset = DstSize;
9186 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
9187 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
9188 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
9189 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
9190 }
9191
9192 MI.eraseFromParent();
9193 return Legalized;
9194}
9195
9196/// Lower a vector extract or insert by writing the vector to a stack temporary
9197/// and reloading the element or vector.
9198///
9199/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
9200/// =>
9201/// %stack_temp = G_FRAME_INDEX
9202/// G_STORE %vec, %stack_temp
9203/// %idx = clamp(%idx, %vec.getNumElements())
9204/// %element_ptr = G_PTR_ADD %stack_temp, %idx
9205/// %dst = G_LOAD %element_ptr
// (Signature lines not visible in this excerpt; handles both
// G_EXTRACT_VECTOR_ELT and G_INSERT_VECTOR_ELT — InsertVal is only set for
// the insert form.)
9208 Register DstReg = MI.getOperand(0).getReg();
9209 Register SrcVec = MI.getOperand(1).getReg();
9210 Register InsertVal;
9211 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
9212 InsertVal = MI.getOperand(2).getReg();
9213
// The index is always the last operand for both opcodes.
9214 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
9215
9216 LLT VecTy = MRI.getType(SrcVec);
9217 LLT EltTy = VecTy.getElementType();
9218 unsigned NumElts = VecTy.getNumElements();
9219
// Fast path: known constant index. Decompose into scalar pieces and either
// splice in the inserted value or copy out the requested element.
// NOTE(review): `IdxVal <= NumElts` admits IdxVal == NumElts, which would
// index one past the end of SrcRegs below — looks like it should be `<`;
// confirm against upstream before relying on this path for out-of-range
// constant indices.
9220 int64_t IdxVal;
9221 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
9223 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
9224
9225 if (InsertVal) {
9226 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
9227 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
9228 } else {
9229 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
9230 }
9231
9232 MI.eraseFromParent();
9233 return Legalized;
9234 }
9235
// Stack lowering below addresses elements by byte offset, so it only works
// for byte-sized element types.
9236 if (!EltTy.isByteSized()) { // Not implemented.
9237 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
9238 return UnableToLegalize;
9239 }
9240
9241 unsigned EltBytes = EltTy.getSizeInBytes();
9242 Align VecAlign = getStackTemporaryAlignment(VecTy);
9243 Align EltAlign;
9244
9245 MachinePointerInfo PtrInfo;
9246 auto StackTemp = createStackTemporary(
9247 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
9248 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
9249
9250 // Get the pointer to the element, and be sure not to hit undefined behavior
9251 // if the index is out of bounds.
9252 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
9253
9254 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
9255 int64_t Offset = IdxVal * EltBytes;
9256 PtrInfo = PtrInfo.getWithOffset(Offset);
9257 EltAlign = commonAlignment(VecAlign, Offset);
9258 } else {
9259 // We lose information with a variable offset.
9260 EltAlign = getStackTemporaryAlignment(EltTy);
9261 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
9262 }
9263
9264 if (InsertVal) {
9265 // Write the inserted element
9266 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
9267
9268 // Reload the whole vector.
9269 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
9270 } else {
9271 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
9272 }
9273
9274 MI.eraseFromParent();
9275 return Legalized;
9276}
9277
// Lower a shuffle by extracting each selected element and rebuilding the
// destination with G_BUILD_VECTOR. Negative mask entries become a shared
// G_IMPLICIT_DEF; repeated mask entries reuse a cached extract.
// (Signature lines not visible in this excerpt.)
9280 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
9281 MI.getFirst3RegLLTs();
9282 LLT IdxTy = LLT::scalar(32);
9283
9284 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
9287 LLT EltTy = DstTy.getScalarType();
9288
// Keyed on the raw mask value, which encodes both the source vector
// (Idx < NumElts -> Src0, else Src1) and the element within it.
9289 DenseMap<unsigned, Register> CachedExtract;
9290
9291 for (int Idx : Mask) {
9292 if (Idx < 0) {
9293 if (!Undef.isValid())
9294 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
9295 BuildVec.push_back(Undef);
9296 continue;
9297 }
9298
9299 assert(!Src0Ty.isScalar() && "Unexpected scalar G_SHUFFLE_VECTOR");
9300
9301 int NumElts = Src0Ty.getNumElements();
9302 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
9303 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
9304 auto [It, Inserted] = CachedExtract.try_emplace(Idx);
9305 if (Inserted) {
9306 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
9307 It->second =
9308 MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK).getReg(0);
9309 }
9310 BuildVec.push_back(It->second);
9311 }
9312
9313 assert(DstTy.isVector() && "Unexpected scalar G_SHUFFLE_VECTOR");
9314 MIRBuilder.buildBuildVector(DstReg, BuildVec);
9315 MI.eraseFromParent();
9316 return Legalized;
9317}
9318
// Lower vector compress via a stack temporary: selected elements are stored
// consecutively from slot 0 (OutPos advances by the zero-extended mask bit),
// then the whole vector is reloaded. With a passthru, the slot is pre-filled
// with it, and the remaining lanes past the last selected element are filled
// with LastWriteVal. (Signature lines and the ValPtrInfo initializer line are
// not visible in this excerpt.)
9321 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
9322 MI.getFirst4RegLLTs();
9323
9324 if (VecTy.isScalableVector())
9325 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
9326
9327 Align VecAlign = getStackTemporaryAlignment(VecTy);
9328 MachinePointerInfo PtrInfo;
9329 Register StackPtr =
9330 createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
9331 PtrInfo)
9332 .getReg(0);
9333 MachinePointerInfo ValPtrInfo =
9335
9336 LLT IdxTy = LLT::scalar(32);
9337 LLT ValTy = VecTy.getElementType();
9338 Align ValAlign = getStackTemporaryAlignment(ValTy);
9339
9340 auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
9341
// A G_IMPLICIT_DEF passthru means the caller doesn't care about the tail
// lanes, so the pre-fill store and tail handling can be skipped.
9342 bool HasPassthru =
9343 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
9344
9345 if (HasPassthru)
9346 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
9347
9348 Register LastWriteVal;
9349 std::optional<APInt> PassthruSplatVal =
9350 isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
9351
9352 if (PassthruSplatVal.has_value()) {
9353 LastWriteVal =
9354 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9355 } else if (HasPassthru) {
// Non-splat passthru: count the selected lanes (popcount of the mask) and
// reload the passthru element that lands just past them.
9356 auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9357 Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9358 {LLT::scalar(32)}, {Popcount});
9359
9360 Register LastElmtPtr =
9361 getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
9362 LastWriteVal =
9363 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9364 .getReg(0);
9365 }
9366
9367 unsigned NumElmts = VecTy.getNumElements();
9368 for (unsigned I = 0; I < NumElmts; ++I) {
9369 auto Idx = MIRBuilder.buildConstant(IdxTy, I);
9370 auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
9371 Register ElmtPtr =
9372 getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
// Unconditional store: an unselected lane's store is simply overwritten by
// the next selected lane, since OutPos only advances when the mask bit is 1.
9373 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9374
9375 LLT MaskITy = MaskTy.getElementType();
9376 auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9377 if (MaskITy.getSizeInBits() > 1)
9378 MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
9379
9380 MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
9381 OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
9382
9383 if (HasPassthru && I == NumElmts - 1) {
// If every lane was selected, OutPos would point past the vector; clamp it
// and keep the last real value instead of LastWriteVal.
9384 auto EndOfVector =
9385 MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
9386 auto AllLanesSelected = MIRBuilder.buildICmp(
9387 CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
9388 OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9389 {OutPos, EndOfVector});
9390 ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9391
9392 LastWriteVal =
9393 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9394 .getReg(0);
9395 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9396 }
9397 }
9398
9399 // TODO: Use StackPtr's FrameIndex alignment.
9400 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9401
9402 MI.eraseFromParent();
9403 return Legalized;
9404}
9405
9407 Register AllocSize,
9408 Align Alignment,
9409 LLT PtrTy) {
9410 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
9411
9412 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
9413 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
9414
9415 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
9416 // have to generate an extra instruction to negate the alloc and then use
9417 // G_PTR_ADD to add the negative offset.
9418 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
9419 if (Alignment > Align(1)) {
9420 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
9421 AlignMask.negate();
9422 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
9423 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
9424 }
9425
9426 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
9427}
9428
// Lower a dynamic stack allocation: compute the new (aligned) SP value,
// write it back to the stack-pointer register, and return it as the result.
// Only downward-growing stacks are supported. (Signature lines not visible
// in this excerpt.)
9431 const auto &MF = *MI.getMF();
9432 const auto &TFI = *MF.getSubtarget().getFrameLowering();
9433 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
9434 return UnableToLegalize;
9435
9436 Register Dst = MI.getOperand(0).getReg();
9437 Register AllocSize = MI.getOperand(1).getReg();
9438 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
9439
9440 LLT PtrTy = MRI.getType(Dst);
9441 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9442 Register SPTmp =
9443 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
9444
9445 MIRBuilder.buildCopy(SPReg, SPTmp);
9446 MIRBuilder.buildCopy(Dst, SPTmp);
9447
9448 MI.eraseFromParent();
9449 return Legalized;
9450}
9451
// Lower stacksave to a copy from the target's stack-pointer register; bail
// if the target does not expose one. (Signature lines not visible in this
// excerpt.)
9454 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9455 if (!StackPtr)
9456 return UnableToLegalize;
9457
9458 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
9459 MI.eraseFromParent();
9460 return Legalized;
9461}
9462
// Lower stackrestore to a copy into the target's stack-pointer register;
// bail if the target does not expose one. (Signature lines not visible in
// this excerpt.)
9465 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9466 if (!StackPtr)
9467 return UnableToLegalize;
9468
9469 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
9470 MI.eraseFromParent();
9471 return Legalized;
9472}
9473
// Lower G_EXTRACT. Element-aligned extracts from vectors become an unmerge
// plus a merge/copy of the covered elements; otherwise the source is cast to
// an integer and the result produced by lshr + trunc. Non-integral address
// spaces cannot be round-tripped through integers, so those bail out.
// (Signature lines not visible in this excerpt.)
9476 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9477 unsigned Offset = MI.getOperand(2).getImm();
9478
9479 // Extract sub-vector or one element
9480 if (SrcTy.isVector()) {
9481 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9482 unsigned DstSize = DstTy.getSizeInBits();
9483
// Only usable when the extract is aligned to whole source elements and
// stays in bounds.
9484 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9485 (Offset + DstSize <= SrcTy.getSizeInBits())) {
9486 // Unmerge and allow access to each Src element for the artifact combiner.
9487 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9488
9489 // Take element(s) we need to extract and copy it (merge them).
9490 SmallVector<Register, 8> SubVectorElts;
9491 for (unsigned Idx = Offset / SrcEltSize;
9492 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
9493 SubVectorElts.push_back(Unmerge.getReg(Idx));
9494 }
9495 if (SubVectorElts.size() == 1)
9496 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9497 else
9498 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9499
9500 MI.eraseFromParent();
9501 return Legalized;
9502 }
9503 }
9504
9505 const DataLayout &DL = MIRBuilder.getDataLayout();
9506 if ((SrcTy.isPointer() &&
9507 DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) ||
9508 (DstTy.isPointer() &&
9509 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace()))) {
9510 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9511 return UnableToLegalize;
9512 }
9513
9514 if ((DstTy.isScalar() || DstTy.isPointer()) &&
9515 (SrcTy.isScalar() || SrcTy.isPointer() ||
9516 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9517 LLT SrcIntTy = SrcTy;
9518 if (!SrcTy.isScalar()) {
9519 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
9520 SrcReg = MIRBuilder.buildCast(SrcIntTy, SrcReg).getReg(0);
9521 }
9522
// Pointer results are computed as an integer first, then converted with
// G_INTTOPTR at the end.
9523 Register ResultReg = DstReg;
9524 if (DstTy.isPointer())
9525 ResultReg =
9526 MRI.createGenericVirtualRegister(LLT::scalar(DstTy.getSizeInBits()));
9527
9528 if (Offset == 0)
9529 MIRBuilder.buildTrunc(ResultReg, SrcReg);
9530 else {
9531 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
9532 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9533 MIRBuilder.buildTrunc(ResultReg, Shr);
9534 }
9535
9536 if (DstTy.isPointer())
9537 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
9538
9539 MI.eraseFromParent();
9540 return Legalized;
9541 }
9542
9543 return UnableToLegalize;
9544}
9545
// Lower G_INSERT. Element-aligned inserts into vectors are done by
// unmerging, substituting the inserted elements, and re-merging; otherwise
// everything is cast to integers and combined via mask/shift/or.
// (Signature line, the DstElts declaration, and the MaskVal initializer line
// are not visible in this excerpt.)
9547 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
9548 uint64_t Offset = MI.getOperand(3).getImm();
9549
9550 LLT DstTy = MRI.getType(Src);
9551 LLT InsertTy = MRI.getType(InsertSrc);
9552
9553 const DataLayout &DL = MIRBuilder.getDataLayout();
// Non-integral pointers can't be converted to/from integers, which the
// scalar fallback below requires.
9554 bool IsNonIntegralInsert =
9555 InsertTy.isPointerOrPointerVector() &&
9556 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace());
9557 bool IsNonIntegralDst = DstTy.isPointerOrPointerVector() &&
9558 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace());
9559
9560 // Insert sub-vector or one element
9561 if (DstTy.isVector()) {
9562 LLT EltTy = DstTy.getElementType();
9563
9564 if ((IsNonIntegralInsert || IsNonIntegralDst) && InsertTy != EltTy) {
9565 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9566 return UnableToLegalize;
9567 }
9568
9569 unsigned EltSize = EltTy.getSizeInBits();
9570 unsigned InsertSize = InsertTy.getSizeInBits();
9571
9572 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9573 (Offset + InsertSize <= DstTy.getSizeInBits())) {
9574 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
9576 unsigned Idx = 0;
9577 // Elements from Src before insert start Offset
9578 for (; Idx < Offset / EltSize; ++Idx) {
9579 DstElts.push_back(UnmergeSrc.getReg(Idx));
9580 }
9581
9582 // Replace elements in Src with elements from InsertSrc
9583 if (InsertTy.getSizeInBits() > EltSize) {
// Multi-element insert: unmerge the inserted value too.
9584 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9585 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
9586 ++Idx, ++i) {
9587 DstElts.push_back(UnmergeInsertSrc.getReg(i));
9588 }
9589 } else {
// Single-element insert: bridge any pointer/integer mismatch between the
// inserted value and the vector's element type.
9590 if (InsertTy.isPointer() && !EltTy.isPointer())
9591 InsertSrc = MIRBuilder.buildPtrToInt(EltTy, InsertSrc).getReg(0);
9592 else if (!InsertTy.isPointer() && EltTy.isPointer())
9593 InsertSrc = MIRBuilder.buildIntToPtr(EltTy, InsertSrc).getReg(0);
9594 DstElts.push_back(InsertSrc);
9595 ++Idx;
9596 }
9597
9598 // Remaining elements from Src after insert
9599 for (; Idx < DstTy.getNumElements(); ++Idx) {
9600 DstElts.push_back(UnmergeSrc.getReg(Idx));
9601 }
9602
9603 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9604 MI.eraseFromParent();
9605 return Legalized;
9606 }
9607 }
9608
9609 if (InsertTy.isVector() ||
9610 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
9611 return UnableToLegalize;
9612
9613 if (IsNonIntegralDst || IsNonIntegralInsert) {
9614 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9615 return UnableToLegalize;
9616 }
9617
// Scalar fallback: clear the destination's bit range with a mask, then OR
// in the zero-extended, shifted insert value.
9618 LLT IntDstTy = DstTy;
9619
9620 if (!DstTy.isScalar()) {
9621 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
9622 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9623 }
9624
9625 if (!InsertTy.isScalar()) {
9626 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
9627 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9628 }
9629
9630 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
9631 if (Offset != 0) {
9632 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
9633 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9634 }
9635
9637 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
9638
9639 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
9640 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9641 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9642
9643 MIRBuilder.buildCast(Dst, Or);
9644 MI.eraseFromParent();
9645 return Legalized;
9646}
9647
// Lower G_SADDO/G_SSUBO: perform the plain add/sub, then derive the signed
// overflow bit from the sign comparisons explained in the comments below.
// (Signature lines not visible in this excerpt.)
9650 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9651 MI.getFirst4RegLLTs();
9652 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
9653
9654 LLT Ty = Dst0Ty;
9655 LLT BoolTy = Dst1Ty;
9656
// Use a clone so the compares below can read the result before Dst0 is
// finally defined by the trailing copy.
9657 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9658
9659 if (IsAdd)
9660 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
9661 else
9662 MIRBuilder.buildSub(NewDst0, LHS, RHS);
9663
9664 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
9665
9666 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9667
9668 // For an addition, the result should be less than one of the operands (LHS)
9669 // if and only if the other operand (RHS) is negative, otherwise there will
9670 // be overflow.
9671 // For a subtraction, the result should be less than one of the operands
9672 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
9673 // otherwise there will be overflow.
9674 auto ResultLowerThanLHS =
9675 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
9676 auto ConditionRHS = MIRBuilder.buildICmp(
9677 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
9678
// Overflow iff the observed ordering disagrees with the expected one.
9679 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
9680
9681 MIRBuilder.buildCopy(Dst0, NewDst0);
9682 MI.eraseFromParent();
9683
9684 return Legalized;
9685}
9686
// Lower a signed add-with-carry-in (presumably G_SADDE — the signature line
// is not visible in this excerpt): compute LHS + RHS + zext(CarryIn) and set
// the overflow output from the sign-bit test documented below.
9688 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9689 const LLT Ty = MRI.getType(Res);
9690
9691 // sum = LHS + RHS + zext(CarryIn)
9692 auto Tmp = MIRBuilder.buildAdd(Ty, LHS, RHS);
9693 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9694 auto Sum = MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
9695 MIRBuilder.buildCopy(Res, Sum);
9696
9697 // OvOut = icmp slt ((sum ^ lhs) & (sum ^ rhs)), 0
// Signed overflow occurred iff the result's sign differs from both
// operands' signs.
9698 auto AX = MIRBuilder.buildXor(Ty, Sum, LHS);
9699 auto BX = MIRBuilder.buildXor(Ty, Sum, RHS);
9700 auto T = MIRBuilder.buildAnd(Ty, AX, BX);
9701
9702 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9703 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9704
9705 MI.eraseFromParent();
9706 return Legalized;
9707}
9708
// Lower a signed sub-with-borrow-in (presumably G_SSUBE — the signature line
// is not visible in this excerpt): compute LHS - (RHS + zext(CarryIn)) and
// set the overflow output from the sign-bit test documented below.
9710 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9711 const LLT Ty = MRI.getType(Res);
9712
9713 // Diff = LHS - (RHS + zext(CarryIn))
9714 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9715 auto RHSPlusCI = MIRBuilder.buildAdd(Ty, RHS, CarryZ);
9716 auto Diff = MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
9717 MIRBuilder.buildCopy(Res, Diff);
9718
9719 // ov = msb((LHS ^ RHS) & (LHS ^ Diff))
// Signed overflow occurred iff the operands' signs differ and the result's
// sign differs from LHS's sign.
9720 auto X1 = MIRBuilder.buildXor(Ty, LHS, RHS);
9721 auto X2 = MIRBuilder.buildXor(Ty, LHS, Diff);
9722 auto T = MIRBuilder.buildAnd(Ty, X1, X2);
9723 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9724 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9725
9726 MI.eraseFromParent();
9727 return Legalized;
9728}
9729
// Lower G_[US]{ADD,SUB}SAT using min/max clamping of the RHS, per the
// per-case pseudocode comments below. (Signature lines and the Hi/Lo
// declaration line are not visible in this excerpt.)
9732 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9733 LLT Ty = MRI.getType(Res);
9734 bool IsSigned;
9735 bool IsAdd;
9736 unsigned BaseOp;
9737 switch (MI.getOpcode()) {
9738 default:
9739 llvm_unreachable("unexpected addsat/subsat opcode");
9740 case TargetOpcode::G_UADDSAT:
9741 IsSigned = false;
9742 IsAdd = true;
9743 BaseOp = TargetOpcode::G_ADD;
9744 break;
9745 case TargetOpcode::G_SADDSAT:
9746 IsSigned = true;
9747 IsAdd = true;
9748 BaseOp = TargetOpcode::G_ADD;
9749 break;
9750 case TargetOpcode::G_USUBSAT:
9751 IsSigned = false;
9752 IsAdd = false;
9753 BaseOp = TargetOpcode::G_SUB;
9754 break;
9755 case TargetOpcode::G_SSUBSAT:
9756 IsSigned = true;
9757 IsAdd = false;
9758 BaseOp = TargetOpcode::G_SUB;
9759 break;
9760 }
9761
9762 if (IsSigned) {
9763 // sadd.sat(a, b) ->
9764 // hi = 0x7fffffff - smax(a, 0)
9765 // lo = 0x80000000 - smin(a, 0)
9766 // a + smin(smax(lo, b), hi)
9767 // ssub.sat(a, b) ->
9768 // lo = smax(a, -1) - 0x7fffffff
9769 // hi = smin(a, -1) - 0x80000000
9770 // a - smin(smax(lo, b), hi)
9771 // TODO: AMDGPU can use a "median of 3" instruction here:
9772 // a +/- med3(lo, b, hi)
9773 uint64_t NumBits = Ty.getScalarSizeInBits();
9774 auto MaxVal =
9775 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
9776 auto MinVal =
9777 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9779 if (IsAdd) {
9780 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9781 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
9782 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
9783 } else {
9784 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
9785 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
9786 MaxVal);
9787 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
9788 MinVal);
9789 }
// Clamp RHS into [Lo, Hi] so the base add/sub can no longer overflow.
9790 auto RHSClamped =
9791 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
9792 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
9793 } else {
9794 // uadd.sat(a, b) -> a + umin(~a, b)
9795 // usub.sat(a, b) -> a - umin(a, b)
9796 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
9797 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
9798 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9799 }
9800
9801 MI.eraseFromParent();
9802 return Legalized;
9803}
9804
// Lower G_[US]{ADD,SUB}SAT via the corresponding overflow op: perform the
// operation, then select the saturated clamp value when the overflow bit is
// set. (Signature lines not visible in this excerpt.)
9807 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9808 LLT Ty = MRI.getType(Res);
9809 LLT BoolTy = Ty.changeElementSize(1);
9810 bool IsSigned;
9811 bool IsAdd;
9812 unsigned OverflowOp;
9813 switch (MI.getOpcode()) {
9814 default:
9815 llvm_unreachable("unexpected addsat/subsat opcode");
9816 case TargetOpcode::G_UADDSAT:
9817 IsSigned = false;
9818 IsAdd = true;
9819 OverflowOp = TargetOpcode::G_UADDO;
9820 break;
9821 case TargetOpcode::G_SADDSAT:
9822 IsSigned = true;
9823 IsAdd = true;
9824 OverflowOp = TargetOpcode::G_SADDO;
9825 break;
9826 case TargetOpcode::G_USUBSAT:
9827 IsSigned = false;
9828 IsAdd = false;
9829 OverflowOp = TargetOpcode::G_USUBO;
9830 break;
9831 case TargetOpcode::G_SSUBSAT:
9832 IsSigned = true;
9833 IsAdd = false;
9834 OverflowOp = TargetOpcode::G_SSUBO;
9835 break;
9836 }
9837
9838 auto OverflowRes =
9839 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9840 Register Tmp = OverflowRes.getReg(0);
9841 Register Ov = OverflowRes.getReg(1);
9842 MachineInstrBuilder Clamp;
9843 if (IsSigned) {
9844 // sadd.sat(a, b) ->
9845 // {tmp, ov} = saddo(a, b)
9846 // ov ? (tmp >>s 31) + 0x80000000 : r
9847 // ssub.sat(a, b) ->
9848 // {tmp, ov} = ssubo(a, b)
9849 // ov ? (tmp >>s 31) + 0x80000000 : r
// The arithmetic shift broadcasts the wrapped result's sign bit, so the
// clamp becomes INT_MAX when the true result overflowed positively and
// INT_MIN when it overflowed negatively.
9850 uint64_t NumBits = Ty.getScalarSizeInBits();
9851 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
9852 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9853 auto MinVal =
9854 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9855 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
9856 } else {
9857 // uadd.sat(a, b) ->
9858 // {tmp, ov} = uaddo(a, b)
9859 // ov ? 0xffffffff : tmp
9860 // usub.sat(a, b) ->
9861 // {tmp, ov} = usubo(a, b)
9862 // ov ? 0 : tmp
9863 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9864 }
9865 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
9866
9867 MI.eraseFromParent();
9868 return Legalized;
9869}
9870
// Lower G_SSHLSAT/G_USHLSAT: do the shift, undo it with the matching
// right-shift, and if the round-trip does not reproduce the input the shift
// overflowed — select the saturation value instead. (Signature lines not
// visible in this excerpt.)
9873 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
9874 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
9875 "Expected shlsat opcode!");
9876 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
9877 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9878 LLT Ty = MRI.getType(Res);
9879 LLT BoolTy = Ty.changeElementSize(1);
9880
9881 unsigned BW = Ty.getScalarSizeInBits();
9882 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
9883 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
9884 : MIRBuilder.buildLShr(Ty, Result, RHS);
9885
9886 MachineInstrBuilder SatVal;
9887 if (IsSigned) {
// Saturate towards the sign of the (unshifted) LHS.
9888 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
9889 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
9890 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
9891 MIRBuilder.buildConstant(Ty, 0));
9892 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
9893 } else {
9894 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
9895 }
9896 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
9897 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
9898
9899 MI.eraseFromParent();
9900 return Legalized;
9901}
9902
// Lower G_BSWAP with shifts, masks, and ORs: first swap the outermost byte
// pair, then work inward one byte pair per loop iteration. (Signature line
// not visible in this excerpt.)
9904 auto [Dst, Src] = MI.getFirst2Regs();
9905 const LLT Ty = MRI.getType(Src);
9906 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
9907 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
9908
9909 // Swap most and least significant byte, set remaining bytes in Res to zero.
9910 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
9911 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
9912 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9913 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
9914
9915 // Set i-th high/low byte in Res to i-th low/high byte from Src.
9916 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
9917 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
9918 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
9919 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
// Byte pair (i, SizeInBytes-1-i) is 16*i bits closer together than the
// outermost pair, hence the shrinking shift amount.
9920 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
9921 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
9922 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
9923 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
9924 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
9925 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
9926 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9927 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
9928 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
9929 }
// Retarget the final OR to define Dst directly instead of emitting an
// extra copy.
9930 Res.getInstr()->getOperand(0).setReg(Dst);
9931
9932 MI.eraseFromParent();
9933 return Legalized;
9934}
9935
9936//{ (Src & Mask) >> N } | { (Src << N) & Mask }
// Static helper for lowerBitreverse: swaps adjacent N-bit fields selected by
// Mask (the mask covers the high half of each 2N-bit group), writing the
// result to Dst. (First signature line not visible in this excerpt.)
9938 MachineInstrBuilder Src, const APInt &Mask) {
9939 const LLT Ty = Dst.getLLTTy(*B.getMRI());
9940 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
9941 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
9942 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
9943 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
9944 return B.buildOr(Dst, LHS, RHS);
9945}
9946
// Lower G_BITREVERSE. For >= 8-bit elements: bswap the bytes, then swap
// nibbles, 2-bit pairs, and single bits via SwapN (or reuse an i8-vector
// G_BITREVERSE through bitcasts when the target supports it). For sub-8-bit
// types: move each bit individually. (Signature lines and the Tmp/Tmp2
// declaration lines are not visible in this excerpt.)
9948 auto [Dst, Src] = MI.getFirst2Regs();
9949 const LLT SrcTy = MRI.getType(Src);
9950 unsigned Size = SrcTy.getScalarSizeInBits();
9951 unsigned VSize = SrcTy.getSizeInBits();
9952
9953 if (Size >= 8) {
9954 if (SrcTy.isVector() && (VSize % 8 == 0) &&
9955 (LI.isLegal({TargetOpcode::G_BITREVERSE,
9956 {LLT::fixed_vector(VSize / 8, 8),
9957 LLT::fixed_vector(VSize / 8, 8)}}))) {
9958 // If bitreverse is legal for i8 vector of the same size, then cast
9959 // to i8 vector type.
9960 // e.g. v4s32 -> v16s8
9961 LLT VTy = LLT::fixed_vector(VSize / 8, 8);
9962 auto BSWAP = MIRBuilder.buildBSwap(SrcTy, Src);
9963 auto Cast = MIRBuilder.buildBitcast(VTy, BSWAP);
9964 auto RBIT = MIRBuilder.buildBitReverse(VTy, Cast);
9965 MIRBuilder.buildBitcast(Dst, RBIT);
9966 } else {
9967 MachineInstrBuilder BSWAP =
9968 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
9969
9970 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
9971 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
9972 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
9973 MachineInstrBuilder Swap4 = SwapN(4, SrcTy, MIRBuilder, BSWAP,
9974 APInt::getSplat(Size, APInt(8, 0xF0)));
9975
9976 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
9977 // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
9978 // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
9979 MachineInstrBuilder Swap2 = SwapN(2, SrcTy, MIRBuilder, Swap4,
9980 APInt::getSplat(Size, APInt(8, 0xCC)));
9981
9982 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
9983 // 6|7
9984 // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
9985 // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
// The last SwapN writes directly into Dst.
9986 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
9987 }
9988 } else {
9989 // Expand bitreverse for types smaller than 8 bits.
// Move bit J of Src to bit I of the result (J counts down as I counts up),
// isolating it with a mask and OR-ing it into the accumulator.
9991 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
9993 if (I < J) {
9994 auto ShAmt = MIRBuilder.buildConstant(SrcTy, J - I);
9995 Tmp2 = MIRBuilder.buildShl(SrcTy, Src, ShAmt);
9996 } else {
9997 auto ShAmt = MIRBuilder.buildConstant(SrcTy, I - J);
9998 Tmp2 = MIRBuilder.buildLShr(SrcTy, Src, ShAmt);
9999 }
10000
10001 auto Mask = MIRBuilder.buildConstant(SrcTy, 1ULL << J);
10002 Tmp2 = MIRBuilder.buildAnd(SrcTy, Tmp2, Mask);
10003 if (I == 0)
10004 Tmp = Tmp2;
10005 else
10006 Tmp = MIRBuilder.buildOr(SrcTy, Tmp, Tmp2);
10007 }
10008 MIRBuilder.buildCopy(Dst, Tmp);
10009 }
10010
10011 MI.eraseFromParent();
10012 return Legalized;
10013}
10015
// Lower G_READ_REGISTER/G_WRITE_REGISTER to a copy from/to the named
// physical register. An unknown register name is diagnosed (not a hard
// error) and a read produces an undef value. (Signature lines and the
// diagnose() call line are not visible in this excerpt.)
10018 MachineFunction &MF = MIRBuilder.getMF();
10019
10020 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
// For a read, operand 0 is the value def and operand 1 the name metadata;
// for a write the order is reversed.
10021 int NameOpIdx = IsRead ? 1 : 0;
10022 int ValRegIndex = IsRead ? 0 : 1;
10023
10024 Register ValReg = MI.getOperand(ValRegIndex).getReg();
10025 const LLT Ty = MRI.getType(ValReg);
10026 const MDString *RegStr = cast<MDString>(
10027 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
10028
10029 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
10030 if (!PhysReg) {
10031 const Function &Fn = MF.getFunction();
10033 "invalid register \"" + Twine(RegStr->getString().data()) + "\" for " +
10034 (IsRead ? "llvm.read_register" : "llvm.write_register"),
10035 Fn, MI.getDebugLoc()));
10036 if (IsRead)
10037 MIRBuilder.buildUndef(ValReg);
10038
10039 MI.eraseFromParent();
10040 return Legalized;
10041 }
10042
10043 if (IsRead)
10044 MIRBuilder.buildCopy(ValReg, PhysReg);
10045 else
10046 MIRBuilder.buildCopy(PhysReg, ValReg);
10047
10048 MI.eraseFromParent();
10049 return Legalized;
10050}
10051
// lowerSMULH_UMULH: compute the high half of a multiply by widening both
// operands to twice the scalar width, multiplying, shifting the product
// right by the original width, and truncating back to the result type.
10054 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
10055 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
10056 Register Result = MI.getOperand(0).getReg();
10057 LLT OrigTy = MRI.getType(Result);
10058 auto SizeInBits = OrigTy.getScalarSizeInBits();
10059 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
10060
// Sign- or zero-extend to match the signedness of the requested high-mul.
10061 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
10062 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
10063 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
// An arithmetic shift keeps the sign of the high half for the signed form.
10064 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
10065
10066 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
10067 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
10068 MIRBuilder.buildTrunc(Result, Shifted);
10069
10070 MI.eraseFromParent();
10071 return Legalized;
10072}
10073
// lowerISFPCLASS: expand G_IS_FPCLASS into integer compares on the bit
// pattern of the source float. The class mask is decomposed: multi-class
// tests (fcFinite, fcZero|fcSubnormal) are matched first, then each
// individual class; partial results are OR'ed into Res.
10076 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
10077 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
10078
// Trivial masks: the empty test is always false, the full test always true.
10079 if (Mask == fcNone) {
10080 MIRBuilder.buildConstant(DstReg, 0);
10081 MI.eraseFromParent();
10082 return Legalized;
10083 }
10084 if (Mask == fcAllFlags) {
10085 MIRBuilder.buildConstant(DstReg, 1);
10086 MI.eraseFromParent();
10087 return Legalized;
10088 }
10089
10090 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
10091 // version
10092
10093 unsigned BitSize = SrcTy.getScalarSizeInBits();
10094 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
10095
// Reinterpret the float as a same-width integer so the class tests become
// integer compares against IEEE bit-pattern constants.
10096 LLT IntTy = SrcTy.changeElementType(LLT::scalar(BitSize));
10097 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
10098
10099 // Various masks.
10100 APInt SignBit = APInt::getSignMask(BitSize);
10101 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
10102 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
10103 APInt ExpMask = Inf;
10104 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
10105 APInt QNaNBitMask =
10106 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
10107 APInt InversionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
10108
10109 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
10110 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
10111 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
10112 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
10113 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
10114
// Abs is the magnitude bits; Sign is true iff the sign bit was set
// (AsInt differs from its sign-cleared form only in the sign bit).
10115 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
10116 auto Sign =
10117 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
10118
10119 auto Res = MIRBuilder.buildConstant(DstTy, 0);
10120 // Clang doesn't support capture of structured bindings:
10121 LLT DstTyCopy = DstTy;
10122 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
10123 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
10124 };
10125
10126 // Tests that involve more than one class should be processed first.
10127 if ((Mask & fcFinite) == fcFinite) {
10128 // finite(V) ==> abs(V) u< exp_mask
10129 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
10130 ExpMaskC));
10131 Mask &= ~fcFinite;
10132 } else if ((Mask & fcFinite) == fcPosFinite) {
10133 // finite(V) && V > 0 ==> V u< exp_mask
10134 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
10135 ExpMaskC));
10136 Mask &= ~fcPosFinite;
10137 } else if ((Mask & fcFinite) == fcNegFinite) {
10138 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
10139 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
10140 ExpMaskC);
10141 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
10142 appendToRes(And);
10143 Mask &= ~fcNegFinite;
10144 }
10145
10146 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
10147 // fcZero | fcSubnormal => test all exponent bits are 0
10148 // TODO: Handle sign bit specific cases
10149 // TODO: Handle inverted case
10150 if (PartialCheck == (fcZero | fcSubnormal)) {
10151 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
10152 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10153 ExpBits, ZeroC));
10154 Mask &= ~PartialCheck;
10155 }
10156 }
10157
10158 // Check for individual classes.
10159 if (FPClassTest PartialCheck = Mask & fcZero) {
10160 if (PartialCheck == fcPosZero)
10161 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10162 AsInt, ZeroC));
10163 else if (PartialCheck == fcZero)
10164 appendToRes(
10165 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
10166 else // fcNegZero
10167 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10168 AsInt, SignBitC));
10169 }
10170
10171 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
10172 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
10173 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
10174 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
10175 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
10176 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
10177 auto SubnormalRes =
10178 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
10179 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
10180 if (PartialCheck == fcNegSubnormal)
10181 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
10182 appendToRes(SubnormalRes);
10183 }
10184
10185 if (FPClassTest PartialCheck = Mask & fcInf) {
10186 if (PartialCheck == fcPosInf)
10187 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10188 AsInt, InfC));
10189 else if (PartialCheck == fcInf)
10190 appendToRes(
10191 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
10192 else { // fcNegInf
10193 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
10194 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
10195 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10196 AsInt, NegInfC));
10197 }
10198 }
10199
10200 if (FPClassTest PartialCheck = Mask & fcNan) {
10201 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
10202 if (PartialCheck == fcNan) {
10203 // isnan(V) ==> abs(V) u> int(inf)
10204 appendToRes(
10205 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
10206 } else if (PartialCheck == fcQNan) {
10207 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
10208 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
10209 InfWithQnanBitC));
10210 } else { // fcSNan
10211 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
10212 // abs(V) u< (unsigned(Inf) | quiet_bit)
10213 auto IsNan =
10214 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
10215 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
10216 Abs, InfWithQnanBitC);
10217 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
10218 }
10219 }
10220
10221 if (FPClassTest PartialCheck = Mask & fcNormal) {
10222 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
10223 // (max_exp-1))
10224 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
10225 auto ExpMinusOne = MIRBuilder.buildSub(
10226 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
10227 APInt MaxExpMinusOne = ExpMask - ExpLSB;
10228 auto NormalRes =
10229 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
10230 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
10231 if (PartialCheck == fcNegNormal)
10232 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
10233 else if (PartialCheck == fcPosNormal) {
10234 auto PosSign = MIRBuilder.buildXor(
10235 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InversionMask));
10236 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
10237 }
10238 appendToRes(NormalRes);
10239 }
10240
10241 MIRBuilder.buildCopy(DstReg, Res);
10242 MI.eraseFromParent();
10243 return Legalized;
10244}
10245
// lowerSelect: implement G_SELECT as bitwise ops:
//   Dst = (Mask & Op1) | (~Mask & Op2)
// A scalar condition is first sign-extended into a full-width (or splatted
// vector) mask; pointer and FP element types are round-tripped through
// integers so the AND/OR are legal.
10247 // Implement G_SELECT in terms of XOR, AND, OR.
10248 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
10249 MI.getFirst4RegLLTs();
10250
10251 LLT Op1TyInt =
10252 Op1Ty.changeElementType(LLT::integer(Op1Ty.getScalarSizeInBits()));
10253
// Pointer (or pointer-vector) elements can't be AND/OR'ed directly;
// convert to same-width integers and convert the result back at the end.
10254 bool IsEltPtr = DstTy.isPointerOrPointerVector();
10255 if (IsEltPtr) {
10256 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
10257 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
10258 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
10259 Op1Ty = MRI.getType(Op1Reg);
10260 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
10261 Op2Ty = MRI.getType(Op2Reg);
10262 DstTy = NewTy;
10263 }
10264
10265 if (MaskTy.isScalar()) {
10266 // Turn the scalar condition into a vector condition mask if needed.
10267
10268 Register MaskElt = MaskReg;
10269
10270 // The condition was potentially zero extended before, but we want a sign
10271 // extended boolean.
10272 if (MaskTy != LLT::scalar(1))
10273 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
10274
10275 // Continue the sign extension (or truncate) to match the data type.
10276 MaskElt =
10277 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
10278
10279 if (DstTy.isVector()) {
10280 // Generate a vector splat idiom.
10281 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
10282 MaskReg = ShufSplat.getReg(0);
10283 } else {
10284 MaskReg = MaskElt;
10285 }
10286 MaskTy = DstTy;
10287 } else if (!DstTy.isVector()) {
10288 // Cannot handle the case that mask is a vector and dst is a scalar.
10289 return UnableToLegalize;
10290 }
10291
// The bitwise trick only works when mask and data occupy the same number
// of bits.
10292 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
10293 return UnableToLegalize;
10294 }
10295
// Non-integer (e.g. FP) element types get bitcast to integers for the
// logical ops.
10296 if (!(Op1Ty.getScalarType().isAnyScalar() ||
10297 Op1Ty.getScalarType().isInteger()))
10298 Op1Reg = MIRBuilder.buildBitcast(Op1TyInt, Op1Reg).getReg(0);
10299
10300 if (!(Op2Ty.getScalarType().isAnyScalar() ||
10301 Op2Ty.getScalarType().isInteger())) {
10302 auto Op2TyInt =
10303 Op2Ty.changeElementType(LLT::integer(Op2Ty.getScalarSizeInBits()));
10304 Op2Reg = MIRBuilder.buildBitcast(Op2TyInt, Op2Reg).getReg(0);
10305 }
10306
// Dst = (Op1 & Mask) | (Op2 & ~Mask), then undo any pointer/FP casts.
10307 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
10308 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
10309 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
10310 if (IsEltPtr) {
10311 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
10312 MIRBuilder.buildIntToPtr(DstReg, Or);
10313 } else {
10314 if (DstTy == Op1TyInt)
10315 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
10316 else {
10317 auto Or = MIRBuilder.buildOr(Op1TyInt, NewOp1, NewOp2);
10318 MIRBuilder.buildBitcast(DstReg, Or.getReg(0));
10319 }
10320 }
10321 MI.eraseFromParent();
10322 return Legalized;
10323}
10324
// lowerDIVREM: split G_SDIVREM/G_UDIVREM into a separate divide and
// remainder instruction over the same operands; result 0 receives the
// quotient, result 1 the remainder.
10326 // Split DIVREM into individual instructions.
10327 unsigned Opcode = MI.getOpcode();
10328
10329 MIRBuilder.buildInstr(
10330 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
10331 : TargetOpcode::G_UDIV,
10332 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10333 MIRBuilder.buildInstr(
10334 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
10335 : TargetOpcode::G_UREM,
10336 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10337 MI.eraseFromParent();
10338 return Legalized;
10339}
10340
// lowerAbsToAddXor: branch-free abs via the classic shift/add/xor idiom.
// The arithmetic shift produces all-ones for negative inputs (all-zeros
// otherwise), so add+xor conditionally negates the value.
10343 // Expand %res = G_ABS %a into:
10344 // %v1 = G_ASHR %a, scalar_size-1
10345 // %v2 = G_ADD %a, %v1
10346 // %res = G_XOR %v2, %v1
10347 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
10348 Register OpReg = MI.getOperand(1).getReg();
10349 auto ShiftAmt =
10350 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
10351 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
10352 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
10353 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
10354 MI.eraseFromParent();
10355 return Legalized;
10356}
10357
// lowerAbsToMaxNeg: abs(a) == smax(a, -a); useful on targets with a cheap
// signed-max instruction.
10360 // Expand %res = G_ABS %a into:
10361 // %v1 = G_CONSTANT 0
10362 // %v2 = G_SUB %v1, %a
10363 // %res = G_SMAX %a, %v2
10364 Register SrcReg = MI.getOperand(1).getReg();
10365 LLT Ty = MRI.getType(SrcReg);
10366 auto Zero = MIRBuilder.buildConstant(Ty, 0);
10367 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
10368 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
10369 MI.eraseFromParent();
10370 return Legalized;
10371}
10372
// lowerAbsToCNeg: abs via compare + select: (a > 0) ? a : -a. Targets with
// conditional-negate/select hardware can match this pattern directly.
10375 Register SrcReg = MI.getOperand(1).getReg();
10376 Register DestReg = MI.getOperand(0).getReg();
// IType is the s1 boolean produced by the compare.
10377 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
10378 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
10379 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
10380 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
10381 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
10382 MI.eraseFromParent();
10383 return Legalized;
10384}
10385
// Lower G_ABDS/G_ABDU (absolute difference) to a compare and a select
// between the two possible subtraction orders.
10388 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10389 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10390 "Expected G_ABDS or G_ABDU instruction");
10391
10392 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10393 LLT Ty = MRI.getType(LHS);
10394
10395 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10396 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10397 Register LHSSub = MIRBuilder.buildSub(Ty, LHS, RHS).getReg(0);
10398 Register RHSSub = MIRBuilder.buildSub(Ty, RHS, LHS).getReg(0);
// Signed form compares with a signed predicate, unsigned with unsigned.
// NOTE(review): the ternary's two predicate arms (orig. lines 10400-10401)
// are not visible in this excerpt.
10399 CmpInst::Predicate Pred = (MI.getOpcode() == TargetOpcode::G_ABDS)
10402 auto ICmp = MIRBuilder.buildICmp(Pred, LLT::scalar(1), LHS, RHS);
10403 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
10404
10405 MI.eraseFromParent();
10406 return Legalized;
10407}
10408
// Lower G_ABDS/G_ABDU (absolute difference) as max - min, avoiding a
// select; preferable when the target has native min/max instructions.
10411 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10412 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10413 "Expected G_ABDS or G_ABDU instruction");
10414
10415 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10416 LLT Ty = MRI.getType(LHS);
10417
10418 // abds(lhs, rhs) -→ sub(smax(lhs, rhs), smin(lhs, rhs))
10419 // abdu(lhs, rhs) -→ sub(umax(lhs, rhs), umin(lhs, rhs))
10420 Register MaxReg, MinReg;
10421 if (MI.getOpcode() == TargetOpcode::G_ABDS) {
10422 MaxReg = MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10423 MinReg = MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10424 } else {
10425 MaxReg = MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10426 MinReg = MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10427 }
10428 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10429
10430 MI.eraseFromParent();
10431 return Legalized;
10432}
10433
// Lower by clearing the IEEE sign bit: AND the integer bit pattern with
// getSignedMaxValue (all bits but the top), i.e. a G_FABS-style expansion.
// FP-typed sources are bitcast to same-width integers first.
// NOTE(review): the function signature precedes this excerpt; presumed
// lowering of a fabs-like opcode — confirm against full source.
10435 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
10436 LLT TyInt =
10437 DstTy.changeElementType(LLT::integer(DstTy.getScalarSizeInBits()));
10438 Register CastedSrc = SrcReg;
10439
// Non-integer (FP) sources must be reinterpreted as integers before the
// sign-bit mask can be applied.
10440 if (!(SrcTy.getScalarType().isAnyScalar() ||
10441 SrcTy.getScalarType().isInteger())) {
10442 auto SrcTyInt =
10443 SrcTy.changeElementType(LLT::integer(SrcTy.getScalarSizeInBits()));
10444 CastedSrc = MIRBuilder.buildBitcast(SrcTyInt, SrcReg).getReg(0);
10445 }
10446
// If the destination isn't already the integer type, mask in the integer
// domain and bitcast the result back.
// NOTE(review): continuation lines orig. 10450, 10453 and 10459 (the
// "MIRBuilder" expression heads) are not visible in this excerpt.
10447 if (MRI.getType(DstReg) != TyInt) {
10448 // Reset sign bit
10449 Register NewDst =
10451 .buildAnd(TyInt, CastedSrc,
10452 MIRBuilder.buildConstant(
10454 DstTy.getScalarSizeInBits())))
10455 .getReg(0);
10456
10457 MIRBuilder.buildBitcast(DstReg, NewDst);
10458 } else
10460 .buildAnd(
10461 DstReg, CastedSrc,
10462 MIRBuilder.buildConstant(
10463 TyInt, APInt::getSignedMaxValue(DstTy.getScalarSizeInBits())))
10464 .getReg(0);
10465
10466 MI.eraseFromParent();
10467 return Legalized;
10468}
10469
// Handles the degenerate case where the source operand is a scalar (e.g.
// the IR type was <1 x sN>): the operation collapses to a plain COPY.
10472 Register SrcReg = MI.getOperand(1).getReg();
10473 LLT SrcTy = MRI.getType(SrcReg);
// NOTE(review): DstTy is taken from SrcReg, not from the destination
// operand — so DstTy == SrcTy and the size comparison below can never be
// true. Looks like a latent copy/paste issue; confirm intended behavior
// (likely MI.getOperand(0)) before changing.
10474 LLT DstTy = MRI.getType(SrcReg);
10475
10476 // The source could be a scalar if the IR type was <1 x sN>.
10477 if (SrcTy.isScalar()) {
10478 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
10479 return UnableToLegalize; // FIXME: handle extension.
10480 // This can be just a plain copy.
10481 Observer.changingInstr(MI);
10482 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
10483 Observer.changedInstr(MI);
10484 return Legalized;
10485 }
10486 return UnableToLegalize;
10487}
10488
// lowerVAArg: expand G_VAARG into explicit list-pointer manipulation:
// load the current argument pointer, over-align it if the vaarg requires
// more than the minimum stack-argument alignment, bump it past the
// argument, store it back, and finally load the argument value itself.
10490 MachineFunction &MF = *MI.getMF();
10491 const DataLayout &DL = MIRBuilder.getDataLayout();
10492 LLVMContext &Ctx = MF.getFunction().getContext();
10493 Register ListPtr = MI.getOperand(1).getReg();
10494 LLT PtrTy = MRI.getType(ListPtr);
10495
10496 // LstPtr is a pointer to the head of the list. Get the address
10497 // of the head of the list.
10498 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
10499 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
10500 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
10501 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10502
// Operand 2 carries the required alignment as an immediate. Round the
// list pointer up: add (A-1), then mask off the low bits.
10503 const Align A(MI.getOperand(2).getImm());
10504 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
10505 if (A > TLI.getMinStackArgumentAlignment()) {
10506 Register AlignAmt =
10507 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
10508 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10509 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
10510 VAList = AndDst.getReg(0);
10511 }
10512
10513 // Increment the pointer, VAList, to the next vaarg
10514 // The list should be bumped by the size of element in the current head of
10515 // list.
10516 Register Dst = MI.getOperand(0).getReg();
10517 LLT LLTTy = MRI.getType(Dst);
10518 Type *Ty = getTypeForLLT(LLTTy, Ctx);
10519 auto IncAmt =
10520 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
10521 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10522
10523 // Store the increment VAList to the legalized pointer
// NOTE(review): the StoreMMO declaration head (orig. line 10524) is not
// visible in this excerpt; the arguments below belong to it.
10525 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
10526 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10527 // Load the actual argument out of the pointer VAList
10528 Align EltAlignment = DL.getABITypeAlign(Ty);
10529 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
10530 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
10531 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10532
10533 MI.eraseFromParent();
10534 return Legalized;
10535}
10536
// shouldLowerMemFuncForSize: decide whether mem-function lowering should
// optimize for size.
// NOTE(review): the condition guarding the first return (orig. line 10540)
// is not visible in this excerpt; per the comment it presumably tests for
// a Darwin target triple — confirm against full source.
10538 // On Darwin, -Os means optimize for size without hurting performance, so
10539 // only really optimize for size when -Oz (MinSize) is used.
10541 return MF.getFunction().hasMinSize();
10542 return MF.getFunction().hasOptSize();
10543}
10544
10545 // Returns a list of types to use for memory op lowering in MemOps. A partial
10546 // port of findOptimalMemOpLowering in TargetLowering.
// Greedily covers Op.size() bytes with the widest type the target allows,
// shrinking (or overlapping, when permitted) for the tail. Returns false
// if more than Limit operations would be needed or the alignment
// constraints cannot be met.
10547 static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
10548 unsigned Limit, const MemOp &Op,
10549 unsigned DstAS, unsigned SrcAS,
10550 const AttributeList &FuncAttributes,
10551 const TargetLowering &TLI) {
10552 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
10553 return false;
10554
// Let the target pick its preferred wide type first.
10555 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
10556
10557 if (Ty == LLT()) {
10558 // Use the largest scalar type whose alignment constraints are satisfied.
10559 // We only need to check DstAlign here as SrcAlign is always greater or
10560 // equal to DstAlign (or zero).
10561 Ty = LLT::scalar(64);
10562 if (Op.isFixedDstAlign())
10563 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
10564 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
10565 Ty = LLT::scalar(Ty.getSizeInBytes());
10566 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
10567 // FIXME: check for the largest legal type we can load/store to.
10568 }
10569
10570 unsigned NumMemOps = 0;
10571 uint64_t Size = Op.size();
10572 while (Size) {
10573 unsigned TySize = Ty.getSizeInBytes();
10574 while (TySize > Size) {
10575 // For now, only use non-vector load / store's for the left-over pieces.
10576 LLT NewTy = Ty;
10577 // FIXME: check for mem op safety and legality of the types. Not all of
10578 // SDAGisms map cleanly to GISel concepts.
10579 if (NewTy.isVector())
10580 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
// bit_floor(bits - 1) halves to the next power of two strictly below Ty.
10581 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
10582 unsigned NewTySize = NewTy.getSizeInBytes();
10583 assert(NewTySize > 0 && "Could not find appropriate type");
10584
10585 // If the new LLT cannot cover all of the remaining bits, then consider
10586 // issuing a (or a pair of) unaligned and overlapping load / store.
10587 unsigned Fast;
10588 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
10589 MVT VT = getMVTForLLT(Ty);
// NOTE(review): the allowsMisalignedMemoryAccesses call head and trailing
// arguments (orig. lines 10591 and 10593) are not visible in this excerpt.
10590 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
10592 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
10594 Fast)
10595 TySize = Size;
10596 else {
10597 Ty = NewTy;
10598 TySize = NewTySize;
10599 }
10600 }
10601
10602 if (++NumMemOps > Limit)
10603 return false;
10604
10605 MemOps.push_back(Ty);
10606 Size -= TySize;
10607 }
10608
10609 return true;
10610}
10611
10612 // Get a vectorized representation of the memset value operand, GISel edition.
// Replicates the (byte-sized) memset value across the requested type:
// constants are splatted directly; otherwise the byte is zero-extended and
// multiplied by the 0x0101... magic constant.
10614 MachineRegisterInfo &MRI = *MIB.getMRI();
10615 unsigned NumBits = Ty.getScalarSizeInBits();
10616 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
// Known-constant scalar: fold the splat at compile time.
10617 if (!Ty.isVector() && ValVRegAndVal) {
10618 APInt Scalar = ValVRegAndVal->Value.trunc(8);
10619 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
10620 return MIB.buildConstant(Ty, SplatVal).getReg(0);
10621 }
10622
10623 // Extend the byte value to the larger type, and then multiply by a magic
10624 // value 0x010101... in order to replicate it across every byte.
10625 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
10626 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10627 return MIB.buildConstant(Ty, 0).getReg(0);
10628 }
10629
10630 LLT ExtType = Ty.getScalarType();
10631 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
10632 if (NumBits > 8) {
10633 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
10634 auto MagicMI = MIB.buildConstant(ExtType, Magic);
10635 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
10636 }
10637
10638 // For vector types create a G_BUILD_VECTOR.
10639 if (Ty.isVector())
10640 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
10641
10642 return Val;
10643}
10644
// lowerMemset: inline-expand a G_MEMSET of known length into a sequence of
// stores. Picks an optimal list of store types, materializes the splatted
// value once at the widest type, and truncates it for narrower tail
// stores; the final pair may deliberately overlap its predecessor.
10646 LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
10647 uint64_t KnownLen, Align Alignment,
10648 bool IsVolatile) {
10649 auto &MF = *MI.getParent()->getParent();
10650 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10651 auto &DL = MF.getDataLayout();
10652 LLVMContext &C = MF.getFunction().getContext();
10653
10654 assert(KnownLen != 0 && "Have a zero length memset length!");
10655
// A destination rooted at a non-fixed frame index may have its stack
// object re-aligned below to enable wider stores.
10656 bool DstAlignCanChange = false;
10657 MachineFrameInfo &MFI = MF.getFrameInfo();
10658 bool OptSize = shouldLowerMemFuncForSize(MF);
10659
10660 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10661 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10662 DstAlignCanChange = true;
10663
10664 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
10665 std::vector<LLT> MemOps;
10666
10667 const auto &DstMMO = **MI.memoperands_begin();
10668 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10669
10670 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10671 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
10672
// ~0u: memset has no meaningful source address space.
10673 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
10674 MemOp::Set(KnownLen, DstAlignCanChange,
10675 Alignment,
10676 /*IsZeroMemset=*/IsZeroVal,
10677 /*IsVolatile=*/IsVolatile),
10678 DstPtrInfo.getAddrSpace(), ~0u,
10679 MF.getFunction().getAttributes(), TLI))
10680 return UnableToLegalize;
10681
10682 if (DstAlignCanChange) {
10683 // Get an estimate of the type from the LLT.
10684 Type *IRTy = getTypeForLLT(MemOps[0], C);
10685 Align NewAlign = DL.getABITypeAlign(IRTy);
10686 if (NewAlign > Alignment) {
10687 Alignment = NewAlign;
10688 unsigned FI = FIDef->getOperand(1).getIndex();
10689 // Give the stack frame object a larger alignment if needed.
10690 if (MFI.getObjectAlign(FI) < Alignment)
10691 MFI.setObjectAlignment(FI, Alignment);
10692 }
10693 }
10694
10695 MachineIRBuilder MIB(MI);
10696 // Find the largest store and generate the bit pattern for it.
10697 LLT LargestTy = MemOps[0];
10698 for (unsigned i = 1; i < MemOps.size(); i++)
10699 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
10700 LargestTy = MemOps[i];
10701
10702 // The memset stored value is always defined as an s8, so in order to make it
10703 // work with larger store types we need to repeat the bit pattern across the
10704 // wider type.
10705 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
10706
10707 if (!MemSetValue)
10708 return UnableToLegalize;
10709
10710 // Generate the stores. For each store type in the list, we generate the
10711 // matching store of that type to the destination address.
10712 LLT PtrTy = MRI.getType(Dst);
10713 unsigned DstOff = 0;
10714 unsigned Size = KnownLen;
10715 for (unsigned I = 0; I < MemOps.size(); I++) {
10716 LLT Ty = MemOps[I];
10717 unsigned TySize = Ty.getSizeInBytes();
10718 if (TySize > Size) {
10719 // Issuing an unaligned load / store pair that overlaps with the previous
10720 // pair. Adjust the offset accordingly.
10721 assert(I == MemOps.size() - 1 && I != 0);
10722 DstOff -= TySize - Size;
10723 }
10724
10725 // If this store is smaller than the largest store see whether we can get
10726 // the smaller value for free with a truncate.
10727 Register Value = MemSetValue;
10728 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
10729 MVT VT = getMVTForLLT(Ty);
10730 MVT LargestVT = getMVTForLLT(LargestTy);
10731 if (!LargestTy.isVector() && !Ty.isVector() &&
10732 TLI.isTruncateFree(LargestVT, VT))
10733 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10734 else
10735 Value = getMemsetValue(Val, Ty, MIB);
10736 if (!Value)
10737 return UnableToLegalize;
10738 }
10739
10740 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
10741
10742 Register Ptr = Dst;
10743 if (DstOff != 0) {
10744 auto Offset =
10745 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
10746 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst, Offset).getReg(0);
10747 }
10748
10749 MIB.buildStore(Value, Ptr, *StoreMMO);
10750 DstOff += Ty.getSizeInBytes();
10751 Size -= TySize;
10752 }
10753
10754 MI.eraseFromParent();
10755 return Legalized;
10756}
10757
// lowerMemcpyInline (instruction form): extract the constant length and
// the alignments from the G_MEMCPY_INLINE's memoperands and forward to the
// explicit-argument overload. A zero-length copy is erased outright.
10759 LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
10760 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10761
10762 auto [Dst, Src, Len] = MI.getFirst3Regs();
10763
10764 const auto *MMOIt = MI.memoperands_begin();
10765 const MachineMemOperand *MemOp = *MMOIt;
10766 bool IsVolatile = MemOp->isVolatile();
10767
10768 // See if this is a constant length copy
10769 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10770 // FIXME: support dynamically sized G_MEMCPY_INLINE
10771 assert(LenVRegAndVal &&
10772 "inline memcpy with dynamic size is not yet supported");
10773 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10774 if (KnownLen == 0) {
10775 MI.eraseFromParent();
10776 return Legalized;
10777 }
10778
// Memoperand 0 describes the destination, memoperand 1 the source.
10779 const auto &DstMMO = **MI.memoperands_begin();
10780 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10781 Align DstAlign = DstMMO.getBaseAlign();
10782 Align SrcAlign = SrcMMO.getBaseAlign();
10783
10784 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
10785 IsVolatile);
10786}
10787
// lowerMemcpyInline (explicit-argument form): an inline memcpy is a memcpy
// with no store-count limit, so delegate with Limit = UINT64_MAX.
10789 LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
10790 uint64_t KnownLen, Align DstAlign,
10791 Align SrcAlign, bool IsVolatile) {
10792 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10793 return lowerMemcpy(MI, Dst, Src, KnownLen,
10794 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
10795 IsVolatile);
10796}
10797
// lowerMemcpy: inline-expand a G_MEMCPY of known length into a sequence of
// load/store pairs. The type list comes from
// findGISelOptimalMemOpLowering; the final pair may deliberately overlap
// its predecessor to cover a remainder smaller than the chosen type.
10799 LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
10800 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
10801 Align SrcAlign, bool IsVolatile) {
10802 auto &MF = *MI.getParent()->getParent();
10803 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10804 auto &DL = MF.getDataLayout();
// NOTE(review): orig. line 10805 (presumably the LLVMContext &C
// declaration used below) is not visible in this excerpt.
10806
10807 assert(KnownLen != 0 && "Have a zero length memcpy length!");
10808
10809 bool DstAlignCanChange = false;
10810 MachineFrameInfo &MFI = MF.getFrameInfo();
// The effective alignment is limited by the less-aligned side.
10811 Align Alignment = std::min(DstAlign, SrcAlign);
10812
10813 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10814 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10815 DstAlignCanChange = true;
10816
10817 // FIXME: infer better src pointer alignment like SelectionDAG does here.
10818 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
10819 // if the memcpy is in a tail call position.
10820
10821 std::vector<LLT> MemOps;
10822
10823 const auto &DstMMO = **MI.memoperands_begin();
10824 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10825 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10826 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10827
// NOTE(review): the call head (orig. line 10828, presumably
// "if (!findGISelOptimalMemOpLowering(") is not visible in this excerpt.
10829 MemOps, Limit,
10830 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10831 IsVolatile),
10832 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10833 MF.getFunction().getAttributes(), TLI))
10834 return UnableToLegalize;
10835
10836 if (DstAlignCanChange) {
10837 // Get an estimate of the type from the LLT.
10838 Type *IRTy = getTypeForLLT(MemOps[0], C);
10839 Align NewAlign = DL.getABITypeAlign(IRTy);
10840
10841 // Don't promote to an alignment that would require dynamic stack
10842 // realignment.
// NOTE(review): the TRI declaration (orig. line 10843) is not visible in
// this excerpt.
10844 if (!TRI->hasStackRealignment(MF))
10845 if (MaybeAlign StackAlign = DL.getStackAlignment())
10846 NewAlign = std::min(NewAlign, *StackAlign);
10847
10848 if (NewAlign > Alignment) {
10849 Alignment = NewAlign;
10850 unsigned FI = FIDef->getOperand(1).getIndex();
10851 // Give the stack frame object a larger alignment if needed.
10852 if (MFI.getObjectAlign(FI) < Alignment)
10853 MFI.setObjectAlignment(FI, Alignment);
10854 }
10855 }
10856
10857 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
10858
10859 MachineIRBuilder MIB(MI);
10860 // Now we need to emit a pair of load and stores for each of the types we've
10861 // collected. I.e. for each type, generate a load from the source pointer of
10862 // that type width, and then generate a corresponding store to the dest buffer
10863 // of that value loaded. This can result in a sequence of loads and stores
10864 // mixed types, depending on what the target specifies as good types to use.
10865 unsigned CurrOffset = 0;
10866 unsigned Size = KnownLen;
10867 for (auto CopyTy : MemOps) {
10868 // Issuing an unaligned load / store pair that overlaps with the previous
10869 // pair. Adjust the offset accordingly.
10870 if (CopyTy.getSizeInBytes() > Size)
10871 CurrOffset -= CopyTy.getSizeInBytes() - Size;
10872
10873 // Construct MMOs for the accesses.
10874 auto *LoadMMO =
10875 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10876 auto *StoreMMO =
10877 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10878
10879 // Create the load.
10880 Register LoadPtr = Src;
// NOTE(review): the Offset register declaration (orig. line 10881) is not
// visible in this excerpt; it is reused for the store below.
10882 if (CurrOffset != 0) {
10883 LLT SrcTy = MRI.getType(Src);
10884 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
10885 .getReg(0);
10886 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10887 }
10888 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
10889
10890 // Create the store.
10891 Register StorePtr = Dst;
10892 if (CurrOffset != 0) {
10893 LLT DstTy = MRI.getType(Dst);
10894 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10895 }
10896 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
10897 CurrOffset += CopyTy.getSizeInBytes();
10898 Size -= CopyTy.getSizeInBytes();
10899 }
10900
10901 MI.eraseFromParent();
10902 return Legalized;
10903}
10904
10906LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
10907 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
10908 bool IsVolatile) {
10909 auto &MF = *MI.getParent()->getParent();
10910 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10911 auto &DL = MF.getDataLayout();
10912 LLVMContext &C = MF.getFunction().getContext();
10913
10914 assert(KnownLen != 0 && "Have a zero length memmove length!");
10915
10916 bool DstAlignCanChange = false;
10917 MachineFrameInfo &MFI = MF.getFrameInfo();
10918 bool OptSize = shouldLowerMemFuncForSize(MF);
10919 Align Alignment = std::min(DstAlign, SrcAlign);
10920
10921 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10922 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10923 DstAlignCanChange = true;
10924
10925 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
10926 std::vector<LLT> MemOps;
10927
10928 const auto &DstMMO = **MI.memoperands_begin();
10929 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10930 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10931 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10932
10933 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
10934 // to a bug in it's findOptimalMemOpLowering implementation. For now do the
10935 // same thing here.
10937 MemOps, Limit,
10938 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10939 /*IsVolatile*/ true),
10940 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10941 MF.getFunction().getAttributes(), TLI))
10942 return UnableToLegalize;
10943
10944 if (DstAlignCanChange) {
10945 // Get an estimate of the type from the LLT.
10946 Type *IRTy = getTypeForLLT(MemOps[0], C);
10947 Align NewAlign = DL.getABITypeAlign(IRTy);
10948
10949 // Don't promote to an alignment that would require dynamic stack
10950 // realignment.
10951 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10952 if (!TRI->hasStackRealignment(MF))
10953 if (MaybeAlign StackAlign = DL.getStackAlignment())
10954 NewAlign = std::min(NewAlign, *StackAlign);
10955
10956 if (NewAlign > Alignment) {
10957 Alignment = NewAlign;
10958 unsigned FI = FIDef->getOperand(1).getIndex();
10959 // Give the stack frame object a larger alignment if needed.
10960 if (MFI.getObjectAlign(FI) < Alignment)
10961 MFI.setObjectAlignment(FI, Alignment);
10962 }
10963 }
10964
10965 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
10966
10967 MachineIRBuilder MIB(MI);
10968 // Memmove requires that we perform the loads first before issuing the stores.
10969 // Apart from that, this loop is pretty much doing the same thing as the
10970 // memcpy codegen function.
10971 unsigned CurrOffset = 0;
10972 SmallVector<Register, 16> LoadVals;
10973 for (auto CopyTy : MemOps) {
10974 // Construct MMO for the load.
10975 auto *LoadMMO =
10976 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10977
10978 // Create the load.
10979 Register LoadPtr = Src;
10980 if (CurrOffset != 0) {
10981 LLT SrcTy = MRI.getType(Src);
10982 auto Offset =
10983 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
10984 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10985 }
10986 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
10987 CurrOffset += CopyTy.getSizeInBytes();
10988 }
10989
10990 CurrOffset = 0;
10991 for (unsigned I = 0; I < MemOps.size(); ++I) {
10992 LLT CopyTy = MemOps[I];
10993 // Now store the values loaded.
10994 auto *StoreMMO =
10995 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10996
10997 Register StorePtr = Dst;
10998 if (CurrOffset != 0) {
10999 LLT DstTy = MRI.getType(Dst);
11000 auto Offset =
11001 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
11002 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
11003 }
11004 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
11005 CurrOffset += CopyTy.getSizeInBytes();
11006 }
11007 MI.eraseFromParent();
11008 return Legalized;
11009}
11010
11013 const unsigned Opc = MI.getOpcode();
11014 // This combine is fairly complex so it's not written with a separate
11015 // matcher function.
11016 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
11017 Opc == TargetOpcode::G_MEMSET) &&
11018 "Expected memcpy like instruction");
11019
11020 auto MMOIt = MI.memoperands_begin();
11021 const MachineMemOperand *MemOp = *MMOIt;
11022
11023 Align DstAlign = MemOp->getBaseAlign();
11024 Align SrcAlign;
11025 auto [Dst, Src, Len] = MI.getFirst3Regs();
11026
11027 if (Opc != TargetOpcode::G_MEMSET) {
11028 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
11029 MemOp = *(++MMOIt);
11030 SrcAlign = MemOp->getBaseAlign();
11031 }
11032
11033 // See if this is a constant length copy
11034 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
11035 if (!LenVRegAndVal)
11036 return UnableToLegalize;
11037 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
11038
11039 if (KnownLen == 0) {
11040 MI.eraseFromParent();
11041 return Legalized;
11042 }
11043
11044 if (MaxLen && KnownLen > MaxLen)
11045 return UnableToLegalize;
11046
11047 bool IsVolatile = MemOp->isVolatile();
11048 if (Opc == TargetOpcode::G_MEMCPY) {
11049 auto &MF = *MI.getParent()->getParent();
11050 const auto &TLI = *MF.getSubtarget().getTargetLowering();
11051 bool OptSize = shouldLowerMemFuncForSize(MF);
11052 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
11053 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
11054 IsVolatile);
11055 }
11056 if (Opc == TargetOpcode::G_MEMMOVE)
11057 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
11058 if (Opc == TargetOpcode::G_MEMSET)
11059 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
11060 return UnableToLegalize;
11061}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S16
constexpr LLT S1
constexpr LLT S32
constexpr LLT S64
AMDGPU Register Bank Select
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition Utils.h:75
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t High
R600 Clause Merge
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1402
APInt bitcastToAPInt() const
Definition APFloat.h:1408
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1193
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1153
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1164
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1043
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1527
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:956
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1697
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
void negate()
Negate this APInt in place.
Definition APInt.h:1483
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1016
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:880
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition APInt.h:271
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:685
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
bool isSigned() const
Definition InstrTypes.h:930
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
const APFloat & getValueAPF() const
Definition Constants.h:463
This is the shared class of boolean and integer constants.
Definition Constants.h:87
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool isBigEndian() const
Definition DataLayout.h:216
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:714
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a threeway compare.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:354
LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
LLT getScalarType() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr ElementCount getElementCount() const
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isPointerOrPointerVector() const
static LLT integer(unsigned SizeInBits)
constexpr LLT changeVectorElementType(LLT NewEltTy) const
Returns a vector with the same number of elements but the new element type.
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LLT changeVectorElementCount(ElementCount EC) const
Return a vector with the same element type and the new element count.
static LLT floatIEEE(unsigned SizeInBits)
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSSUBE(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizerHelper::LegalizeResult createAtomicLibcall(MachineInstr &MI) const
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTLS(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B, const LibcallLoweringInfo *Libcalls=nullptr)
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMODF(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI LegalizeResult conversionLibcall(MachineInstr &MI, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, bool IsSigned=false) const
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult createMemLibcall(MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Create a libcall to memcpy et al.
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult createLibcall(const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr) const
Helper function that creates a libcall to the given Name using the given calling convention CC.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LLVM_ABI LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver) const
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI)
Tracks which library functions to use for a particular subtarget.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
A single uniqued string.
Definition Metadata.h:722
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:632
Machine Value Type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:137
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:483
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Definition Triple.h:646
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Definition Type.cpp:295
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:286
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:317
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:291
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
Definition Type.cpp:294
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:290
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:288
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:831
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:2060
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition Utils.cpp:652
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:293
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition MathExtras.h:223
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1589
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1646
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1152
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition Utils.cpp:1213
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Success
The lock was released successfully.
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition Utils.cpp:507
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1885
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition MathExtras.h:232
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:432
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition Utils.h:347
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition Utils.cpp:1301
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:345
@ Custom
The result value requires a custom uniformity check.
Definition Uniformity.h:31
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition Utils.cpp:610
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
bool isKnownNeverZero() const
Return true if it's known this can never be a zero.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.