LLVM 19.0.0git
LegalizerHelper.cpp
1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
35#include "llvm/Support/Debug.h"
39#include <numeric>
40#include <optional>
41
42#define DEBUG_TYPE "legalizer"
43
44using namespace llvm;
45using namespace LegalizeActions;
46using namespace MIPatternMatch;
47
48/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
49///
50/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
51/// with any leftover piece as type \p LeftoverTy
52///
53/// Returns -1 in the first element of the pair if the breakdown is not
54/// satisfiable.
55static std::pair<int, int>
56getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
57 assert(!LeftoverTy.isValid() && "this is an out argument");
58
59 unsigned Size = OrigTy.getSizeInBits();
60 unsigned NarrowSize = NarrowTy.getSizeInBits();
61 unsigned NumParts = Size / NarrowSize;
62 unsigned LeftoverSize = Size - NumParts * NarrowSize;
63 assert(Size > NarrowSize);
64
65 if (LeftoverSize == 0)
66 return {NumParts, 0};
67
68 if (NarrowTy.isVector()) {
69 unsigned EltSize = OrigTy.getScalarSizeInBits();
70 if (LeftoverSize % EltSize != 0)
71 return {-1, -1};
72 LeftoverTy = LLT::scalarOrVector(
73 ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
74 } else {
75 LeftoverTy = LLT::scalar(LeftoverSize);
76 }
77
78 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
79 return std::make_pair(NumParts, NumLeftover);
80}
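// Illustration (hypothetical values, not from the original source): breaking
// an s88 OrigTy into s32 pieces yields {NumParts, NumLeftover} == {2, 1} with
// LeftoverTy == LLT::scalar(24). If NarrowTy were a vector and OrigTy's
// element size did not evenly divide the 24 leftover bits, the result would
// instead be {-1, -1}.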
81
82static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
83
84 if (!Ty.isScalar())
85 return nullptr;
86
87 switch (Ty.getSizeInBits()) {
88 case 16:
89 return Type::getHalfTy(Ctx);
90 case 32:
91 return Type::getFloatTy(Ctx);
92 case 64:
93 return Type::getDoubleTy(Ctx);
94 case 80:
95 return Type::getX86_FP80Ty(Ctx);
96 case 128:
97 return Type::getFP128Ty(Ctx);
98 default:
99 return nullptr;
100 }
101}
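// Usage sketch (illustrative): getFloatTypeForLLT(Ctx, LLT::scalar(64))
// returns Type::getDoubleTy(Ctx), while a vector such as
// LLT::fixed_vector(2, 32) or an unsupported width such as s8 yields nullptr.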
102
103LegalizerHelper::LegalizerHelper(MachineFunction &MF,
104 GISelChangeObserver &Observer,
105 MachineIRBuilder &Builder)
106 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
107 LI(*MF.getSubtarget().getLegalizerInfo()),
108 TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
109
110LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
111 GISelChangeObserver &Observer,
112 MachineIRBuilder &B, GISelKnownBits *KB)
113 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
114 TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
115
116LegalizerHelper::LegalizeResult
117LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
118 LostDebugLocObserver &LocObserver) {
119 LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
120
121 MIRBuilder.setInstrAndDebugLoc(MI);
122
123 if (isa<GIntrinsic>(MI))
124 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
125 auto Step = LI.getAction(MI, MRI);
126 switch (Step.Action) {
127 case Legal:
128 LLVM_DEBUG(dbgs() << ".. Already legal\n");
129 return AlreadyLegal;
130 case Libcall:
131 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
132 return libcall(MI, LocObserver);
133 case NarrowScalar:
134 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
135 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
136 case WidenScalar:
137 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
138 return widenScalar(MI, Step.TypeIdx, Step.NewType);
139 case Bitcast:
140 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
141 return bitcast(MI, Step.TypeIdx, Step.NewType);
142 case Lower:
143 LLVM_DEBUG(dbgs() << ".. Lower\n");
144 return lower(MI, Step.TypeIdx, Step.NewType);
145 case FewerElements:
146 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
147 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
148 case MoreElements:
149 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
150 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
151 case Custom:
152 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
153 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
154 : UnableToLegalize;
155 default:
156 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
157 return UnableToLegalize;
158 }
159}
160
161void LegalizerHelper::insertParts(Register DstReg,
162 LLT ResultTy, LLT PartTy,
163 ArrayRef<Register> PartRegs,
164 LLT LeftoverTy,
165 ArrayRef<Register> LeftoverRegs) {
166 if (!LeftoverTy.isValid()) {
167 assert(LeftoverRegs.empty());
168
169 if (!ResultTy.isVector()) {
170 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
171 return;
172 }
173
174 if (PartTy.isVector())
175 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
176 else
177 MIRBuilder.buildBuildVector(DstReg, PartRegs);
178 return;
179 }
180
181 // Merge sub-vectors with different numbers of elements and insert into DstReg.
182 if (ResultTy.isVector()) {
183 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
184 SmallVector<Register, 8> AllRegs;
185 for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
186 AllRegs.push_back(Reg);
187 return mergeMixedSubvectors(DstReg, AllRegs);
188 }
189
190 SmallVector<Register> GCDRegs;
191 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
192 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
193 extractGCDType(GCDRegs, GCDTy, PartReg);
194 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
195 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
196}
197
198void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
199 Register Reg) {
200 LLT Ty = MRI.getType(Reg);
201 SmallVector<Register, 8> RegElts;
202 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
203 MIRBuilder, MRI);
204 Elts.append(RegElts);
205}
206
207/// Merge \p PartRegs with different types into \p DstReg.
208void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
209 ArrayRef<Register> PartRegs) {
210 SmallVector<Register, 8> AllElts;
211 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
212 appendVectorElts(AllElts, PartRegs[i]);
213
214 Register Leftover = PartRegs[PartRegs.size() - 1];
215 if (MRI.getType(Leftover).isScalar())
216 AllElts.push_back(Leftover);
217 else
218 appendVectorElts(AllElts, Leftover);
219
220 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
221}
222
223/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
224static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
225 const MachineInstr &MI) {
226 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
227
228 const int StartIdx = Regs.size();
229 const int NumResults = MI.getNumOperands() - 1;
230 Regs.resize(Regs.size() + NumResults);
231 for (int I = 0; I != NumResults; ++I)
232 Regs[StartIdx + I] = MI.getOperand(I).getReg();
233}
234
235void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
236 LLT GCDTy, Register SrcReg) {
237 LLT SrcTy = MRI.getType(SrcReg);
238 if (SrcTy == GCDTy) {
239 // If the source already evenly divides the result type, we don't need to do
240 // anything.
241 Parts.push_back(SrcReg);
242 } else {
243 // Need to split into common type sized pieces.
244 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
245 getUnmergeResults(Parts, *Unmerge);
246 }
247}
248
249LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
250 LLT NarrowTy, Register SrcReg) {
251 LLT SrcTy = MRI.getType(SrcReg);
252 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
253 extractGCDType(Parts, GCDTy, SrcReg);
254 return GCDTy;
255}
256
257LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
258 SmallVectorImpl<Register> &VRegs,
259 unsigned PadStrategy) {
260 LLT LCMTy = getLCMType(DstTy, NarrowTy);
261
262 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
263 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
264 int NumOrigSrc = VRegs.size();
265
266 Register PadReg;
267
268 // Get a value we can use to pad the source value if the sources won't evenly
269 // cover the result type.
270 if (NumOrigSrc < NumParts * NumSubParts) {
271 if (PadStrategy == TargetOpcode::G_ZEXT)
272 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
273 else if (PadStrategy == TargetOpcode::G_ANYEXT)
274 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
275 else {
276 assert(PadStrategy == TargetOpcode::G_SEXT);
277
278 // Shift the sign bit of the low register through the high register.
279 auto ShiftAmt =
280 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
281 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
282 }
283 }
284
285 // Registers for the final merge to be produced.
286 SmallVector<Register, 4> Remerge(NumParts);
287
288 // Registers needed for intermediate merges, which will be merged into a
289 // source for Remerge.
290 SmallVector<Register, 4> SubMerge(NumSubParts);
291
292 // Once we've fully read off the end of the original source bits, we can reuse
293 // the same high bits for remaining padding elements.
294 Register AllPadReg;
295
296 // Build merges to the LCM type to cover the original result type.
297 for (int I = 0; I != NumParts; ++I) {
298 bool AllMergePartsArePadding = true;
299
300 // Build the requested merges to the requested type.
301 for (int J = 0; J != NumSubParts; ++J) {
302 int Idx = I * NumSubParts + J;
303 if (Idx >= NumOrigSrc) {
304 SubMerge[J] = PadReg;
305 continue;
306 }
307
308 SubMerge[J] = VRegs[Idx];
309
310 // There are meaningful bits here we can't reuse later.
311 AllMergePartsArePadding = false;
312 }
313
314 // If we've filled up a complete piece with padding bits, we can directly
315 // emit the natural sized constant if applicable, rather than a merge of
316 // smaller constants.
317 if (AllMergePartsArePadding && !AllPadReg) {
318 if (PadStrategy == TargetOpcode::G_ANYEXT)
319 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
320 else if (PadStrategy == TargetOpcode::G_ZEXT)
321 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
322
323 // If this is a sign extension, we can't materialize a trivial constant
324 // with the right type and have to produce a merge.
325 }
326
327 if (AllPadReg) {
328 // Avoid creating additional instructions if we're just adding additional
329 // copies of padding bits.
330 Remerge[I] = AllPadReg;
331 continue;
332 }
333
334 if (NumSubParts == 1)
335 Remerge[I] = SubMerge[0];
336 else
337 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
338
339 // In the sign extend padding case, re-use the first all-signbit merge.
340 if (AllMergePartsArePadding && !AllPadReg)
341 AllPadReg = Remerge[I];
342 }
343
344 VRegs = std::move(Remerge);
345 return LCMTy;
346}
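// Worked example (hypothetical, for illustration only): with DstTy = s24,
// NarrowTy = s16, GCDTy = s8, three s8 input registers and
// PadStrategy = G_ANYEXT, the LCM type is s48, so NumParts = 3 and
// NumSubParts = 2. The rebuilt pieces are roughly:
//   Remerge[0] = G_MERGE_VALUES %v0(s8), %v1(s8)
//   Remerge[1] = G_MERGE_VALUES %v2(s8), %pad(s8)   ; %pad = G_IMPLICIT_DEF
//   Remerge[2] = %allpad(s16)                       ; one s16 G_IMPLICIT_DEF
// VRegs is replaced with these three s16 registers and s48 is returned.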
347
348void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
349 ArrayRef<Register> RemergeRegs) {
350 LLT DstTy = MRI.getType(DstReg);
351
352 // Create the merge to the widened source, and extract the relevant bits into
353 // the result.
354
355 if (DstTy == LCMTy) {
356 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
357 return;
358 }
359
360 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
361 if (DstTy.isScalar() && LCMTy.isScalar()) {
362 MIRBuilder.buildTrunc(DstReg, Remerge);
363 return;
364 }
365
366 if (LCMTy.isVector()) {
367 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
368 SmallVector<Register, 8> UnmergeDefs(NumDefs);
369 UnmergeDefs[0] = DstReg;
370 for (unsigned I = 1; I != NumDefs; ++I)
371 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
372
373 MIRBuilder.buildUnmerge(UnmergeDefs,
374 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
375 return;
376 }
377
378 llvm_unreachable("unhandled case");
379}
380
381static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
382#define RTLIBCASE_INT(LibcallPrefix) \
383 do { \
384 switch (Size) { \
385 case 32: \
386 return RTLIB::LibcallPrefix##32; \
387 case 64: \
388 return RTLIB::LibcallPrefix##64; \
389 case 128: \
390 return RTLIB::LibcallPrefix##128; \
391 default: \
392 llvm_unreachable("unexpected size"); \
393 } \
394 } while (0)
395
396#define RTLIBCASE(LibcallPrefix) \
397 do { \
398 switch (Size) { \
399 case 32: \
400 return RTLIB::LibcallPrefix##32; \
401 case 64: \
402 return RTLIB::LibcallPrefix##64; \
403 case 80: \
404 return RTLIB::LibcallPrefix##80; \
405 case 128: \
406 return RTLIB::LibcallPrefix##128; \
407 default: \
408 llvm_unreachable("unexpected size"); \
409 } \
410 } while (0)
411
412 switch (Opcode) {
413 case TargetOpcode::G_MUL:
414 RTLIBCASE_INT(MUL_I);
415 case TargetOpcode::G_SDIV:
416 RTLIBCASE_INT(SDIV_I);
417 case TargetOpcode::G_UDIV:
418 RTLIBCASE_INT(UDIV_I);
419 case TargetOpcode::G_SREM:
420 RTLIBCASE_INT(SREM_I);
421 case TargetOpcode::G_UREM:
422 RTLIBCASE_INT(UREM_I);
423 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
424 RTLIBCASE_INT(CTLZ_I);
425 case TargetOpcode::G_FADD:
426 RTLIBCASE(ADD_F);
427 case TargetOpcode::G_FSUB:
428 RTLIBCASE(SUB_F);
429 case TargetOpcode::G_FMUL:
430 RTLIBCASE(MUL_F);
431 case TargetOpcode::G_FDIV:
432 RTLIBCASE(DIV_F);
433 case TargetOpcode::G_FEXP:
434 RTLIBCASE(EXP_F);
435 case TargetOpcode::G_FEXP2:
436 RTLIBCASE(EXP2_F);
437 case TargetOpcode::G_FEXP10:
438 RTLIBCASE(EXP10_F);
439 case TargetOpcode::G_FREM:
440 RTLIBCASE(REM_F);
441 case TargetOpcode::G_FPOW:
442 RTLIBCASE(POW_F);
443 case TargetOpcode::G_FPOWI:
444 RTLIBCASE(POWI_F);
445 case TargetOpcode::G_FMA:
446 RTLIBCASE(FMA_F);
447 case TargetOpcode::G_FSIN:
448 RTLIBCASE(SIN_F);
449 case TargetOpcode::G_FCOS:
450 RTLIBCASE(COS_F);
451 case TargetOpcode::G_FLOG10:
452 RTLIBCASE(LOG10_F);
453 case TargetOpcode::G_FLOG:
454 RTLIBCASE(LOG_F);
455 case TargetOpcode::G_FLOG2:
456 RTLIBCASE(LOG2_F);
457 case TargetOpcode::G_FLDEXP:
458 RTLIBCASE(LDEXP_F);
459 case TargetOpcode::G_FCEIL:
460 RTLIBCASE(CEIL_F);
461 case TargetOpcode::G_FFLOOR:
462 RTLIBCASE(FLOOR_F);
463 case TargetOpcode::G_FMINNUM:
464 RTLIBCASE(FMIN_F);
465 case TargetOpcode::G_FMAXNUM:
466 RTLIBCASE(FMAX_F);
467 case TargetOpcode::G_FSQRT:
468 RTLIBCASE(SQRT_F);
469 case TargetOpcode::G_FRINT:
470 RTLIBCASE(RINT_F);
471 case TargetOpcode::G_FNEARBYINT:
472 RTLIBCASE(NEARBYINT_F);
473 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
474 RTLIBCASE(ROUNDEVEN_F);
475 }
476 llvm_unreachable("Unknown libcall function");
477}
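// For example, a 64-bit G_FDIV resolves to RTLIB::DIV_F64 (commonly emitted
// as the target's __divdf3-style soft-float routine), and a 128-bit G_MUL
// resolves to RTLIB::MUL_I128; sizes outside the listed cases hit the
// "unexpected size" unreachable above.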
478
479/// True if an instruction is in tail position in its caller. Intended for
480/// legalizing libcalls as tail calls when possible.
481static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
482 MachineInstr &MI,
483 const TargetInstrInfo &TII,
484 MachineRegisterInfo &MRI) {
485 MachineBasicBlock &MBB = *MI.getParent();
486 const Function &F = MBB.getParent()->getFunction();
487
488 // Conservatively require the attributes of the call to match those of
489 // the return. Ignore NoAlias and NonNull because they don't affect the
490 // call sequence.
491 AttributeList CallerAttrs = F.getAttributes();
492 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
493 .removeAttribute(Attribute::NoAlias)
494 .removeAttribute(Attribute::NonNull)
495 .hasAttributes())
496 return false;
497
498 // It's not safe to eliminate the sign / zero extension of the return value.
499 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
500 CallerAttrs.hasRetAttr(Attribute::SExt))
501 return false;
502
503 // Only tail call if the following instruction is a standard return or if we
504 // have a `thisreturn` callee, and a sequence like:
505 //
506 // G_MEMCPY %0, %1, %2
507 // $x0 = COPY %0
508 // RET_ReallyLR implicit $x0
509 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
510 if (Next != MBB.instr_end() && Next->isCopy()) {
511 if (MI.getOpcode() == TargetOpcode::G_BZERO)
512 return false;
513
514 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
515 // memcpy/etc routines return the same parameter. For others it will be the
516 // returned value.
517 Register VReg = MI.getOperand(0).getReg();
518 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
519 return false;
520
521 Register PReg = Next->getOperand(0).getReg();
522 if (!PReg.isPhysical())
523 return false;
524
525 auto Ret = next_nodbg(Next, MBB.instr_end());
526 if (Ret == MBB.instr_end() || !Ret->isReturn())
527 return false;
528
529 if (Ret->getNumImplicitOperands() != 1)
530 return false;
531
532 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
533 return false;
534
535 // Skip over the COPY that we just validated.
536 Next = Ret;
537 }
538
539 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
540 return false;
541
542 return true;
543}
544
545LegalizerHelper::LegalizeResult
546llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
547 const CallLowering::ArgInfo &Result,
548 ArrayRef<CallLowering::ArgInfo> Args,
549 const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
550 MachineInstr *MI) {
551 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
552
552
553 CallLowering::CallLoweringInfo Info;
554 Info.CallConv = CC;
555 Info.Callee = MachineOperand::CreateES(Name);
556 Info.OrigRet = Result;
557 if (MI)
558 Info.IsTailCall =
559 (Result.Ty->isVoidTy() ||
560 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
561 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
562 *MIRBuilder.getMRI());
563
564 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
565 if (!CLI.lowerCall(MIRBuilder, Info))
566 return LegalizerHelper::UnableToLegalize;
567
568 if (MI && Info.LoweredTailCall) {
569 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
570
571 // Check debug locations before removing the return.
572 LocObserver.checkpoint(true);
573
574 // We must have a return following the call (or debug insts) to get past
575 // isLibCallInTailPosition.
576 do {
577 MachineInstr *Next = MI->getNextNode();
578 assert(Next &&
579 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
580 "Expected instr following MI to be return or debug inst?");
581 // We lowered a tail call, so the call is now the return from the block.
582 // Delete the old return.
583 Next->eraseFromParent();
584 } while (MI->getNextNode());
585
586 // We expect to lose the debug location from the return.
587 LocObserver.checkpoint(false);
588 }
589 return LegalizerHelper::Legalized;
590}
591
592LegalizerHelper::LegalizeResult
593llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
594 const CallLowering::ArgInfo &Result,
595 ArrayRef<CallLowering::ArgInfo> Args,
596 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
597 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
598 const char *Name = TLI.getLibcallName(Libcall);
599 if (!Name)
600 return LegalizerHelper::UnableToLegalize;
601 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
602 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
603}
604
605// Useful for libcalls where all operands have the same type.
606static LegalizerHelper::LegalizeResult
607simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
608 Type *OpType, LostDebugLocObserver &LocObserver) {
609 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
610
611 // FIXME: What does the original arg index mean here?
612 SmallVector<CallLowering::ArgInfo, 3> Args;
613 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
614 Args.push_back({MO.getReg(), OpType, 0});
615 return createLibcall(MIRBuilder, Libcall,
616 {MI.getOperand(0).getReg(), OpType, 0}, Args,
617 LocObserver, &MI);
618}
619
620LegalizerHelper::LegalizeResult
621llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
622 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
623 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
624
625 SmallVector<CallLowering::ArgInfo, 3> Args;
626 // Add all the args, except for the last which is an imm denoting 'tail'.
627 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
628 Register Reg = MI.getOperand(i).getReg();
629
630 // Need to derive an IR type for call lowering.
631 LLT OpLLT = MRI.getType(Reg);
632 Type *OpTy = nullptr;
633 if (OpLLT.isPointer())
634 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
635 else
636 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
637 Args.push_back({Reg, OpTy, 0});
638 }
639
640 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
641 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
642 RTLIB::Libcall RTLibcall;
643 unsigned Opc = MI.getOpcode();
644 switch (Opc) {
645 case TargetOpcode::G_BZERO:
646 RTLibcall = RTLIB::BZERO;
647 break;
648 case TargetOpcode::G_MEMCPY:
649 RTLibcall = RTLIB::MEMCPY;
650 Args[0].Flags[0].setReturned();
651 break;
652 case TargetOpcode::G_MEMMOVE:
653 RTLibcall = RTLIB::MEMMOVE;
654 Args[0].Flags[0].setReturned();
655 break;
656 case TargetOpcode::G_MEMSET:
657 RTLibcall = RTLIB::MEMSET;
658 Args[0].Flags[0].setReturned();
659 break;
660 default:
661 llvm_unreachable("unsupported opcode");
662 }
663 const char *Name = TLI.getLibcallName(RTLibcall);
664
665 // Unsupported libcall on the target.
666 if (!Name) {
667 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
668 << MIRBuilder.getTII().getName(Opc) << "\n");
669 return LegalizerHelper::UnableToLegalize;
670 }
671
672 CallLowering::CallLoweringInfo Info;
673 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
674 Info.Callee = MachineOperand::CreateES(Name);
675 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
676 Info.IsTailCall =
677 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
678 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
679
680 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
681 if (!CLI.lowerCall(MIRBuilder, Info))
682 return LegalizerHelper::UnableToLegalize;
683
684 if (Info.LoweredTailCall) {
685 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
686
687 // Check debug locations before removing the return.
688 LocObserver.checkpoint(true);
689
690 // We must have a return following the call (or debug insts) to get past
691 // isLibCallInTailPosition.
692 do {
693 MachineInstr *Next = MI.getNextNode();
694 assert(Next &&
695 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
696 "Expected instr following MI to be return or debug inst?");
697 // We lowered a tail call, so the call is now the return from the block.
698 // Delete the old return.
699 Next->eraseFromParent();
700 } while (MI.getNextNode());
701
702 // We expect to lose the debug location from the return.
703 LocObserver.checkpoint(false);
704 }
705
706 return LegalizerHelper::Legalized;
707}
708
709static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
710 unsigned Opc = MI.getOpcode();
711 auto &AtomicMI = cast<GMemOperation>(MI);
712 auto &MMO = AtomicMI.getMMO();
713 auto Ordering = MMO.getMergedOrdering();
714 LLT MemType = MMO.getMemoryType();
715 uint64_t MemSize = MemType.getSizeInBytes();
716 if (MemType.isVector())
717 return RTLIB::UNKNOWN_LIBCALL;
718
719#define LCALLS(A, B) \
720 { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
721#define LCALL5(A) \
722 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
723 switch (Opc) {
724 case TargetOpcode::G_ATOMIC_CMPXCHG:
725 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
726 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
727 return getOutlineAtomicHelper(LC, Ordering, MemSize);
728 }
729 case TargetOpcode::G_ATOMICRMW_XCHG: {
730 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
731 return getOutlineAtomicHelper(LC, Ordering, MemSize);
732 }
733 case TargetOpcode::G_ATOMICRMW_ADD:
734 case TargetOpcode::G_ATOMICRMW_SUB: {
735 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
736 return getOutlineAtomicHelper(LC, Ordering, MemSize);
737 }
738 case TargetOpcode::G_ATOMICRMW_AND: {
739 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
740 return getOutlineAtomicHelper(LC, Ordering, MemSize);
741 }
742 case TargetOpcode::G_ATOMICRMW_OR: {
743 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
744 return getOutlineAtomicHelper(LC, Ordering, MemSize);
745 }
746 case TargetOpcode::G_ATOMICRMW_XOR: {
747 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
748 return getOutlineAtomicHelper(LC, Ordering, MemSize);
749 }
750 default:
751 return RTLIB::UNKNOWN_LIBCALL;
752 }
753#undef LCALLS
754#undef LCALL5
755}
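// As an illustration, assuming getOutlineAtomicHelper indexes the table by
// memory size and ordering, a 4-byte G_ATOMICRMW_XCHG with acquire ordering
// selects RTLIB::OUTLINE_ATOMIC_SWP4_ACQ, while any vector memory type (or an
// unlisted opcode) falls back to RTLIB::UNKNOWN_LIBCALL.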
756
757static LegalizerHelper::LegalizeResult
758createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
759 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
760
761 Type *RetTy;
762 SmallVector<Register> RetRegs;
763 SmallVector<CallLowering::ArgInfo, 3> Args;
764 unsigned Opc = MI.getOpcode();
765 switch (Opc) {
766 case TargetOpcode::G_ATOMIC_CMPXCHG:
767 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
768 Register Success;
769 LLT SuccessLLT;
770 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
771 MI.getFirst4RegLLTs();
772 RetRegs.push_back(Ret);
773 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
774 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
775 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
776 NewLLT) = MI.getFirst5RegLLTs();
777 RetRegs.push_back(Success);
778 RetTy = StructType::get(
779 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
780 }
781 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
782 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
783 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
784 break;
785 }
786 case TargetOpcode::G_ATOMICRMW_XCHG:
787 case TargetOpcode::G_ATOMICRMW_ADD:
788 case TargetOpcode::G_ATOMICRMW_SUB:
789 case TargetOpcode::G_ATOMICRMW_AND:
790 case TargetOpcode::G_ATOMICRMW_OR:
791 case TargetOpcode::G_ATOMICRMW_XOR: {
792 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
793 RetRegs.push_back(Ret);
794 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
795 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
796 Val =
797 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
798 .getReg(0);
799 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
800 Val =
801 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
802 .getReg(0);
803 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
804 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
805 break;
806 }
807 default:
808 llvm_unreachable("unsupported opcode");
809 }
810
811 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
812 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
813 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
814 const char *Name = TLI.getLibcallName(RTLibcall);
815
816 // Unsupported libcall on the target.
817 if (!Name) {
818 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
819 << MIRBuilder.getTII().getName(Opc) << "\n");
820 return LegalizerHelper::UnableToLegalize;
821 }
822
823 CallLowering::CallLoweringInfo Info;
824 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
825 Info.Callee = MachineOperand::CreateES(Name);
826 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
827
828 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
829 if (!CLI.lowerCall(MIRBuilder, Info))
830 return LegalizerHelper::UnableToLegalize;
831
832 return LegalizerHelper::Legalized;
833}
834
835static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
836 Type *FromType) {
837 auto ToMVT = MVT::getVT(ToType);
838 auto FromMVT = MVT::getVT(FromType);
839
840 switch (Opcode) {
841 case TargetOpcode::G_FPEXT:
842 return RTLIB::getFPEXT(FromMVT, ToMVT);
843 case TargetOpcode::G_FPTRUNC:
844 return RTLIB::getFPROUND(FromMVT, ToMVT);
845 case TargetOpcode::G_FPTOSI:
846 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
847 case TargetOpcode::G_FPTOUI:
848 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
849 case TargetOpcode::G_SITOFP:
850 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
851 case TargetOpcode::G_UITOFP:
852 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
853 }
854 llvm_unreachable("Unsupported libcall function");
855}
856
857static LegalizerHelper::LegalizeResult
858conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
859 Type *FromType, LostDebugLocObserver &LocObserver) {
860 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
861 return createLibcall(
862 MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType, 0},
863 {{MI.getOperand(1).getReg(), FromType, 0}}, LocObserver, &MI);
864}
865
866static RTLIB::Libcall
867getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
868 RTLIB::Libcall RTLibcall;
869 switch (MI.getOpcode()) {
870 case TargetOpcode::G_GET_FPENV:
871 RTLibcall = RTLIB::FEGETENV;
872 break;
873 case TargetOpcode::G_SET_FPENV:
874 case TargetOpcode::G_RESET_FPENV:
875 RTLibcall = RTLIB::FESETENV;
876 break;
877 case TargetOpcode::G_GET_FPMODE:
878 RTLibcall = RTLIB::FEGETMODE;
879 break;
880 case TargetOpcode::G_SET_FPMODE:
881 case TargetOpcode::G_RESET_FPMODE:
882 RTLibcall = RTLIB::FESETMODE;
883 break;
884 default:
885 llvm_unreachable("Unexpected opcode");
886 }
887 return RTLibcall;
888}
889
890// Some library functions that read FP state (fegetmode, fegetenv) write the
891// state into a region in memory. IR intrinsics that do the same operations
892 // (get_fpmode, get_fpenv) return the state as an integer value. To implement
893 // these intrinsics via the library functions, we need to use a temporary
894 // variable, for example:
895//
896// %0:_(s32) = G_GET_FPMODE
897//
898// is transformed to:
899//
900// %1:_(p0) = G_FRAME_INDEX %stack.0
901// BL &fegetmode
902// %0:_(s32) = G_LOAD % 1
903//
904LegalizerHelper::LegalizeResult
905LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
906 MachineInstr &MI,
907 LostDebugLocObserver &LocObserver) {
908 const DataLayout &DL = MIRBuilder.getDataLayout();
909 auto &MF = MIRBuilder.getMF();
910 auto &MRI = *MIRBuilder.getMRI();
911 auto &Ctx = MF.getFunction().getContext();
912
913 // Create a temporary where the library function will put the read state.
914 Register Dst = MI.getOperand(0).getReg();
915 LLT StateTy = MRI.getType(Dst);
916 TypeSize StateSize = StateTy.getSizeInBytes();
917 Align TempAlign = getStackTemporaryAlignment(StateTy);
918 MachinePointerInfo TempPtrInfo;
919 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
920
921 // Create a call to library function, with the temporary as an argument.
922 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
923 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
924 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
925 auto Res =
926 createLibcall(MIRBuilder, RTLibcall,
927 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
928 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
929 LocObserver, nullptr);
930 if (Res != LegalizerHelper::Legalized)
931 return Res;
932
933 // Create a load from the temporary.
934 MachineMemOperand *MMO = MF.getMachineMemOperand(
935 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
936 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
937
938 return LegalizerHelper::Legalized;
939}
940
941 // Similar to `createGetStateLibcall`, this function calls a library function
942 // using transient space on the stack. In this case the library function reads
943 // the content of the memory region.
944LegalizerHelper::LegalizeResult
945LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
946 MachineInstr &MI,
947 LostDebugLocObserver &LocObserver) {
948 const DataLayout &DL = MIRBuilder.getDataLayout();
949 auto &MF = MIRBuilder.getMF();
950 auto &MRI = *MIRBuilder.getMRI();
951 auto &Ctx = MF.getFunction().getContext();
952
953 // Create a temporary where the library function will get the new state.
954 Register Src = MI.getOperand(0).getReg();
955 LLT StateTy = MRI.getType(Src);
956 TypeSize StateSize = StateTy.getSizeInBytes();
957 Align TempAlign = getStackTemporaryAlignment(StateTy);
958 MachinePointerInfo TempPtrInfo;
959 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
960
961 // Put the new state into the temporary.
962 MachineMemOperand *MMO = MF.getMachineMemOperand(
963 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
964 MIRBuilder.buildStore(Src, Temp, *MMO);
965
966 // Create a call to library function, with the temporary as an argument.
967 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
968 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
969 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
970 return createLibcall(MIRBuilder, RTLibcall,
971 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
972 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
973 LocObserver, nullptr);
974}
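// Rough shape of the emitted MIR for G_SET_FPENV on a target where fesetenv is
// available (register names illustrative, here assuming an s64 state type):
//   %fi:_(p0) = G_FRAME_INDEX %stack.N
//   G_STORE %src(s64), %fi(p0)
//   BL &fesetenv                ; %fi passed as the pointer argument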
975
976 // This function is used to legalize operations that set the default
977 // environment state. In the C library a call like `fesetmode(FE_DFL_MODE)` is
978 // used for that. On most targets supported by glibc, FE_DFL_MODE is defined as
979 // `((const femode_t *) -1)`, and that assumption is used here. If it does not
980 // hold for some target, the target must provide custom lowering.
981LegalizerHelper::LegalizeResult
982LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
983 MachineInstr &MI,
984 LostDebugLocObserver &LocObserver) {
985 const DataLayout &DL = MIRBuilder.getDataLayout();
986 auto &MF = MIRBuilder.getMF();
987 auto &Ctx = MF.getFunction().getContext();
988
989 // Create an argument for the library function.
990 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
991 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
992 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
993 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
994 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
995 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
996 MIRBuilder.buildIntToPtr(Dest, DefValue);
997
998 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
999 return createLibcall(MIRBuilder, RTLibcall,
1000 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1001 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1002 LocObserver, &MI);
1003}
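// Sketch of the resulting MIR for G_RESET_FPMODE under the FE_DFL_MODE
// assumption above, assuming 64-bit pointers (names illustrative):
//   %cst:_(s64) = G_CONSTANT i64 -1
//   %ptr:_(p0)  = G_INTTOPTR %cst(s64)
//   BL &fesetmode               ; %ptr passed as the argument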
1004
1005LegalizerHelper::LegalizeResult
1006LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1007 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1008
1009 switch (MI.getOpcode()) {
1010 default:
1011 return UnableToLegalize;
1012 case TargetOpcode::G_MUL:
1013 case TargetOpcode::G_SDIV:
1014 case TargetOpcode::G_UDIV:
1015 case TargetOpcode::G_SREM:
1016 case TargetOpcode::G_UREM:
1017 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1018 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1019 unsigned Size = LLTy.getSizeInBits();
1020 Type *HLTy = IntegerType::get(Ctx, Size);
1021 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1022 if (Status != Legalized)
1023 return Status;
1024 break;
1025 }
1026 case TargetOpcode::G_FADD:
1027 case TargetOpcode::G_FSUB:
1028 case TargetOpcode::G_FMUL:
1029 case TargetOpcode::G_FDIV:
1030 case TargetOpcode::G_FMA:
1031 case TargetOpcode::G_FPOW:
1032 case TargetOpcode::G_FREM:
1033 case TargetOpcode::G_FCOS:
1034 case TargetOpcode::G_FSIN:
1035 case TargetOpcode::G_FLOG10:
1036 case TargetOpcode::G_FLOG:
1037 case TargetOpcode::G_FLOG2:
1038 case TargetOpcode::G_FLDEXP:
1039 case TargetOpcode::G_FEXP:
1040 case TargetOpcode::G_FEXP2:
1041 case TargetOpcode::G_FEXP10:
1042 case TargetOpcode::G_FCEIL:
1043 case TargetOpcode::G_FFLOOR:
1044 case TargetOpcode::G_FMINNUM:
1045 case TargetOpcode::G_FMAXNUM:
1046 case TargetOpcode::G_FSQRT:
1047 case TargetOpcode::G_FRINT:
1048 case TargetOpcode::G_FNEARBYINT:
1049 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1050 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1051 unsigned Size = LLTy.getSizeInBits();
1052 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1053 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1054 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1055 return UnableToLegalize;
1056 }
1057 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1058 if (Status != Legalized)
1059 return Status;
1060 break;
1061 }
1062 case TargetOpcode::G_FPOWI: {
1063 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1064 unsigned Size = LLTy.getSizeInBits();
1065 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1066 Type *ITy = IntegerType::get(
1067 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1068 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1069 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1070 return UnableToLegalize;
1071 }
1072 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1073 std::initializer_list<CallLowering::ArgInfo> Args = {
1074 {MI.getOperand(1).getReg(), HLTy, 0},
1075 {MI.getOperand(2).getReg(), ITy, 1}};
1076 LegalizeResult Status =
1077 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1078 Args, LocObserver, &MI);
1079 if (Status != Legalized)
1080 return Status;
1081 break;
1082 }
1083 case TargetOpcode::G_FPEXT:
1084 case TargetOpcode::G_FPTRUNC: {
1085 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1086 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1087 if (!FromTy || !ToTy)
1088 return UnableToLegalize;
1089 LegalizeResult Status =
1090 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver);
1091 if (Status != Legalized)
1092 return Status;
1093 break;
1094 }
1095 case TargetOpcode::G_FPTOSI:
1096 case TargetOpcode::G_FPTOUI: {
1097 // FIXME: Support other types
1098 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1099 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1100 if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
1101 return UnableToLegalize;
1102 LegalizeResult Status = conversionLibcall(
1103 MI, MIRBuilder,
1104 ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
1105 FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
1106 LocObserver);
1107 if (Status != Legalized)
1108 return Status;
1109 break;
1110 }
1111 case TargetOpcode::G_SITOFP:
1112 case TargetOpcode::G_UITOFP: {
1113 // FIXME: Support other types
1114 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1115 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1116 if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
1117 return UnableToLegalize;
1118 LegalizeResult Status = conversionLibcall(
1119 MI, MIRBuilder,
1120 ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
1121 FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
1122 LocObserver);
1123 if (Status != Legalized)
1124 return Status;
1125 break;
1126 }
1127 case TargetOpcode::G_ATOMICRMW_XCHG:
1128 case TargetOpcode::G_ATOMICRMW_ADD:
1129 case TargetOpcode::G_ATOMICRMW_SUB:
1130 case TargetOpcode::G_ATOMICRMW_AND:
1131 case TargetOpcode::G_ATOMICRMW_OR:
1132 case TargetOpcode::G_ATOMICRMW_XOR:
1133 case TargetOpcode::G_ATOMIC_CMPXCHG:
1134 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1135 auto Status = createAtomicLibcall(MIRBuilder, MI);
1136 if (Status != Legalized)
1137 return Status;
1138 break;
1139 }
1140 case TargetOpcode::G_BZERO:
1141 case TargetOpcode::G_MEMCPY:
1142 case TargetOpcode::G_MEMMOVE:
1143 case TargetOpcode::G_MEMSET: {
1144 LegalizeResult Result =
1145 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1146 if (Result != Legalized)
1147 return Result;
1148 MI.eraseFromParent();
1149 return Result;
1150 }
1151 case TargetOpcode::G_GET_FPENV:
1152 case TargetOpcode::G_GET_FPMODE: {
1153 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1154 if (Result != Legalized)
1155 return Result;
1156 break;
1157 }
1158 case TargetOpcode::G_SET_FPENV:
1159 case TargetOpcode::G_SET_FPMODE: {
1160 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1161 if (Result != Legalized)
1162 return Result;
1163 break;
1164 }
1165 case TargetOpcode::G_RESET_FPENV:
1166 case TargetOpcode::G_RESET_FPMODE: {
1167 LegalizeResult Result =
1168 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1169 if (Result != Legalized)
1170 return Result;
1171 break;
1172 }
1173 }
1174
1175 MI.eraseFromParent();
1176 return Legalized;
1177}
1178
1179LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1180 unsigned TypeIdx,
1181 LLT NarrowTy) {
1182 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1183 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1184
1185 switch (MI.getOpcode()) {
1186 default:
1187 return UnableToLegalize;
1188 case TargetOpcode::G_IMPLICIT_DEF: {
1189 Register DstReg = MI.getOperand(0).getReg();
1190 LLT DstTy = MRI.getType(DstReg);
1191
1192 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1193 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1194 // FIXME: Although this would also be legal for the general case, it causes
1195 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1196 // combines not being hit). This seems to be a problem related to the
1197 // artifact combiner.
1198 if (SizeOp0 % NarrowSize != 0) {
1199 LLT ImplicitTy = NarrowTy;
1200 if (DstTy.isVector())
1201 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
1202
1203 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1204 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1205
1206 MI.eraseFromParent();
1207 return Legalized;
1208 }
1209
1210 int NumParts = SizeOp0 / NarrowSize;
1211
1212 SmallVector<Register, 2> DstRegs;
1213 for (int i = 0; i < NumParts; ++i)
1214 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1215
1216 if (DstTy.isVector())
1217 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1218 else
1219 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1220 MI.eraseFromParent();
1221 return Legalized;
1222 }
1223 case TargetOpcode::G_CONSTANT: {
1224 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1225 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1226 unsigned TotalSize = Ty.getSizeInBits();
1227 unsigned NarrowSize = NarrowTy.getSizeInBits();
1228 int NumParts = TotalSize / NarrowSize;
1229
1230 SmallVector<Register, 4> PartRegs;
1231 for (int I = 0; I != NumParts; ++I) {
1232 unsigned Offset = I * NarrowSize;
1233 auto K = MIRBuilder.buildConstant(NarrowTy,
1234 Val.lshr(Offset).trunc(NarrowSize));
1235 PartRegs.push_back(K.getReg(0));
1236 }
1237
1238 LLT LeftoverTy;
1239 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1240 SmallVector<Register, 1> LeftoverRegs;
1241 if (LeftoverBits != 0) {
1242 LeftoverTy = LLT::scalar(LeftoverBits);
1243 auto K = MIRBuilder.buildConstant(
1244 LeftoverTy,
1245 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1246 LeftoverRegs.push_back(K.getReg(0));
1247 }
1248
1249 insertParts(MI.getOperand(0).getReg(),
1250 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1251
1252 MI.eraseFromParent();
1253 return Legalized;
1254 }
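  // Example of the G_CONSTANT path above (illustrative): narrowing an s88
  // constant with NarrowTy = s32 emits two s32 G_CONSTANTs for bits [0,32)
  // and [32,64), one s24 G_CONSTANT for the remaining bits, and insertParts
  // then rebuilds the original s88 register from those three pieces.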
1255 case TargetOpcode::G_SEXT:
1256 case TargetOpcode::G_ZEXT:
1257 case TargetOpcode::G_ANYEXT:
1258 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1259 case TargetOpcode::G_TRUNC: {
1260 if (TypeIdx != 1)
1261 return UnableToLegalize;
1262
1263 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1264 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1265 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1266 return UnableToLegalize;
1267 }
1268
1269 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1270 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1271 MI.eraseFromParent();
1272 return Legalized;
1273 }
1274
1275 case TargetOpcode::G_FREEZE: {
1276 if (TypeIdx != 0)
1277 return UnableToLegalize;
1278
1279 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1280 // Should widen scalar first
1281 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1282 return UnableToLegalize;
1283
1284 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1285 SmallVector<Register, 8> Parts;
1286 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1287 Parts.push_back(
1288 MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
1289 }
1290
1291 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1292 MI.eraseFromParent();
1293 return Legalized;
1294 }
1295 case TargetOpcode::G_ADD:
1296 case TargetOpcode::G_SUB:
1297 case TargetOpcode::G_SADDO:
1298 case TargetOpcode::G_SSUBO:
1299 case TargetOpcode::G_SADDE:
1300 case TargetOpcode::G_SSUBE:
1301 case TargetOpcode::G_UADDO:
1302 case TargetOpcode::G_USUBO:
1303 case TargetOpcode::G_UADDE:
1304 case TargetOpcode::G_USUBE:
1305 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1306 case TargetOpcode::G_MUL:
1307 case TargetOpcode::G_UMULH:
1308 return narrowScalarMul(MI, NarrowTy);
1309 case TargetOpcode::G_EXTRACT:
1310 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1311 case TargetOpcode::G_INSERT:
1312 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1313 case TargetOpcode::G_LOAD: {
1314 auto &LoadMI = cast<GLoad>(MI);
1315 Register DstReg = LoadMI.getDstReg();
1316 LLT DstTy = MRI.getType(DstReg);
1317 if (DstTy.isVector())
1318 return UnableToLegalize;
1319
1320 if (8 * LoadMI.getMemSize() != DstTy.getSizeInBits()) {
1321 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1322 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1323 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1324 LoadMI.eraseFromParent();
1325 return Legalized;
1326 }
1327
1328 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1329 }
1330 case TargetOpcode::G_ZEXTLOAD:
1331 case TargetOpcode::G_SEXTLOAD: {
1332 auto &LoadMI = cast<GExtLoad>(MI);
1333 Register DstReg = LoadMI.getDstReg();
1334 Register PtrReg = LoadMI.getPointerReg();
1335
1336 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1337 auto &MMO = LoadMI.getMMO();
1338 unsigned MemSize = MMO.getSizeInBits();
1339
1340 if (MemSize == NarrowSize) {
1341 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1342 } else if (MemSize < NarrowSize) {
1343 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1344 } else if (MemSize > NarrowSize) {
1345 // FIXME: Need to split the load.
1346 return UnableToLegalize;
1347 }
1348
1349 if (isa<GZExtLoad>(LoadMI))
1350 MIRBuilder.buildZExt(DstReg, TmpReg);
1351 else
1352 MIRBuilder.buildSExt(DstReg, TmpReg);
1353
1354 LoadMI.eraseFromParent();
1355 return Legalized;
1356 }
1357 case TargetOpcode::G_STORE: {
1358 auto &StoreMI = cast<GStore>(MI);
1359
1360 Register SrcReg = StoreMI.getValueReg();
1361 LLT SrcTy = MRI.getType(SrcReg);
1362 if (SrcTy.isVector())
1363 return UnableToLegalize;
1364
1365 int NumParts = SizeOp0 / NarrowSize;
1366 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1367 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1368 if (SrcTy.isVector() && LeftoverBits != 0)
1369 return UnableToLegalize;
1370
1371 if (8 * StoreMI.getMemSize() != SrcTy.getSizeInBits()) {
1372 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1373 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1374 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1375 StoreMI.eraseFromParent();
1376 return Legalized;
1377 }
1378
1379 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1380 }
1381 case TargetOpcode::G_SELECT:
1382 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1383 case TargetOpcode::G_AND:
1384 case TargetOpcode::G_OR:
1385 case TargetOpcode::G_XOR: {
1386 // Legalize bitwise operation:
1387 // A = BinOp<Ty> B, C
1388 // into:
1389 // B1, ..., BN = G_UNMERGE_VALUES B
1390 // C1, ..., CN = G_UNMERGE_VALUES C
1391 // A1 = BinOp<Ty/N> B1, C1
1392 // ...
1393 // AN = BinOp<Ty/N> BN, CN
1394 // A = G_MERGE_VALUES A1, ..., AN
1395 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1396 }
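  // Concretely (illustrative registers), narrowing %a:_(s64) = G_AND %b, %c
  // with NarrowTy = s32 produces:
  //   %b0:_(s32), %b1:_(s32) = G_UNMERGE_VALUES %b(s64)
  //   %c0:_(s32), %c1:_(s32) = G_UNMERGE_VALUES %c(s64)
  //   %a0:_(s32) = G_AND %b0, %c0
  //   %a1:_(s32) = G_AND %b1, %c1
  //   %a:_(s64) = G_MERGE_VALUES %a0(s32), %a1(s32)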
1397 case TargetOpcode::G_SHL:
1398 case TargetOpcode::G_LSHR:
1399 case TargetOpcode::G_ASHR:
1400 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1401 case TargetOpcode::G_CTLZ:
1402 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1403 case TargetOpcode::G_CTTZ:
1404 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1405 case TargetOpcode::G_CTPOP:
1406 if (TypeIdx == 1)
1407 switch (MI.getOpcode()) {
1408 case TargetOpcode::G_CTLZ:
1409 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1410 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1411 case TargetOpcode::G_CTTZ:
1412 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1413 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1414 case TargetOpcode::G_CTPOP:
1415 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1416 default:
1417 return UnableToLegalize;
1418 }
1419
1420 Observer.changingInstr(MI);
1421 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1422 Observer.changedInstr(MI);
1423 return Legalized;
1424 case TargetOpcode::G_INTTOPTR:
1425 if (TypeIdx != 1)
1426 return UnableToLegalize;
1427
1428 Observer.changingInstr(MI);
1429 narrowScalarSrc(MI, NarrowTy, 1);
1430 Observer.changedInstr(MI);
1431 return Legalized;
1432 case TargetOpcode::G_PTRTOINT:
1433 if (TypeIdx != 0)
1434 return UnableToLegalize;
1435
1436 Observer.changingInstr(MI);
1437 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1438 Observer.changedInstr(MI);
1439 return Legalized;
1440 case TargetOpcode::G_PHI: {
1441 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1442 // NarrowSize.
1443 if (SizeOp0 % NarrowSize != 0)
1444 return UnableToLegalize;
1445
1446 unsigned NumParts = SizeOp0 / NarrowSize;
1447 SmallVector<Register, 2> DstRegs(NumParts);
1448 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1449 Observer.changingInstr(MI);
1450 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1451 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1452 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1453 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1454 SrcRegs[i / 2], MIRBuilder, MRI);
1455 }
1456 MachineBasicBlock &MBB = *MI.getParent();
1457 MIRBuilder.setInsertPt(MBB, MI);
1458 for (unsigned i = 0; i < NumParts; ++i) {
1459 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1460 MachineInstrBuilder MIB =
1461 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1462 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1463 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1464 }
1465 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1466 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1467 Observer.changedInstr(MI);
1468 MI.eraseFromParent();
1469 return Legalized;
1470 }
1471 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1472 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1473 if (TypeIdx != 2)
1474 return UnableToLegalize;
1475
1476 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1477 Observer.changingInstr(MI);
1478 narrowScalarSrc(MI, NarrowTy, OpIdx);
1479 Observer.changedInstr(MI);
1480 return Legalized;
1481 }
1482 case TargetOpcode::G_ICMP: {
1483 Register LHS = MI.getOperand(2).getReg();
1484 LLT SrcTy = MRI.getType(LHS);
1485 uint64_t SrcSize = SrcTy.getSizeInBits();
1486 CmpInst::Predicate Pred =
1487 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1488
1489 // TODO: Handle the non-equality case for weird sizes.
1490 if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
1491 return UnableToLegalize;
1492
1493 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1494 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1495 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1496 LHSLeftoverRegs, MIRBuilder, MRI))
1497 return UnableToLegalize;
1498
1499 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1500 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1501 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1502 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1503 return UnableToLegalize;
1504
1505 // We now have the LHS and RHS of the compare split into narrow-type
1506 // registers, plus potentially some leftover type.
1507 Register Dst = MI.getOperand(0).getReg();
1508 LLT ResTy = MRI.getType(Dst);
1509 if (ICmpInst::isEquality(Pred)) {
1510 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1511 // them together. For each equal part, the result should be all 0s. For
1512 // each non-equal part, we'll get at least one 1.
1513 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1514 SmallVector<Register, 4> Xors;
1515 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1516 auto LHS = std::get<0>(LHSAndRHS);
1517 auto RHS = std::get<1>(LHSAndRHS);
1518 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1519 Xors.push_back(Xor);
1520 }
1521
1522 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1523 // to the desired narrow type so that we can OR them together later.
1524 SmallVector<Register, 4> WidenedXors;
1525 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1526 auto LHS = std::get<0>(LHSAndRHS);
1527 auto RHS = std::get<1>(LHSAndRHS);
1528 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1529 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1530 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1531 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1532 Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
1533 }
1534
1535 // Now, for each part we broke up, we know if they are equal/not equal
1536 // based off the G_XOR. We can OR these all together and compare against
1537 // 0 to get the result.
1538 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1539 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1540 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1541 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1542 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1543 } else {
1544 // TODO: Handle non-power-of-two types.
1545 assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
1546 assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
1547 Register LHSL = LHSPartRegs[0];
1548 Register LHSH = LHSPartRegs[1];
1549 Register RHSL = RHSPartRegs[0];
1550 Register RHSH = RHSPartRegs[1];
1551 MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1552 MachineInstrBuilder CmpHEQ =
1553 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
1554 MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
1555 ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1556 MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
1557 }
1558 MI.eraseFromParent();
1559 return Legalized;
1560 }
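  // For instance (illustrative), an s64 G_ICMP eq narrowed with NarrowTy = s32
  // becomes a G_XOR of each 32-bit half, a G_OR of the two XOR results, and a
  // final G_ICMP eq of that OR against zero; relational predicates instead use
  // the high/low compare-and-select sequence handled in the else branch above.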
1561 case TargetOpcode::G_FCMP:
1562 if (TypeIdx != 0)
1563 return UnableToLegalize;
1564
1565 Observer.changingInstr(MI);
1566 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1567 Observer.changedInstr(MI);
1568 return Legalized;
1569
1570 case TargetOpcode::G_SEXT_INREG: {
1571 if (TypeIdx != 0)
1572 return UnableToLegalize;
1573
1574 int64_t SizeInBits = MI.getOperand(2).getImm();
1575
1576 // So long as the new type has more bits than the bits we're extending we
1577 // don't need to break it apart.
1578 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1579 Observer.changingInstr(MI);
1580 // We don't lose any non-extension bits by truncating the src and
1581 // sign-extending the dst.
1582 MachineOperand &MO1 = MI.getOperand(1);
1583 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1584 MO1.setReg(TruncMIB.getReg(0));
1585
1586 MachineOperand &MO2 = MI.getOperand(0);
1587 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1588 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1589 MIRBuilder.buildSExt(MO2, DstExt);
1590 MO2.setReg(DstExt);
1591 Observer.changedInstr(MI);
1592 return Legalized;
1593 }
1594
1595 // Break it apart. Components below the extension point are unmodified. The
1596 // component containing the extension point becomes a narrower SEXT_INREG.
1597 // Components above it are ashr'd from the component containing the
1598 // extension point.
1599 if (SizeOp0 % NarrowSize != 0)
1600 return UnableToLegalize;
1601 int NumParts = SizeOp0 / NarrowSize;
1602
1603 // List the registers where the destination will be scattered.
1604 SmallVector<Register, 2> DstRegs;
1605 // List the registers where the source will be split.
1606 SmallVector<Register, 2> SrcRegs;
1607
1608 // Create all the temporary registers.
1609 for (int i = 0; i < NumParts; ++i) {
1610 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1611
1612 SrcRegs.push_back(SrcReg);
1613 }
1614
1615 // Explode the big arguments into smaller chunks.
1616 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1617
1618 Register AshrCstReg =
1619 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1620 .getReg(0);
1621 Register FullExtensionReg;
1622 Register PartialExtensionReg;
1623
1624 // Do the operation on each small part.
1625 for (int i = 0; i < NumParts; ++i) {
1626 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
1627 DstRegs.push_back(SrcRegs[i]);
1628 PartialExtensionReg = DstRegs.back();
1629 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1630 assert(PartialExtensionReg &&
1631 "Expected to visit partial extension before full");
1632 if (FullExtensionReg) {
1633 DstRegs.push_back(FullExtensionReg);
1634 continue;
1635 }
1636 DstRegs.push_back(
1637 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1638 .getReg(0));
1639 FullExtensionReg = DstRegs.back();
1640 } else {
1641 DstRegs.push_back(
1642 MIRBuilder
1643 .buildInstr(
1644 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1645 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1646 .getReg(0));
1647 PartialExtensionReg = DstRegs.back();
1648 }
1649 }
1650
1651 // Gather the destination registers into the final destination.
1652 Register DstReg = MI.getOperand(0).getReg();
1653 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1654 MI.eraseFromParent();
1655 return Legalized;
1656 }
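  // Worked example (illustrative): narrowing %d:_(s64) = G_SEXT_INREG %s, 8
  // with NarrowTy = s16 unmerges %s into four s16 pieces; piece 0 becomes
  // G_SEXT_INREG(%s0, 8), the three higher pieces all reuse a single G_ASHR of
  // that result by 15, and the four pieces are re-merged into the s64 result.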
1657 case TargetOpcode::G_BSWAP:
1658 case TargetOpcode::G_BITREVERSE: {
1659 if (SizeOp0 % NarrowSize != 0)
1660 return UnableToLegalize;
1661
1662 Observer.changingInstr(MI);
1663 SmallVector<Register, 2> SrcRegs, DstRegs;
1664 unsigned NumParts = SizeOp0 / NarrowSize;
1665 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
1666 MIRBuilder, MRI);
1667
1668 for (unsigned i = 0; i < NumParts; ++i) {
1669 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1670 {SrcRegs[NumParts - 1 - i]});
1671 DstRegs.push_back(DstPart.getReg(0));
1672 }
1673
1674 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1675
1676 Observer.changedInstr(MI);
1677 MI.eraseFromParent();
1678 return Legalized;
1679 }
1680 case TargetOpcode::G_PTR_ADD:
1681 case TargetOpcode::G_PTRMASK: {
1682 if (TypeIdx != 1)
1683 return UnableToLegalize;
1684 Observer.changingInstr(MI);
1685 narrowScalarSrc(MI, NarrowTy, 2);
1686 Observer.changedInstr(MI);
1687 return Legalized;
1688 }
1689 case TargetOpcode::G_FPTOUI:
1690 case TargetOpcode::G_FPTOSI:
1691 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1692 case TargetOpcode::G_FPEXT:
1693 if (TypeIdx != 0)
1694 return UnableToLegalize;
1695 Observer.changingInstr(MI);
1696 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1697 Observer.changedInstr(MI);
1698 return Legalized;
1699 case TargetOpcode::G_FLDEXP:
1700 case TargetOpcode::G_STRICT_FLDEXP:
1701 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
1702 }
1703}
1704
1705Register LegalizerHelper::coerceToScalar(Register Val) {
1706 LLT Ty = MRI.getType(Val);
1707 if (Ty.isScalar())
1708 return Val;
1709
1710 const DataLayout &DL = MIRBuilder.getDataLayout();
1711 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1712 if (Ty.isPointer()) {
1713 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1714 return Register();
1715 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1716 }
1717
1718 Register NewVal = Val;
1719
1720 assert(Ty.isVector());
1721 if (Ty.isPointerVector())
1722 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1723 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1724}
1725
1726void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1727 unsigned OpIdx, unsigned ExtOpcode) {
1728 MachineOperand &MO = MI.getOperand(OpIdx);
1729 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1730 MO.setReg(ExtB.getReg(0));
1731}
1732
1733void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1734 unsigned OpIdx) {
1735 MachineOperand &MO = MI.getOperand(OpIdx);
1736 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1737 MO.setReg(ExtB.getReg(0));
1738}
1739
1740void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1741 unsigned OpIdx, unsigned TruncOpcode) {
1742 MachineOperand &MO = MI.getOperand(OpIdx);
1743 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1744 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1745 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1746 MO.setReg(DstExt);
1747}
1748
1749void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1750 unsigned OpIdx, unsigned ExtOpcode) {
1751 MachineOperand &MO = MI.getOperand(OpIdx);
1752 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1754 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1755 MO.setReg(DstTrunc);
1756}
1757
1759 unsigned OpIdx) {
1760 MachineOperand &MO = MI.getOperand(OpIdx);
1762 Register Dst = MO.getReg();
1763 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1764 MO.setReg(DstExt);
1766}
1767
1769 unsigned OpIdx) {
1770 MachineOperand &MO = MI.getOperand(OpIdx);
1773}
1774
1775void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1776 MachineOperand &Op = MI.getOperand(OpIdx);
1777 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1778}
1779
1780void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1781 MachineOperand &MO = MI.getOperand(OpIdx);
1782 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1784 MIRBuilder.buildBitcast(MO, CastDst);
1785 MO.setReg(CastDst);
1786}
1787
1788 LegalizerHelper::LegalizeResult
1789LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1790 LLT WideTy) {
1791 if (TypeIdx != 1)
1792 return UnableToLegalize;
1793
1794 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
1795 if (DstTy.isVector())
1796 return UnableToLegalize;
1797
1798 LLT SrcTy = MRI.getType(Src1Reg);
1799 const int DstSize = DstTy.getSizeInBits();
1800 const int SrcSize = SrcTy.getSizeInBits();
1801 const int WideSize = WideTy.getSizeInBits();
1802 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1803
1804 unsigned NumOps = MI.getNumOperands();
1805 unsigned NumSrc = MI.getNumOperands() - 1;
1806 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1807
1808 if (WideSize >= DstSize) {
1809 // Directly pack the bits in the target type.
1810 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
1811
1812 for (unsigned I = 2; I != NumOps; ++I) {
1813 const unsigned Offset = (I - 1) * PartSize;
1814
1815 Register SrcReg = MI.getOperand(I).getReg();
1816 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1817
1818 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1819
1820 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1821 MRI.createGenericVirtualRegister(WideTy);
1822
1823 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1824 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1825 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1826 ResultReg = NextResult;
1827 }
1828
1829 if (WideSize > DstSize)
1830 MIRBuilder.buildTrunc(DstReg, ResultReg);
1831 else if (DstTy.isPointer())
1832 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1833
1834 MI.eraseFromParent();
1835 return Legalized;
1836 }
1837
1838 // Unmerge the original values to the GCD type, and recombine to the next
1839 // multiple greater than the original type.
1840 //
1841 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1842 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1843 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1844 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1845 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1846 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1847 // %12:_(s12) = G_MERGE_VALUES %10, %11
1848 //
1849 // Padding with undef if necessary:
1850 //
1851 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1852 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1853 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1854 // %7:_(s2) = G_IMPLICIT_DEF
1855 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1856 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1857 // %10:_(s12) = G_MERGE_VALUES %8, %9
1858
1859 const int GCD = std::gcd(SrcSize, WideSize);
1860 LLT GCDTy = LLT::scalar(GCD);
1861
1863 SmallVector<Register, 8> NewMergeRegs;
1864 SmallVector<Register, 8> Unmerges;
1865 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1866
1867 // Decompose the original operands if they don't evenly divide.
1868 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
1869 Register SrcReg = MO.getReg();
1870 if (GCD == SrcSize) {
1871 Unmerges.push_back(SrcReg);
1872 } else {
1873 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1874 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1875 Unmerges.push_back(Unmerge.getReg(J));
1876 }
1877 }
1878
1879 // Pad with undef to the next size that is a multiple of the requested size.
1880 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1881 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1882 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1883 Unmerges.push_back(UndefReg);
1884 }
1885
1886 const int PartsPerGCD = WideSize / GCD;
1887
1888 // Build merges of each piece.
1889 ArrayRef<Register> Slicer(Unmerges);
1890 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1891 auto Merge =
1892 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
1893 NewMergeRegs.push_back(Merge.getReg(0));
1894 }
1895
1896 // A truncate may be necessary if the requested type doesn't evenly divide the
1897 // original result type.
1898 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1899 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
1900 } else {
1901 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
1902 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1903 }
1904
1905 MI.eraseFromParent();
1906 return Legalized;
1907}
1908
1909 LegalizerHelper::LegalizeResult
1910LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1911 LLT WideTy) {
1912 if (TypeIdx != 0)
1913 return UnableToLegalize;
1914
1915 int NumDst = MI.getNumOperands() - 1;
1916 Register SrcReg = MI.getOperand(NumDst).getReg();
1917 LLT SrcTy = MRI.getType(SrcReg);
1918 if (SrcTy.isVector())
1919 return UnableToLegalize;
1920
1921 Register Dst0Reg = MI.getOperand(0).getReg();
1922 LLT DstTy = MRI.getType(Dst0Reg);
1923 if (!DstTy.isScalar())
1924 return UnableToLegalize;
1925
1926 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
1927 if (SrcTy.isPointer()) {
1929 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
1930 LLVM_DEBUG(
1931 dbgs() << "Not casting non-integral address space integer\n");
1932 return UnableToLegalize;
1933 }
1934
1935 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
1936 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
1937 }
1938
1939 // Widen SrcTy to WideTy. This does not affect the result, but since the
1940 // user requested this size, it is probably better handled than SrcTy and
1941 // should reduce the total number of legalization artifacts.
1942 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1943 SrcTy = WideTy;
1944 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
1945 }
1946
1947 // There's no unmerge type to target. Directly extract the bits from the
1948 // source type.
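// For illustration only (the register names below are made up), with
// WideTy = s64:
//   %d0:_(s8), %d1:_(s8), %d2:_(s8), %d3:_(s8) = G_UNMERGE_VALUES %src:_(s32)
// becomes
//   %wide:_(s64) = G_ANYEXT %src:_(s32)
//   %d0:_(s8) = G_TRUNC %wide
//   %s1:_(s64) = G_LSHR %wide, 8
//   %d1:_(s8) = G_TRUNC %s1
// and likewise for %d2 (shift 16) and %d3 (shift 24).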
1949 unsigned DstSize = DstTy.getSizeInBits();
1950
1951 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
1952 for (int I = 1; I != NumDst; ++I) {
1953 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
1954 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
1955 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
1956 }
1957
1958 MI.eraseFromParent();
1959 return Legalized;
1960 }
1961
1962 // Extend the source to a wider type.
1963 LLT LCMTy = getLCMType(SrcTy, WideTy);
1964
1965 Register WideSrc = SrcReg;
1966 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
1967 // TODO: If this is an integral address space, cast to integer and anyext.
1968 if (SrcTy.isPointer()) {
1969 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
1970 return UnableToLegalize;
1971 }
1972
1973 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
1974 }
1975
1976 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
1977
1978 // Create a sequence of unmerges and merges to the original results. Since we
1979 // may have widened the source, we will need to pad the results with dead defs
1980 // to cover the source register.
1981 // e.g. widen s48 to s64:
1982 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
1983 //
1984 // =>
1985 // %4:_(s192) = G_ANYEXT %0:_(s96)
1986 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
1987 // ; unpack to GCD type, with extra dead defs
1988 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
1989 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
1990 // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
1991 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
1992 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
1993 const LLT GCDTy = getGCDType(WideTy, DstTy);
1994 const int NumUnmerge = Unmerge->getNumOperands() - 1;
1995 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
1996
1997 // Directly unmerge to the destination without going through a GCD type
1998 // if possible
1999 if (PartsPerRemerge == 1) {
2000 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2001
2002 for (int I = 0; I != NumUnmerge; ++I) {
2003 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2004
2005 for (int J = 0; J != PartsPerUnmerge; ++J) {
2006 int Idx = I * PartsPerUnmerge + J;
2007 if (Idx < NumDst)
2008 MIB.addDef(MI.getOperand(Idx).getReg());
2009 else {
2010 // Create dead def for excess components.
2011 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2012 }
2013 }
2014
2015 MIB.addUse(Unmerge.getReg(I));
2016 }
2017 } else {
2018 SmallVector<Register, 16> Parts;
2019 for (int J = 0; J != NumUnmerge; ++J)
2020 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2021
2022 SmallVector<Register, 8> RemergeParts;
2023 for (int I = 0; I != NumDst; ++I) {
2024 for (int J = 0; J < PartsPerRemerge; ++J) {
2025 const int Idx = I * PartsPerRemerge + J;
2026 RemergeParts.emplace_back(Parts[Idx]);
2027 }
2028
2029 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2030 RemergeParts.clear();
2031 }
2032 }
2033
2034 MI.eraseFromParent();
2035 return Legalized;
2036}
2037
2038 LegalizerHelper::LegalizeResult
2039LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2040 LLT WideTy) {
2041 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2042 unsigned Offset = MI.getOperand(2).getImm();
2043
2044 if (TypeIdx == 0) {
2045 if (SrcTy.isVector() || DstTy.isVector())
2046 return UnableToLegalize;
2047
2048 SrcOp Src(SrcReg);
2049 if (SrcTy.isPointer()) {
2050 // Extracts from pointers can be handled only if they are really just
2051 // simple integers.
2053 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2054 return UnableToLegalize;
2055
2056 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2057 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2058 SrcTy = SrcAsIntTy;
2059 }
2060
2061 if (DstTy.isPointer())
2062 return UnableToLegalize;
2063
2064 if (Offset == 0) {
2065 // Avoid a shift in the degenerate case.
2066 MIRBuilder.buildTrunc(DstReg,
2067 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2068 MI.eraseFromParent();
2069 return Legalized;
2070 }
2071
2072 // Do a shift in the source type.
2073 LLT ShiftTy = SrcTy;
2074 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2075 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2076 ShiftTy = WideTy;
2077 }
2078
2079 auto LShr = MIRBuilder.buildLShr(
2080 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2081 MIRBuilder.buildTrunc(DstReg, LShr);
2082 MI.eraseFromParent();
2083 return Legalized;
2084 }
2085
2086 if (SrcTy.isScalar()) {
2088 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2090 return Legalized;
2091 }
2092
2093 if (!SrcTy.isVector())
2094 return UnableToLegalize;
2095
2096 if (DstTy != SrcTy.getElementType())
2097 return UnableToLegalize;
2098
2099 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2100 return UnableToLegalize;
2101
2103 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2104
2105 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2106 Offset);
2107 widenScalarDst(MI, WideTy.getScalarType(), 0);
2109 return Legalized;
2110}
2111
2112 LegalizerHelper::LegalizeResult
2113LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2114 LLT WideTy) {
2115 if (TypeIdx != 0 || WideTy.isVector())
2116 return UnableToLegalize;
2118 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2119 widenScalarDst(MI, WideTy);
2121 return Legalized;
2122}
2123
2124 LegalizerHelper::LegalizeResult
2125LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2126 LLT WideTy) {
2127 unsigned Opcode;
2128 unsigned ExtOpcode;
2129 std::optional<Register> CarryIn;
2130 switch (MI.getOpcode()) {
2131 default:
2132 llvm_unreachable("Unexpected opcode!");
2133 case TargetOpcode::G_SADDO:
2134 Opcode = TargetOpcode::G_ADD;
2135 ExtOpcode = TargetOpcode::G_SEXT;
2136 break;
2137 case TargetOpcode::G_SSUBO:
2138 Opcode = TargetOpcode::G_SUB;
2139 ExtOpcode = TargetOpcode::G_SEXT;
2140 break;
2141 case TargetOpcode::G_UADDO:
2142 Opcode = TargetOpcode::G_ADD;
2143 ExtOpcode = TargetOpcode::G_ZEXT;
2144 break;
2145 case TargetOpcode::G_USUBO:
2146 Opcode = TargetOpcode::G_SUB;
2147 ExtOpcode = TargetOpcode::G_ZEXT;
2148 break;
2149 case TargetOpcode::G_SADDE:
2150 Opcode = TargetOpcode::G_UADDE;
2151 ExtOpcode = TargetOpcode::G_SEXT;
2152 CarryIn = MI.getOperand(4).getReg();
2153 break;
2154 case TargetOpcode::G_SSUBE:
2155 Opcode = TargetOpcode::G_USUBE;
2156 ExtOpcode = TargetOpcode::G_SEXT;
2157 CarryIn = MI.getOperand(4).getReg();
2158 break;
2159 case TargetOpcode::G_UADDE:
2160 Opcode = TargetOpcode::G_UADDE;
2161 ExtOpcode = TargetOpcode::G_ZEXT;
2162 CarryIn = MI.getOperand(4).getReg();
2163 break;
2164 case TargetOpcode::G_USUBE:
2165 Opcode = TargetOpcode::G_USUBE;
2166 ExtOpcode = TargetOpcode::G_ZEXT;
2167 CarryIn = MI.getOperand(4).getReg();
2168 break;
2169 }
2170
2171 if (TypeIdx == 1) {
2172 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2173
2175 if (CarryIn)
2176 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2177 widenScalarDst(MI, WideTy, 1);
2178
2180 return Legalized;
2181 }
2182
2183 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2184 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2185 // Do the arithmetic in the larger type.
2186 Register NewOp;
2187 if (CarryIn) {
2188 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2189 NewOp = MIRBuilder
2190 .buildInstr(Opcode, {WideTy, CarryOutTy},
2191 {LHSExt, RHSExt, *CarryIn})
2192 .getReg(0);
2193 } else {
2194 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2195 }
2196 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2197 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2198 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2199 // There is no overflow if the ExtOp is the same as NewOp.
2200 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2201 // Now trunc the NewOp to the original result.
2202 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2203 MI.eraseFromParent();
2204 return Legalized;
2205}
2206
2207 LegalizerHelper::LegalizeResult
2208LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2209 LLT WideTy) {
2210 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2211 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2212 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2213 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2214 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2215 // We can convert this to:
2216 // 1. Any extend iN to iM
2217 // 2. SHL by M-N
2218 // 3. [US][ADD|SUB|SHL]SAT
2219 // 4. L/ASHR by M-N
2220 //
2221 // It may be more efficient to lower this to a min and a max operation in
2222 // the higher precision arithmetic if the promoted operation isn't legal,
2223 // but this decision is up to the target's lowering request.
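// For illustration only (register names are made up), widening an s8
// G_SADDSAT to s32 with this scheme gives:
//   %a32:_(s32) = G_ANYEXT %a:_(s8)
//   %b32:_(s32) = G_ANYEXT %b:_(s8)
//   %ah:_(s32) = G_SHL %a32, 24
//   %bh:_(s32) = G_SHL %b32, 24
//   %sat:_(s32) = G_SADDSAT %ah, %bh
//   %shr:_(s32) = G_ASHR %sat, 24
//   %res:_(s8) = G_TRUNC %shr
// For the shift variants only the LHS is shifted up; the zero-extended shift
// amount is used unchanged.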
2224 Register DstReg = MI.getOperand(0).getReg();
2225
2226 unsigned NewBits = WideTy.getScalarSizeInBits();
2227 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2228
2229 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2230 // must not left-shift the RHS, so that the shift amount itself is preserved.
2231 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2232 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2233 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2234 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2235 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2236 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2237
2238 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2239 {ShiftL, ShiftR}, MI.getFlags());
2240
2241 // Use a shift that will preserve the number of sign bits when the trunc is
2242 // folded away.
2243 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2244 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2245
2246 MIRBuilder.buildTrunc(DstReg, Result);
2247 MI.eraseFromParent();
2248 return Legalized;
2249}
2250
2251 LegalizerHelper::LegalizeResult
2252LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2253 LLT WideTy) {
2254 if (TypeIdx == 1) {
2256 widenScalarDst(MI, WideTy, 1);
2258 return Legalized;
2259 }
2260
2261 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2262 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2263 LLT SrcTy = MRI.getType(LHS);
2264 LLT OverflowTy = MRI.getType(OriginalOverflow);
2265 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2266
2267 // To determine if the result overflowed in the larger type, we extend the
2268 // input to the larger type, do the multiply (checking if it overflows),
2269 // then also check the high bits of the result to see if overflow happened
2270 // there.
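// For illustration only (register names are made up), an s8 G_UMULO widened
// to s32 cannot overflow in the wide multiply (32 >= 2 * 8), so only the
// high-bits check remains:
//   %l:_(s32) = G_ZEXT %lhs:_(s8)
//   %r:_(s32) = G_ZEXT %rhs:_(s8)
//   %m:_(s32) = G_MUL %l, %r
//   %res:_(s8) = G_TRUNC %m
//   %lo:_(s32) = G_AND %m, 255 ; zero-extend-in-register of the low 8 bits
//   %ovf:_(s1) = G_ICMP intpred(ne), %m, %lo
// If the wide type were narrower than twice the source width, a wide G_UMULO
// would be used instead and its overflow bit ORed into the final overflow.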
2271 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2272 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2273 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2274
2275 // Multiplication cannot overflow if WideTy is >= 2 * the original width (an
2276 // N-bit product fits in 2*N bits), so we don't need the overflow result of a wider-type multiply.
2277 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2278
2279 unsigned MulOpc =
2280 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2281
2282 MachineInstrBuilder Mulo;
2283 if (WideMulCanOverflow)
2284 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2285 {LeftOperand, RightOperand});
2286 else
2287 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2288
2289 auto Mul = Mulo->getOperand(0);
2290 MIRBuilder.buildTrunc(Result, Mul);
2291
2292 MachineInstrBuilder ExtResult;
2293 // Overflow occurred if it occurred in the larger type, or if the high part
2294 // of the result does not zero/sign-extend the low part. Check this second
2295 // possibility first.
2296 if (IsSigned) {
2297 // For signed, overflow occurred when the high part does not sign-extend
2298 // the low part.
2299 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2300 } else {
2301 // Unsigned overflow occurred when the high part does not zero-extend the
2302 // low part.
2303 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2304 }
2305
2306 if (WideMulCanOverflow) {
2307 auto Overflow =
2308 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2309 // Finally check if the multiplication in the larger type itself overflowed.
2310 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2311 } else {
2312 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2313 }
2314 MI.eraseFromParent();
2315 return Legalized;
2316}
2317
2318 LegalizerHelper::LegalizeResult
2319 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2320 switch (MI.getOpcode()) {
2321 default:
2322 return UnableToLegalize;
2323 case TargetOpcode::G_ATOMICRMW_XCHG:
2324 case TargetOpcode::G_ATOMICRMW_ADD:
2325 case TargetOpcode::G_ATOMICRMW_SUB:
2326 case TargetOpcode::G_ATOMICRMW_AND:
2327 case TargetOpcode::G_ATOMICRMW_OR:
2328 case TargetOpcode::G_ATOMICRMW_XOR:
2329 case TargetOpcode::G_ATOMICRMW_MIN:
2330 case TargetOpcode::G_ATOMICRMW_MAX:
2331 case TargetOpcode::G_ATOMICRMW_UMIN:
2332 case TargetOpcode::G_ATOMICRMW_UMAX:
2333 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2335 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2336 widenScalarDst(MI, WideTy, 0);
2338 return Legalized;
2339 case TargetOpcode::G_ATOMIC_CMPXCHG:
2340 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2342 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2343 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2344 widenScalarDst(MI, WideTy, 0);
2346 return Legalized;
2347 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2348 if (TypeIdx == 0) {
2350 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2351 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2352 widenScalarDst(MI, WideTy, 0);
2354 return Legalized;
2355 }
2356 assert(TypeIdx == 1 &&
2357 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2359 widenScalarDst(MI, WideTy, 1);
2361 return Legalized;
2362 case TargetOpcode::G_EXTRACT:
2363 return widenScalarExtract(MI, TypeIdx, WideTy);
2364 case TargetOpcode::G_INSERT:
2365 return widenScalarInsert(MI, TypeIdx, WideTy);
2366 case TargetOpcode::G_MERGE_VALUES:
2367 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2368 case TargetOpcode::G_UNMERGE_VALUES:
2369 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2370 case TargetOpcode::G_SADDO:
2371 case TargetOpcode::G_SSUBO:
2372 case TargetOpcode::G_UADDO:
2373 case TargetOpcode::G_USUBO:
2374 case TargetOpcode::G_SADDE:
2375 case TargetOpcode::G_SSUBE:
2376 case TargetOpcode::G_UADDE:
2377 case TargetOpcode::G_USUBE:
2378 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2379 case TargetOpcode::G_UMULO:
2380 case TargetOpcode::G_SMULO:
2381 return widenScalarMulo(MI, TypeIdx, WideTy);
2382 case TargetOpcode::G_SADDSAT:
2383 case TargetOpcode::G_SSUBSAT:
2384 case TargetOpcode::G_SSHLSAT:
2385 case TargetOpcode::G_UADDSAT:
2386 case TargetOpcode::G_USUBSAT:
2387 case TargetOpcode::G_USHLSAT:
2388 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2389 case TargetOpcode::G_CTTZ:
2390 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2391 case TargetOpcode::G_CTLZ:
2392 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2393 case TargetOpcode::G_CTPOP: {
2394 if (TypeIdx == 0) {
2396 widenScalarDst(MI, WideTy, 0);
2398 return Legalized;
2399 }
2400
2401 Register SrcReg = MI.getOperand(1).getReg();
2402
2403 // First extend the input.
2404 unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
2405 MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
2406 ? TargetOpcode::G_ANYEXT
2407 : TargetOpcode::G_ZEXT;
2408 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2409 LLT CurTy = MRI.getType(SrcReg);
2410 unsigned NewOpc = MI.getOpcode();
2411 if (NewOpc == TargetOpcode::G_CTTZ) {
2412 // The count is the same in the larger type except if the original
2413 // value was zero. This can be handled by setting the bit just off
2414 // the top of the original type.
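// For example, when widening an s8 G_CTTZ to s32, OR the extended source
// with 0x100 so that a zero input still yields cttz == 8, matching the
// result the narrow operation would have produced.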
2415 auto TopBit =
2416 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2417 MIBSrc = MIRBuilder.buildOr(
2418 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2419 // Now we know the operand is non-zero, use the more relaxed opcode.
2420 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2421 }
2422
2423 // Perform the operation at the larger size.
2424 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2425 // This is already the correct result for CTPOP and CTTZs
2426 if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
2427 MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2428 // The correct result is NewOp - (Difference in widety and current ty).
2429 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2430 MIBNewOp = MIRBuilder.buildSub(
2431 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2432 }
2433
2434 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2435 MI.eraseFromParent();
2436 return Legalized;
2437 }
2438 case TargetOpcode::G_BSWAP: {
2440 Register DstReg = MI.getOperand(0).getReg();
2441
2442 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2443 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2444 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2445 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2446
2447 MI.getOperand(0).setReg(DstExt);
2448
2450
2451 LLT Ty = MRI.getType(DstReg);
2452 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2453 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2454 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2455
2456 MIRBuilder.buildTrunc(DstReg, ShrReg);
2458 return Legalized;
2459 }
2460 case TargetOpcode::G_BITREVERSE: {
2462
2463 Register DstReg = MI.getOperand(0).getReg();
2464 LLT Ty = MRI.getType(DstReg);
2465 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2466
2467 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2468 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2469 MI.getOperand(0).setReg(DstExt);
2471
2472 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2473 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2474 MIRBuilder.buildTrunc(DstReg, Shift);
2476 return Legalized;
2477 }
2478 case TargetOpcode::G_FREEZE:
2480 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2481 widenScalarDst(MI, WideTy);
2483 return Legalized;
2484
2485 case TargetOpcode::G_ABS:
2487 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2488 widenScalarDst(MI, WideTy);
2490 return Legalized;
2491
2492 case TargetOpcode::G_ADD:
2493 case TargetOpcode::G_AND:
2494 case TargetOpcode::G_MUL:
2495 case TargetOpcode::G_OR:
2496 case TargetOpcode::G_XOR:
2497 case TargetOpcode::G_SUB:
2498 // Perform operation at larger width (any extension is fine here, high bits
2499 // don't affect the result) and then truncate the result back to the
2500 // original type.
2502 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2503 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2504 widenScalarDst(MI, WideTy);
2506 return Legalized;
2507
2508 case TargetOpcode::G_SBFX:
2509 case TargetOpcode::G_UBFX:
2511
2512 if (TypeIdx == 0) {
2513 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2514 widenScalarDst(MI, WideTy);
2515 } else {
2516 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2517 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2518 }
2519
2521 return Legalized;
2522
2523 case TargetOpcode::G_SHL:
2525
2526 if (TypeIdx == 0) {
2527 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2528 widenScalarDst(MI, WideTy);
2529 } else {
2530 assert(TypeIdx == 1);
2531 // The "number of bits to shift" operand must preserve its value as an
2532 // unsigned integer:
2533 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2534 }
2535
2537 return Legalized;
2538
2539 case TargetOpcode::G_ROTR:
2540 case TargetOpcode::G_ROTL:
2541 if (TypeIdx != 1)
2542 return UnableToLegalize;
2543
2545 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2547 return Legalized;
2548
2549 case TargetOpcode::G_SDIV:
2550 case TargetOpcode::G_SREM:
2551 case TargetOpcode::G_SMIN:
2552 case TargetOpcode::G_SMAX:
2554 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2555 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2556 widenScalarDst(MI, WideTy);
2558 return Legalized;
2559
2560 case TargetOpcode::G_SDIVREM:
2562 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2563 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2564 widenScalarDst(MI, WideTy);
2565 widenScalarDst(MI, WideTy, 1);
2567 return Legalized;
2568
2569 case TargetOpcode::G_ASHR:
2570 case TargetOpcode::G_LSHR:
2572
2573 if (TypeIdx == 0) {
2574 unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
2575 TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2576
2577 widenScalarSrc(MI, WideTy, 1, CvtOp);
2578 widenScalarDst(MI, WideTy);
2579 } else {
2580 assert(TypeIdx == 1);
2581 // The "number of bits to shift" operand must preserve its value as an
2582 // unsigned integer:
2583 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2584 }
2585
2587 return Legalized;
2588 case TargetOpcode::G_UDIV:
2589 case TargetOpcode::G_UREM:
2590 case TargetOpcode::G_UMIN:
2591 case TargetOpcode::G_UMAX:
2593 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2594 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2595 widenScalarDst(MI, WideTy);
2597 return Legalized;
2598
2599 case TargetOpcode::G_UDIVREM:
2601 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2602 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2603 widenScalarDst(MI, WideTy);
2604 widenScalarDst(MI, WideTy, 1);
2606 return Legalized;
2607
2608 case TargetOpcode::G_SELECT:
2610 if (TypeIdx == 0) {
2611 // Perform operation at larger width (any extension is fine here, high
2612 // bits don't affect the result) and then truncate the result back to the
2613 // original type.
2614 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2615 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2616 widenScalarDst(MI, WideTy);
2617 } else {
2618 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
2619 // Explicit extension is required here since high bits affect the result.
2620 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
2621 }
2623 return Legalized;
2624
2625 case TargetOpcode::G_FPTOSI:
2626 case TargetOpcode::G_FPTOUI:
2627 case TargetOpcode::G_IS_FPCLASS:
2629
2630 if (TypeIdx == 0)
2631 widenScalarDst(MI, WideTy);
2632 else
2633 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2634
2636 return Legalized;
2637 case TargetOpcode::G_SITOFP:
2639
2640 if (TypeIdx == 0)
2641 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2642 else
2643 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2644
2646 return Legalized;
2647 case TargetOpcode::G_UITOFP:
2649
2650 if (TypeIdx == 0)
2651 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2652 else
2653 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2654
2656 return Legalized;
2657 case TargetOpcode::G_LOAD:
2658 case TargetOpcode::G_SEXTLOAD:
2659 case TargetOpcode::G_ZEXTLOAD:
2661 widenScalarDst(MI, WideTy);
2663 return Legalized;
2664
2665 case TargetOpcode::G_STORE: {
2666 if (TypeIdx != 0)
2667 return UnableToLegalize;
2668
2669 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2670 if (!Ty.isScalar())
2671 return UnableToLegalize;
2672
2674
2675 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
2676 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2677 widenScalarSrc(MI, WideTy, 0, ExtType);
2678
2680 return Legalized;
2681 }
2682 case TargetOpcode::G_CONSTANT: {
2683 MachineOperand &SrcMO = MI.getOperand(1);
2685 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
2686 MRI.getType(MI.getOperand(0).getReg()));
2687 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2688 ExtOpc == TargetOpcode::G_ANYEXT) &&
2689 "Illegal Extend");
2690 const APInt &SrcVal = SrcMO.getCImm()->getValue();
2691 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2692 ? SrcVal.sext(WideTy.getSizeInBits())
2693 : SrcVal.zext(WideTy.getSizeInBits());
2695 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
2696
2697 widenScalarDst(MI, WideTy);
2699 return Legalized;
2700 }
2701 case TargetOpcode::G_FCONSTANT: {
2702 // To avoid changing the bits of the constant due to extension to a larger
2703 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
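// For example, an s16 G_FCONSTANT half 1.0 becomes an s16 G_CONSTANT with
// the raw bits 0x3C00, and that constant is then widened through the usual
// G_TRUNC of a wider G_CONSTANT.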
2704 MachineOperand &SrcMO = MI.getOperand(1);
2705 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
2707 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
2708 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
2709 MI.eraseFromParent();
2710 return Legalized;
2711 }
2712 case TargetOpcode::G_IMPLICIT_DEF: {
2714 widenScalarDst(MI, WideTy);
2716 return Legalized;
2717 }
2718 case TargetOpcode::G_BRCOND:
2720 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
2722 return Legalized;
2723
2724 case TargetOpcode::G_FCMP:
2726 if (TypeIdx == 0)
2727 widenScalarDst(MI, WideTy);
2728 else {
2729 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2730 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
2731 }
2733 return Legalized;
2734
2735 case TargetOpcode::G_ICMP:
2737 if (TypeIdx == 0)
2738 widenScalarDst(MI, WideTy);
2739 else {
2740 unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
2741 MI.getOperand(1).getPredicate()))
2742 ? TargetOpcode::G_SEXT
2743 : TargetOpcode::G_ZEXT;
2744 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
2745 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
2746 }
2748 return Legalized;
2749
2750 case TargetOpcode::G_PTR_ADD:
2751 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
2753 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2755 return Legalized;
2756
2757 case TargetOpcode::G_PHI: {
2758 assert(TypeIdx == 0 && "Expecting only Idx 0");
2759
2761 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
2762 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2764 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
2765 }
2766
2767 MachineBasicBlock &MBB = *MI.getParent();
2769 widenScalarDst(MI, WideTy);
2771 return Legalized;
2772 }
2773 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2774 if (TypeIdx == 0) {
2775 Register VecReg = MI.getOperand(1).getReg();
2776 LLT VecTy = MRI.getType(VecReg);
2778
2779 widenScalarSrc(
2780 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
2781 TargetOpcode::G_ANYEXT);
2782
2783 widenScalarDst(MI, WideTy, 0);
2785 return Legalized;
2786 }
2787
2788 if (TypeIdx != 2)
2789 return UnableToLegalize;
2791 // TODO: Probably should be zext
2792 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2794 return Legalized;
2795 }
2796 case TargetOpcode::G_INSERT_VECTOR_ELT: {
2797 if (TypeIdx == 0) {
2799 const LLT WideEltTy = WideTy.getElementType();
2800
2801 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2802 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
2803 widenScalarDst(MI, WideTy, 0);
2805 return Legalized;
2806 }
2807
2808 if (TypeIdx == 1) {
2810
2811 Register VecReg = MI.getOperand(1).getReg();
2812 LLT VecTy = MRI.getType(VecReg);
2813 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
2814
2815 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
2816 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2817 widenScalarDst(MI, WideVecTy, 0);
2819 return Legalized;
2820 }
2821
2822 if (TypeIdx == 2) {
2824 // TODO: Probably should be zext
2825 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2827 return Legalized;
2828 }
2829
2830 return UnableToLegalize;
2831 }
2832 case TargetOpcode::G_FADD:
2833 case TargetOpcode::G_FMUL:
2834 case TargetOpcode::G_FSUB:
2835 case TargetOpcode::G_FMA:
2836 case TargetOpcode::G_FMAD:
2837 case TargetOpcode::G_FNEG:
2838 case TargetOpcode::G_FABS:
2839 case TargetOpcode::G_FCANONICALIZE:
2840 case TargetOpcode::G_FMINNUM:
2841 case TargetOpcode::G_FMAXNUM:
2842 case TargetOpcode::G_FMINNUM_IEEE:
2843 case TargetOpcode::G_FMAXNUM_IEEE:
2844 case TargetOpcode::G_FMINIMUM:
2845 case TargetOpcode::G_FMAXIMUM:
2846 case TargetOpcode::G_FDIV:
2847 case TargetOpcode::G_FREM:
2848 case TargetOpcode::G_FCEIL:
2849 case TargetOpcode::G_FFLOOR:
2850 case TargetOpcode::G_FCOS:
2851 case TargetOpcode::G_FSIN:
2852 case TargetOpcode::G_FLOG10:
2853 case TargetOpcode::G_FLOG:
2854 case TargetOpcode::G_FLOG2:
2855 case TargetOpcode::G_FRINT:
2856 case TargetOpcode::G_FNEARBYINT:
2857 case TargetOpcode::G_FSQRT:
2858 case TargetOpcode::G_FEXP:
2859 case TargetOpcode::G_FEXP2:
2860 case TargetOpcode::G_FEXP10:
2861 case TargetOpcode::G_FPOW:
2862 case TargetOpcode::G_INTRINSIC_TRUNC:
2863 case TargetOpcode::G_INTRINSIC_ROUND:
2864 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2865 assert(TypeIdx == 0);
2867
2868 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
2869 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
2870
2871 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2873 return Legalized;
2874 case TargetOpcode::G_FPOWI:
2875 case TargetOpcode::G_FLDEXP:
2876 case TargetOpcode::G_STRICT_FLDEXP: {
2877 if (TypeIdx == 0) {
2878 if (MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP)
2879 return UnableToLegalize;
2880
2882 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2883 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2885 return Legalized;
2886 }
2887
2888 if (TypeIdx == 1) {
2889 // For some reason SelectionDAG tries to promote to a libcall without
2890 // actually changing the integer type for promotion.
2892 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2894 return Legalized;
2895 }
2896
2897 return UnableToLegalize;
2898 }
2899 case TargetOpcode::G_FFREXP: {
2901
2902 if (TypeIdx == 0) {
2903 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2904 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2905 } else {
2906 widenScalarDst(MI, WideTy, 1);
2907 }
2908
2910 return Legalized;
2911 }
2912 case TargetOpcode::G_INTTOPTR:
2913 if (TypeIdx != 1)
2914 return UnableToLegalize;
2915
2917 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2919 return Legalized;
2920 case TargetOpcode::G_PTRTOINT:
2921 if (TypeIdx != 0)
2922 return UnableToLegalize;
2923
2925 widenScalarDst(MI, WideTy, 0);
2927 return Legalized;
2928 case TargetOpcode::G_BUILD_VECTOR: {
2930
2931 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
2932 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
2933 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
2934
2935 // Avoid changing the result vector type if the source element type was
2936 // requested.
2937 if (TypeIdx == 1) {
2938 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
2939 } else {
2940 widenScalarDst(MI, WideTy, 0);
2941 }
2942
2944 return Legalized;
2945 }
2946 case TargetOpcode::G_SEXT_INREG:
2947 if (TypeIdx != 0)
2948 return UnableToLegalize;
2949
2951 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2952 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
2954 return Legalized;
2955 case TargetOpcode::G_PTRMASK: {
2956 if (TypeIdx != 1)
2957 return UnableToLegalize;
2959 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2961 return Legalized;
2962 }
2963 case TargetOpcode::G_VECREDUCE_FADD:
2964 case TargetOpcode::G_VECREDUCE_FMUL:
2965 case TargetOpcode::G_VECREDUCE_FMIN:
2966 case TargetOpcode::G_VECREDUCE_FMAX:
2967 case TargetOpcode::G_VECREDUCE_FMINIMUM:
2968 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
2969 if (TypeIdx != 0)
2970 return UnableToLegalize;
2972 Register VecReg = MI.getOperand(1).getReg();
2973 LLT VecTy = MRI.getType(VecReg);
2974 LLT WideVecTy = VecTy.isVector()
2975 ? LLT::vector(VecTy.getElementCount(), WideTy)
2976 : WideTy;
2977 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
2978 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2980 return Legalized;
2981 }
2982}
2983
2984 static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
2985 MachineIRBuilder &B, Register Src, LLT Ty) {
2986 auto Unmerge = B.buildUnmerge(Ty, Src);
2987 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
2988 Pieces.push_back(Unmerge.getReg(I));
2989}
2990
2991static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
2992 MachineIRBuilder &MIRBuilder) {
2993 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2994 MachineFunction &MF = MIRBuilder.getMF();
2995 const DataLayout &DL = MIRBuilder.getDataLayout();
2996 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
2997 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
2998 LLT DstLLT = MRI.getType(DstReg);
2999
3000 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3001
3002 auto Addr = MIRBuilder.buildConstantPool(
3003 AddrPtrTy,
3004 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3005
3006 MachineMemOperand *MMO =
3008 MachineMemOperand::MOLoad, DstLLT, Alignment);
3009
3010 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3011}
3012
3013 LegalizerHelper::LegalizeResult
3014 LegalizerHelper::lowerConstant(MachineInstr &MI) {
3015 const MachineOperand &ConstOperand = MI.getOperand(1);
3016 const Constant *ConstantVal = ConstOperand.getCImm();
3017
3018 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3019 MI.eraseFromParent();
3020
3021 return Legalized;
3022}
3023
3024 LegalizerHelper::LegalizeResult
3025 LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3026 const MachineOperand &ConstOperand = MI.getOperand(1);
3027 const Constant *ConstantVal = ConstOperand.getFPImm();
3028
3029 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3030 MI.eraseFromParent();
3031
3032 return Legalized;
3033}
3034
3035 LegalizerHelper::LegalizeResult
3036 LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3037 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3038 if (SrcTy.isVector()) {
3039 LLT SrcEltTy = SrcTy.getElementType();
3040 SmallVector<Register, 8> SrcRegs;
3041
3042 if (DstTy.isVector()) {
3043 int NumDstElt = DstTy.getNumElements();
3044 int NumSrcElt = SrcTy.getNumElements();
3045
3046 LLT DstEltTy = DstTy.getElementType();
3047 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3048 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3049
3050 // If there's an element size mismatch, insert intermediate casts to match
3051 // the result element type.
3052 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3053 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3054 //
3055 // =>
3056 //
3057 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3058 // %4:_(<2 x s8>) = G_BITCAST %2
3059 // %5:_(<2 x s8>) = G_BITCAST %3
3060 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3061 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
3062 SrcPartTy = SrcEltTy;
3063 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3064 //
3065 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3066 //
3067 // =>
3068 //
3069 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3070 // %4:_(s16) = G_BITCAST %2
3071 // %5:_(s16) = G_BITCAST %3
3072 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3073 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
3074 DstCastTy = DstEltTy;
3075 }
3076
3077 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3078 for (Register &SrcReg : SrcRegs)
3079 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3080 } else
3081 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3082
3083 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3084 MI.eraseFromParent();
3085 return Legalized;
3086 }
3087
3088 if (DstTy.isVector()) {
3089 SmallVector<Register, 8> SrcRegs;
3090 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3091 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3092 MI.eraseFromParent();
3093 return Legalized;
3094 }
3095
3096 return UnableToLegalize;
3097}
3098
3099/// Figure out the bit offset into a register when coercing a vector index for
3100/// the wide element type. This is only for the case when promoting vector to
3101/// one with larger elements.
3102//
3103///
3104/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3105/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
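/// For illustration, when a <4 x s16> source is viewed as <2 x s32>
/// (NewEltSize = 32, OldEltSize = 16), source index 3 gives
/// %offset_idx = 3 & 1 = 1 and %offset_bits = 1 << 4 = 16, i.e. the high
/// half of the second s32 element.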
3106 static Register getBitcastWideVectorElementOffset(MachineIRBuilder &B,
3107 Register Idx,
3108 unsigned NewEltSize,
3109 unsigned OldEltSize) {
3110 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3111 LLT IdxTy = B.getMRI()->getType(Idx);
3112
3113 // Now figure out the amount we need to shift to get the target bits.
3114 auto OffsetMask = B.buildConstant(
3115 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3116 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3117 return B.buildShl(IdxTy, OffsetIdx,
3118 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3119}
3120
3121/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3122/// is casting to a vector with a smaller element size, perform multiple element
3123/// extracts and merge the results. If this is coercing to a vector with larger
3124/// elements, index the bitcasted vector and extract the target element with bit
3125/// operations. This is intended to force the indexing in the native register
3126/// size for architectures that can dynamically index the register file.
3127 LegalizerHelper::LegalizeResult
3128 LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3129 LLT CastTy) {
3130 if (TypeIdx != 1)
3131 return UnableToLegalize;
3132
3133 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3134
3135 LLT SrcEltTy = SrcVecTy.getElementType();
3136 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3137 unsigned OldNumElts = SrcVecTy.getNumElements();
3138
3139 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3140 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3141
3142 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3143 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3144 if (NewNumElts > OldNumElts) {
3145 // Decreasing the vector element size
3146 //
3147 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3148 // =>
3149 // v4i32:castx = bitcast x:v2i64
3150 //
3151 // i64 = bitcast
3152 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3153 // (i32 (extract_vector_elt castx, (2 * y + 1)))
3154 //
3155 if (NewNumElts % OldNumElts != 0)
3156 return UnableToLegalize;
3157
3158 // Type of the intermediate result vector.
3159 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3160 LLT MidTy =
3161 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3162
3163 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3164
3165 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3166 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3167
3168 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3169 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3170 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3171 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3172 NewOps[I] = Elt.getReg(0);
3173 }
3174
3175 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3176 MIRBuilder.buildBitcast(Dst, NewVec);
3177 MI.eraseFromParent();
3178 return Legalized;
3179 }
3180
3181 if (NewNumElts < OldNumElts) {
3182 if (NewEltSize % OldEltSize != 0)
3183 return UnableToLegalize;
3184
3185 // This only depends on powers of 2 because we use bit tricks to figure out
3186 // the bit offset we need to shift to get the target element. A general
3187 // expansion could emit division/multiply.
3188 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3189 return UnableToLegalize;
3190
3191 // Increasing the vector element size.
3192 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3193 //
3194 // =>
3195 //
3196 // %cast = G_BITCAST %vec
3197 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3198 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3199 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3200 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3201 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3202 // %elt = G_TRUNC %elt_bits
3203
3204 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3205 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3206
3207 // Divide to get the index in the wider element type.
3208 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3209
3210 Register WideElt = CastVec;
3211 if (CastTy.isVector()) {
3212 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3213 ScaledIdx).getReg(0);
3214 }
3215
3216 // Compute the bit offset into the register of the target element.
3217 Register OffsetBits = getBitcastWideVectorElementOffset(
3218 MIRBuilder, Idx, NewEltSize, OldEltSize);
3219
3220 // Shift the wide element to get the target element.
3221 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3222 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3223 MI.eraseFromParent();
3224 return Legalized;
3225 }
3226
3227 return UnableToLegalize;
3228}
3229
3230 /// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits,
3231 /// while preserving the other bits in \p TargetReg.
3232 ///
3233 /// (ZExt(InsertReg) << OffsetBits) | (TargetReg & ~(LowBitsMask(InsertReg.size()) << OffsetBits))
3234 static Register buildBitFieldInsert(MachineIRBuilder &B,
3235 Register TargetReg, Register InsertReg,
3236 Register OffsetBits) {
3237 LLT TargetTy = B.getMRI()->getType(TargetReg);
3238 LLT InsertTy = B.getMRI()->getType(InsertReg);
3239 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3240 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3241
3242 // Produce a bitmask of the value to insert
3243 auto EltMask = B.buildConstant(
3244 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3245 InsertTy.getSizeInBits()));
3246 // Shift it into position
3247 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3248 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3249
3250 // Clear out the bits in the wide element
3251 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3252
3253 // The value to insert has all zeros already, so stick it into the masked
3254 // wide element.
3255 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3256}
3257
3258/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3259/// is increasing the element size, perform the indexing in the target element
3260/// type, and use bit operations to insert at the element position. This is
3261/// intended for architectures that can dynamically index the register file and
3262/// want to force indexing in the native register size.
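/// For illustration only (register names are made up), inserting an s8
/// element into a <16 x s8> vector that is bitcast to <4 x s32>:
///   %cast:_(<4 x s32>) = G_BITCAST %vec
///   %scaled_idx = G_LSHR %idx, 2
///   %wide_elt:_(s32) = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
///   %ins:_(s32) = (bitfield insert of %val into %wide_elt at (%idx & 3) * 8)
///   %new_vec:_(<4 x s32>) = G_INSERT_VECTOR_ELT %cast, %ins, %scaled_idx
///   %res:_(<16 x s8>) = G_BITCAST %new_vec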
3263 LegalizerHelper::LegalizeResult
3264 LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3265 LLT CastTy) {
3266 if (TypeIdx != 0)
3267 return UnableToLegalize;
3268
3269 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3270 MI.getFirst4RegLLTs();
3271 LLT VecTy = DstTy;
3272
3273 LLT VecEltTy = VecTy.getElementType();
3274 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3275 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3276 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3277
3278 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3279 unsigned OldNumElts = VecTy.getNumElements();
3280
3281 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3282 if (NewNumElts < OldNumElts) {
3283 if (NewEltSize % OldEltSize != 0)
3284 return UnableToLegalize;
3285
3286 // This only depends on powers of 2 because we use bit tricks to figure out
3287 // the bit offset we need to shift to get the target element. A general
3288 // expansion could emit division/multiply.
3289 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3290 return UnableToLegalize;
3291
3292 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3293 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3294
3295 // Divide to get the index in the wider element type.
3296 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3297
3298 Register ExtractedElt = CastVec;
3299 if (CastTy.isVector()) {
3300 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3301 ScaledIdx).getReg(0);
3302 }
3303
3304 // Compute the bit offset into the register of the target element.
3305 Register OffsetBits = getBitcastWideVectorElementOffset(
3306 MIRBuilder, Idx, NewEltSize, OldEltSize);
3307
3308 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3309 Val, OffsetBits);
3310 if (CastTy.isVector()) {
3311 InsertedElt = MIRBuilder.buildInsertVectorElement(
3312 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3313 }
3314
3315 MIRBuilder.buildBitcast(Dst, InsertedElt);
3316 MI.eraseFromParent();
3317 return Legalized;
3318 }
3319
3320 return UnableToLegalize;
3321}
3322
3323 LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
3324 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
3325 Register DstReg = LoadMI.getDstReg();
3326 Register PtrReg = LoadMI.getPointerReg();
3327 LLT DstTy = MRI.getType(DstReg);
3328 MachineMemOperand &MMO = LoadMI.getMMO();
3329 LLT MemTy = MMO.getMemoryType();
3330 MachineFunction &MF = MIRBuilder.getMF();
3331
3332 unsigned MemSizeInBits = MemTy.getSizeInBits();
3333 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
3334
3335 if (MemSizeInBits != MemStoreSizeInBits) {
3336 if (MemTy.isVector())
3337 return UnableToLegalize;
3338
3339 // Promote to a byte-sized load if not loading an integral number of
3340 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
3341 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
3342 MachineMemOperand *NewMMO =
3343 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
3344
3345 Register LoadReg = DstReg;
3346 LLT LoadTy = DstTy;
3347
3348 // If this wasn't already an extending load, we need to widen the result
3349 // register to avoid creating a load with a narrower result than the source.
3350 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
3351 LoadTy = WideMemTy;
3352 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
3353 }
3354
3355 if (isa<GSExtLoad>(LoadMI)) {
3356 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3357 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
3358 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
3359 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3360 // The extra bits are guaranteed to be zero, since we stored them that
3361 // way. A zext load of the wide type thus automatically gives a zext from the memory type.
3362 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
3363 } else {
3364 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
3365 }
3366
3367 if (DstTy != LoadTy)
3368 MIRBuilder.buildTrunc(DstReg, LoadReg);
3369
3370 LoadMI.eraseFromParent();
3371 return Legalized;
3372 }
3373
3374 // Big endian lowering not implemented.
3375 if (MIRBuilder.getDataLayout().isBigEndian())
3376 return UnableToLegalize;
3377
3378 // This load needs splitting into power of 2 sized loads.
3379 //
3380 // Our strategy here is to generate anyextending loads for the smaller
3381 // types up to the next power-of-2 result type, and then combine the two
3382 // results together, before truncating back down to the non-pow-2
3383 // type.
3384 // E.g. v1 = i24 load =>
3385 // v2 = i32 zextload (2 byte)
3386 // v3 = i32 load (1 byte)
3387 // v4 = i32 shl v3, 16
3388 // v5 = i32 or v4, v2
3389 // v1 = i24 trunc v5
3390 // By doing this we generate the correct truncate which should get
3391 // combined away as an artifact with a matching extend.
3392
3393 uint64_t LargeSplitSize, SmallSplitSize;
3394
3395 if (!isPowerOf2_32(MemSizeInBits)) {
3396 // This load needs splitting into power of 2 sized loads.
3397 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
3398 SmallSplitSize = MemSizeInBits - LargeSplitSize;
3399 } else {
3400 // This is already a power of 2, but we still need to split this in half.
3401 //
3402 // Assume we're being asked to decompose an unaligned load.
3403 // TODO: If this requires multiple splits, handle them all at once.
3404 auto &Ctx = MF.getFunction().getContext();
3405 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3406 return UnableToLegalize;
3407
3408 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3409 }
3410
3411 if (MemTy.isVector()) {
3412 // TODO: Handle vector extloads
3413 if (MemTy != DstTy)
3414 return UnableToLegalize;
3415
3416 // TODO: We can do better than scalarizing the vector and at least split it
3417 // in half.
3418 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
3419 }
3420
3421 MachineMemOperand *LargeMMO =
3422 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3423 MachineMemOperand *SmallMMO =
3424 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3425
3426 LLT PtrTy = MRI.getType(PtrReg);
3427 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
3428 LLT AnyExtTy = LLT::scalar(AnyExtSize);
3429 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
3430 PtrReg, *LargeMMO);
3431
3432 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
3433 LargeSplitSize / 8);
3434 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
3435 auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
3436 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
3437 SmallPtr, *SmallMMO);
3438
3439 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
3440 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
3441
3442 if (AnyExtTy == DstTy)
3443 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
3444 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
3445 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3446 MIRBuilder.buildTrunc(DstReg, {Or});
3447 } else {
3448 assert(DstTy.isPointer() && "expected pointer");
3449 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3450
3451 // FIXME: We currently consider this to be illegal for non-integral address
3452 // spaces, but we still need a way to reinterpret the bits.
3453 MIRBuilder.buildIntToPtr(DstReg, Or);
3454 }
3455
3456 LoadMI.eraseFromParent();
3457 return Legalized;
3458}
3459
3460 LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
3461 // Lower a non-power of 2 store into multiple pow-2 stores.
3462 // E.g. split an i24 store into an i16 store + i8 store.
3463 // We do this by first extending the stored value to the next largest power
3464 // of 2 type, and then using truncating stores to store the components.
3465 // By doing this, as with G_LOAD, we generate an extend that can be
3466 // artifact-combined away instead of leaving behind extracts.
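// Illustrative MIR for an s24 store (virtual register names are placeholders):
//   %ext:_(s32) = G_ANYEXT %val(s24)
//   G_STORE %ext(s32), %ptr(p0)        ; 2-byte (s16) MMO
//   %hi:_(s32) = G_LSHR %ext, 16
//   %ptr2:_(p0) = G_PTR_ADD %ptr, 2
//   G_STORE %hi(s32), %ptr2(p0)        ; 1-byte (s8) MMO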
3467 Register SrcReg = StoreMI.getValueReg();
3468 Register PtrReg = StoreMI.getPointerReg();
3469 LLT SrcTy = MRI.getType(SrcReg);
3470 MachineFunction &MF = MIRBuilder.getMF();
3471 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
3472 LLT MemTy = MMO.getMemoryType();
3473
3474 unsigned StoreWidth = MemTy.getSizeInBits();
3475 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
3476
3477 if (StoreWidth != StoreSizeInBits) {
3478 if (SrcTy.isVector())
3479 return UnableToLegalize;
3480
3481 // Promote to a byte-sized store with upper bits zero if not
3482 // storing an integral number of bytes. For example, promote
3483 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
3484 LLT WideTy = LLT::scalar(StoreSizeInBits);
3485
3486 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
3487 // Avoid creating a store with a narrower source than result.
3488 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
3489 SrcTy = WideTy;
3490 }
3491
3492 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
3493
3494 MachineMemOperand *NewMMO =
3495 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
3496 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
3497 StoreMI.eraseFromParent();
3498 return Legalized;
3499 }
3500
3501 if (MemTy.isVector()) {
3502 // TODO: Handle vector trunc stores
3503 if (MemTy != SrcTy)
3504 return UnableToLegalize;
3505
3506 // TODO: We can do better than scalarizing the vector and at least split it
3507 // in half.
3508 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
3509 }
3510
3511 unsigned MemSizeInBits = MemTy.getSizeInBits();
3512 uint64_t LargeSplitSize, SmallSplitSize;
3513
3514 if (!isPowerOf2_32(MemSizeInBits)) {
3515 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
3516 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
3517 } else {
3518 auto &Ctx = MF.getFunction().getContext();
3519 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3520 return UnableToLegalize; // Don't know what we're being asked to do.
3521
3522 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3523 }
3524
3525 // Extend to the next pow-2. If this store was itself the result of lowering,
3526 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
3527 // that's wider than the stored size.
3528 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
3529 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
3530
3531 if (SrcTy.isPointer()) {
3532 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
3533 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
3534 }
3535
3536 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
3537
3538 // Obtain the smaller value by shifting away the larger value.
3539 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
3540 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
3541
3542 // Generate the PtrAdd and truncating stores.
3543 LLT PtrTy = MRI.getType(PtrReg);
3544 auto OffsetCst = MIRBuilder.buildConstant(
3545 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
3546 auto SmallPtr =
3547 MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
3548
3549 MachineMemOperand *LargeMMO =
3550 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3551 MachineMemOperand *SmallMMO =
3552 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3553 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
3554 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
3555 StoreMI.eraseFromParent();
3556 return Legalized;
3557}
3558
3559LegalizerHelper::LegalizeResult
3560LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
3561 switch (MI.getOpcode()) {
3562 case TargetOpcode::G_LOAD: {
3563 if (TypeIdx != 0)
3564 return UnableToLegalize;
3565 MachineMemOperand &MMO = **MI.memoperands_begin();
3566
3567 // Not sure how to interpret a bitcast of an extending load.
3568 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3569 return UnableToLegalize;
3570
3572 bitcastDst(MI, CastTy, 0);
3573 MMO.setType(CastTy);
3575 return Legalized;
3576 }
3577 case TargetOpcode::G_STORE: {
3578 if (TypeIdx != 0)
3579 return UnableToLegalize;
3580
3581 MachineMemOperand &MMO = **MI.memoperands_begin();
3582
3583 // Not sure how to interpret a bitcast of a truncating store.
3584 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3585 return UnableToLegalize;
3586
3588 bitcastSrc(MI, CastTy, 0);
3589 MMO.setType(CastTy);
3591 return Legalized;
3592 }
3593 case TargetOpcode::G_SELECT: {
3594 if (TypeIdx != 0)
3595 return UnableToLegalize;
3596
3597 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
3598 LLVM_DEBUG(
3599 dbgs() << "bitcast action not implemented for vector select\n");
3600 return UnableToLegalize;
3601 }
3602
3604 bitcastSrc(MI, CastTy, 2);
3605 bitcastSrc(MI, CastTy, 3);
3606 bitcastDst(MI, CastTy, 0);
3608 return Legalized;
3609 }
3610 case TargetOpcode::G_AND:
3611 case TargetOpcode::G_OR:
3612 case TargetOpcode::G_XOR: {
3614 bitcastSrc(MI, CastTy, 1);
3615 bitcastSrc(MI, CastTy, 2);
3616 bitcastDst(MI, CastTy, 0);
3618 return Legalized;
3619 }
3620 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3621 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
3622 case TargetOpcode::G_INSERT_VECTOR_ELT:
3623 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
3624 default:
3625 return UnableToLegalize;
3626 }
3627}
3628
3629// Legalize an instruction by changing the opcode in place.
3630void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
3632 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
3634}
3635
3636LegalizerHelper::LegalizeResult
3637LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
3638 using namespace TargetOpcode;
3639
3640 switch(MI.getOpcode()) {
3641 default:
3642 return UnableToLegalize;
3643 case TargetOpcode::G_FCONSTANT:
3644 return lowerFConstant(MI);
3645 case TargetOpcode::G_BITCAST:
3646 return lowerBitcast(MI);
3647 case TargetOpcode::G_SREM:
3648 case TargetOpcode::G_UREM: {
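    // Lower the remainder in terms of the corresponding division:
    //   x % y == x - (x / y) * y
    // e.g. for G_SREM, 7 srem -3 -> 7 - (7 sdiv -3) * -3 == 7 - (-2 * -3) == 1.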
3649 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3650 auto Quot =
3651 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
3652 {MI.getOperand(1), MI.getOperand(2)});
3653
3654 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
3655 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
3656 MI.eraseFromParent();
3657 return Legalized;
3658 }
3659 case TargetOpcode::G_SADDO:
3660 case TargetOpcode::G_SSUBO:
3661 return lowerSADDO_SSUBO(MI);
3662 case TargetOpcode::G_UMULH:
3663 case TargetOpcode::G_SMULH:
3664 return lowerSMULH_UMULH(MI);
3665 case TargetOpcode::G_SMULO:
3666 case TargetOpcode::G_UMULO: {
3667 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
3668 // result.
3669 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
3670 LLT Ty = MRI.getType(Res);
3671
3672 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
3673 ? TargetOpcode::G_SMULH
3674 : TargetOpcode::G_UMULH;
3675
3677 const auto &TII = MIRBuilder.getTII();
3678 MI.setDesc(TII.get(TargetOpcode::G_MUL));
3679 MI.removeOperand(1);
3681
3682 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
3683 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3684
3685 // Move insert point forward so we can use the Res register if needed.
3686 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3687
3688 // For *signed* multiply, overflow is detected by checking:
3689 // (hi != (lo >> bitwidth-1))
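    // e.g. for s8: 100 * 2 gives lo = 0xC8 (-56) and hi = 0x00; since
    // (lo >> 7) == -1 != hi, signed overflow is reported.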
3690 if (Opcode == TargetOpcode::G_SMULH) {
3691 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
3692 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
3693 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
3694 } else {
3695 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
3696 }
3697 return Legalized;
3698 }
3699 case TargetOpcode::G_FNEG: {
3700 auto [Res, SubByReg] = MI.getFirst2Regs();
3701 LLT Ty = MRI.getType(Res);
3702
3703 // TODO: Handle vector types once we are able to
3704 // represent them.
3705 if (Ty.isVector())
3706 return UnableToLegalize;
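    // Flip only the sign bit: for s32 the mask is 0x80000000, so
    // G_FNEG x == G_XOR x, 0x80000000 (this also maps +0.0 to -0.0).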
3707 auto SignMask =
3708 MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
3709 MIRBuilder.buildXor(Res, SubByReg, SignMask);
3710 MI.eraseFromParent();
3711 return Legalized;
3712 }
3713 case TargetOpcode::G_FSUB:
3714 case TargetOpcode::G_STRICT_FSUB: {
3715 auto [Res, LHS, RHS] = MI.getFirst3Regs();
3716 LLT Ty = MRI.getType(Res);
3717
3718 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
3719 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
3720
3721 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
3722 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
3723 else
3724 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3725
3726 MI.eraseFromParent();
3727 return Legalized;
3728 }
3729 case TargetOpcode::G_FMAD:
3730 return lowerFMad(MI);
3731 case TargetOpcode::G_FFLOOR:
3732 return lowerFFloor(MI);
3733 case TargetOpcode::G_INTRINSIC_ROUND:
3734 return lowerIntrinsicRound(MI);
3735 case TargetOpcode::G_FRINT: {
3736 // Since round even is the assumed rounding mode for unconstrained FP
3737 // operations, rint and roundeven are the same operation.
3738 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
3739 return Legalized;
3740 }
3741 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
3742 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
3743 MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
3744 **MI.memoperands_begin());
3745 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
3746 MI.eraseFromParent();
3747 return Legalized;
3748 }
3749 case TargetOpcode::G_LOAD:
3750 case TargetOpcode::G_SEXTLOAD:
3751 case TargetOpcode::G_ZEXTLOAD:
3752 return lowerLoad(cast<GAnyLoad>(MI));
3753 case TargetOpcode::G_STORE:
3754 return lowerStore(cast<GStore>(MI));
3755 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3756 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3757 case TargetOpcode::G_CTLZ:
3758 case TargetOpcode::G_CTTZ:
3759 case TargetOpcode::G_CTPOP:
3760 return lowerBitCount(MI);
3761 case G_UADDO: {
3762 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
3763
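    // The unsigned sum wraps exactly when it overflows, so the carry-out is
    // (Res u< RHS); e.g. for s8, 250 + 10 wraps to 4 and 4 u< 10.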
3764 MIRBuilder.buildAdd(Res, LHS, RHS);
3765 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
3766
3767 MI.eraseFromParent();
3768 return Legalized;
3769 }
3770 case G_UADDE: {
3771 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
3772 const LLT CondTy = MRI.getType(CarryOut);
3773 const LLT Ty = MRI.getType(Res);
3774
3775 // Initial add of the two operands.
3776 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
3777
3778 // Initial check for carry.
3779 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
3780
3781 // Add the sum and the carry.
3782 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
3783 MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
3784
3785 // Second check for carry. We can only carry if the initial sum is all 1s
3786 // and the carry is set, resulting in a new sum of 0.
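    // e.g. for s8 with LHS = 0xFF, RHS = 0x00, CarryIn = 1: TmpRes = 0xFF with
    // no carry, Res = 0x00, and Res == 0 with CarryIn set gives CarryOut = 1.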
3787 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3788 auto ResEqZero = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, Res, Zero);
3789 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
3790 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
3791
3792 MI.eraseFromParent();
3793 return Legalized;
3794 }
3795 case G_USUBO: {
3796 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
3797
3798 MIRBuilder.buildSub(Res, LHS, RHS);
3799 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
3800
3801 MI.eraseFromParent();
3802 return Legalized;
3803 }
3804 case G_USUBE: {
3805 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
3806 const LLT CondTy = MRI.getType(BorrowOut);
3807 const LLT Ty = MRI.getType(Res);
3808
3809 // Initial subtract of the two operands.
3810 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3811
3812 // Initial check for borrow.
3813 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
3814
3815 // Subtract the borrow from the first subtract.
3816 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
3817 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
3818
3819 // Second check for borrow. We can only borrow if the initial difference is
3820 // 0 and the borrow is set, resulting in a new difference of all 1s.
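    // e.g. for s8 with LHS = 0x00, RHS = 0x00, BorrowIn = 1: TmpRes = 0x00 with
    // no borrow, Res = 0xFF, and TmpRes == 0 with BorrowIn set gives BorrowOut = 1.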
3821 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3822 auto TmpResEqZero =
3823 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
3824 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
3825 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
3826
3827 MI.eraseFromParent();
3828 return Legalized;
3829 }
3830 case G_UITOFP:
3831 return lowerUITOFP(MI);
3832 case G_SITOFP:
3833 return lowerSITOFP(MI);
3834 case G_FPTOUI:
3835 return lowerFPTOUI(MI);
3836 case G_FPTOSI:
3837 return lowerFPTOSI(MI);
3838 case G_FPTRUNC:
3839 return lowerFPTRUNC(MI);
3840 case G_FPOWI:
3841 return lowerFPOWI(MI);
3842 case G_SMIN:
3843 case G_SMAX:
3844 case G_UMIN:
3845 case G_UMAX:
3846 return lowerMinMax(MI);
3847 case G_FCOPYSIGN:
3848 return lowerFCopySign(MI);
3849 case G_FMINNUM:
3850 case G_FMAXNUM:
3851 return lowerFMinNumMaxNum(MI);
3852 case G_MERGE_VALUES:
3853 return lowerMergeValues(MI);
3854 case G_UNMERGE_VALUES:
3855 return lowerUnmergeValues(MI);
3856 case TargetOpcode::G_SEXT_INREG: {
3857 assert(MI.getOperand(2).isImm() && "Expected immediate");
3858 int64_t SizeInBits = MI.getOperand(2).getImm();
3859
3860 auto [DstReg, SrcReg] = MI.getFirst2Regs();
3861 LLT DstTy = MRI.getType(DstReg);
3862 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
3863
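    // Lower as a left shift followed by an arithmetic right shift, e.g.
    // (illustrative) G_SEXT_INREG %x:_(s32), 8 becomes:
    //   %t:_(s32) = G_SHL %x, 24
    //   %dst:_(s32) = G_ASHR %t, 24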
3864 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
3865 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
3866 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
3867 MI.eraseFromParent();
3868 return Legalized;
3869 }
3870 case G_EXTRACT_VECTOR_ELT:
3871 case G_INSERT_VECTOR_ELT:
3872 return lowerExtractInsertVectorElt(MI);
3873 case G_SHUFFLE_VECTOR:
3874 return lowerShuffleVector(MI);
3875 case G_DYN_STACKALLOC:
3876 return lowerDynStackAlloc(MI);
3877 case G_STACKSAVE:
3878 return lowerStackSave(MI);
3879 case G_STACKRESTORE:
3880 return lowerStackRestore(MI);
3881 case G_EXTRACT:
3882 return lowerExtract(MI);
3883 case G_INSERT:
3884 return lowerInsert(MI);
3885 case G_BSWAP:
3886 return lowerBswap(MI);
3887 case G_BITREVERSE:
3888 return lowerBitreverse(MI);
3889 case G_READ_REGISTER:
3890 case G_WRITE_REGISTER:
3891 return lowerReadWriteRegister(MI);
3892 case G_UADDSAT:
3893 case G_USUBSAT: {
3894 // Try to make a reasonable guess about which lowering strategy to use. The
3895 // target can override this with custom lowering and by calling the
3896 // implementation functions directly.
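    // For example, uadd.sat(a, b) expands to a + umin(~a, b) when G_UMIN is
    // legal or custom, and otherwise falls back to a G_UADDO-based expansion.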
3897 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3898 if (LI.isLegalOrCustom({G_UMIN, Ty}))
3899 return lowerAddSubSatToMinMax(MI);
3900 return lowerAddSubSatToAddoSubo(MI);
3901 }
3902 case G_SADDSAT:
3903 case G_SSUBSAT: {
3904 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3905
3906 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3907 // since it's a shorter expansion. However, we would need to figure out the
3908 // preferred boolean type for the carry out for the query.
3909 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3910 return lowerAddSubSatToMinMax(MI);
3911 return lowerAddSubSatToAddoSubo(MI);
3912 }
3913 case G_SSHLSAT:
3914 case G_USHLSAT:
3915 return lowerShlSat(MI);
3916 case G_ABS:
3917 return lowerAbsToAddXor(MI);
3918 case G_SELECT:
3919 return lowerSelect(MI);
3920 case G_IS_FPCLASS:
3921 return lowerISFPCLASS(MI);
3922 case G_SDIVREM:
3923 case G_UDIVREM:
3924 return lowerDIVREM(MI);
3925 case G_FSHL:
3926 case G_FSHR:
3927 return lowerFunnelShift(MI);
3928 case G_ROTL:
3929 case G_ROTR:
3930 return lowerRotate(MI);
3931 case G_MEMSET:
3932 case G_MEMCPY:
3933 case G_MEMMOVE:
3934 return lowerMemCpyFamily(MI);
3935 case G_MEMCPY_INLINE:
3936 return lowerMemcpyInline(MI);
3937 case G_ZEXT:
3938 case G_SEXT:
3939 case G_ANYEXT:
3940 return lowerEXT(MI);
3941 case G_TRUNC:
3942 return lowerTRUNC(MI);
3943 GISEL_VECREDUCE_CASES_NONSEQ
3944 return lowerVectorReduction(MI);
3945 case G_VAARG:
3946 return lowerVAArg(MI);
3947 }
3948}
3949
3950Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
3951 Align MinAlign) const {
3952 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
3953 // datalayout for the preferred alignment. Also there should be a target hook
3954 // for this to allow targets to reduce the alignment and ignore the
3955 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
3956 // the type.
3957 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
3958}
3959
3960MachineInstrBuilder
3961LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
3962 MachinePointerInfo &PtrInfo) {
3963 MachineFunction &MF = MIRBuilder.getMF();
3964 const DataLayout &DL = MIRBuilder.getDataLayout();
3965 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
3966
3967 unsigned AddrSpace = DL.getAllocaAddrSpace();
3968 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3969
3970 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
3971 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
3972}
3973
3974static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
3975 LLT VecTy) {
3976 LLT IdxTy = B.getMRI()->getType(IdxReg);
3977 unsigned NElts = VecTy.getNumElements();
3978
3979 int64_t IdxVal;
3980 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
3981 if (IdxVal < VecTy.getNumElements())
3982 return IdxReg;
3983 // If a constant index would be out of bounds, clamp it as well.
3984 }
3985
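  // e.g. with 4 elements the index is masked with 3; with 3 elements it is
  // clamped to umin(Idx, 2).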
3986 if (isPowerOf2_32(NElts)) {
3987 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
3988 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
3989 }
3990
3991 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
3992 .getReg(0);
3993}
3994
3995Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
3996 Register Index) {
3997 LLT EltTy = VecTy.getElementType();
3998
3999 // Calculate the element offset and add it to the pointer.
4000 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4001 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4002 "Converting bits to bytes lost precision");
4003
4003
4004 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4005
4006 LLT IdxTy = MRI.getType(Index);
4007 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4008 MIRBuilder.buildConstant(IdxTy, EltSize));
4009
4010 LLT PtrTy = MRI.getType(VecPtr);
4011 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4012}
4013
4014#ifndef NDEBUG
4015/// Check that all vector operands have the same number of elements. Other
4016/// operands should be listed in \p NonVecOpIndices.
4019 std::initializer_list<unsigned> NonVecOpIndices) {
4020 if (MI.getNumMemOperands() != 0)
4021 return false;
4022
4023 LLT VecTy = MRI.getType(MI.getReg(0));
4024 if (!VecTy.isVector())
4025 return false;
4026 unsigned NumElts = VecTy.getNumElements();
4027
4028 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4029 MachineOperand &Op = MI.getOperand(OpIdx);
4030 if (!Op.isReg()) {
4031 if (!is_contained(NonVecOpIndices, OpIdx))
4032 return false;
4033 continue;
4034 }
4035
4036 LLT Ty = MRI.getType(Op.getReg());
4037 if (!Ty.isVector()) {
4038 if (!is_contained(NonVecOpIndices, OpIdx))
4039 return false;
4040 continue;
4041 }
4042
4043 if (Ty.getNumElements() != NumElts)
4044 return false;
4045 }
4046
4047 return true;
4048}
4049#endif
4050
4051/// Fill \p DstOps with DstOps that have the same total number of elements as
4052/// \p Ty. These DstOps are either scalars when \p NumElts = 1 or vectors with
4053/// \p NumElts elements. When Ty.getNumElements() is not a multiple of
4054/// \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
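/// For example (illustrative), splitting <7 x s32> with \p NumElts = 4 produces
/// DstOps of <4 x s32> and a <3 x s32> leftover.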
4055static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
4056 unsigned NumElts) {
4057 LLT LeftoverTy;
4058 assert(Ty.isVector() && "Expected vector type");
4059 LLT EltTy = Ty.getElementType();
4060 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
4061 int NumParts, NumLeftover;
4062 std::tie(NumParts, NumLeftover) =
4063 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
4064
4065 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
4066 for (int i = 0; i < NumParts; ++i) {
4067 DstOps.push_back(NarrowTy);
4068 }
4069
4070 if (LeftoverTy.isValid()) {
4071 assert(NumLeftover == 1 && "expected exactly one leftover");
4072 DstOps.push_back(LeftoverTy);
4073 }
4074}
4075
4076/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
4077/// made from \p Op depending on operand type.
4078static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
4079 MachineOperand &Op) {
4080 for (unsigned i = 0; i < N; ++i) {
4081 if (Op.isReg())
4082 Ops.push_back(Op.getReg());
4083 else if (Op.isImm())
4084 Ops.push_back(Op.getImm());
4085 else if (Op.isPredicate())
4086 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
4087 else
4088 llvm_unreachable("Unsupported type");
4089 }
4090}
4091
4092// Handle splitting vector operations which need to have the same number of
4093// elements in each type index, but each type index may have a different element
4094// type.
4095//
4096// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
4097// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4098// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4099//
4100// Also handles some irregular breakdown cases, e.g.
4101// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
4102// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4103// s64 = G_SHL s64, s32
4106 GenericMachineInstr &MI, unsigned NumElts,
4107 std::initializer_list<unsigned> NonVecOpIndices) {
4108 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
4109 "Non-compatible opcode or not specified non-vector operands");
4110 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4111
4112 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4113 unsigned NumDefs = MI.getNumDefs();
4114
4115 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
4116 // Build instructions with DstOps so that an instruction found by CSE can be
4117 // used directly; CSE copies it into the given vreg when building with a vreg dest.
4118 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
4119 // Output registers will be taken from created instructions.
4120 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
4121 for (unsigned i = 0; i < NumDefs; ++i) {
4122 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
4123 }
4124
4125 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
4126 // Operands listed in NonVecOpIndices will be used as is without splitting;
4127 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
4128 // scalar condition (op 1), immediate in sext_inreg (op 2).
4129 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
4130 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4131 ++UseIdx, ++UseNo) {
4132 if (is_contained(NonVecOpIndices, UseIdx)) {
4133 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
4134 MI.getOperand(UseIdx));
4135 } else {
4136 SmallVector<Register, 8> SplitPieces;
4137 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
4138 MRI);
4139 for (auto Reg : SplitPieces)
4140 InputOpsPieces[UseNo].push_back(Reg);
4141 }
4142 }
4143
4144 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4145
4146 // Take i-th piece of each input operand split and build sub-vector/scalar
4147 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
4148 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4150 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4151 Defs.push_back(OutputOpsPieces[DstNo][i]);
4152
4154 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
4155 Uses.push_back(InputOpsPieces[InputNo][i]);
4156
4157 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
4158 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4159 OutputRegs[DstNo].push_back(I.getReg(DstNo));
4160 }
4161
4162 // Merge small outputs into MI's output for each def operand.
4163 if (NumLeftovers) {
4164 for (unsigned i = 0; i < NumDefs; ++i)
4165 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
4166 } else {
4167 for (unsigned i = 0; i < NumDefs; ++i)
4168 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
4169 }
4170
4171 MI.eraseFromParent();
4172 return Legalized;
4173}
4174
4177 unsigned NumElts) {
4178 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4179
4180 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4181 unsigned NumDefs = MI.getNumDefs();
4182
4183 SmallVector<DstOp, 8> OutputOpsPieces;
4184 SmallVector<Register, 8> OutputRegs;
4185 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
4186
4187 // Instructions that perform the register split will be inserted in the basic
4188 // block where the register is defined (the basic block is in the next operand).
4189 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
4190 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4191 UseIdx += 2, ++UseNo) {
4192 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
4194 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
4195 MIRBuilder, MRI);
4196 }
4197
4198 // Build PHIs with fewer elements.
4199 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4200 MIRBuilder.setInsertPt(*MI.getParent(), MI);
4201 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4202 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
4203 Phi.addDef(
4204 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
4205 OutputRegs.push_back(Phi.getReg(0));
4206
4207 for (unsigned j = 0; j < NumInputs / 2; ++j) {
4208 Phi.addUse(InputOpsPieces[j][i]);
4209 Phi.add(MI.getOperand(1 + j * 2 + 1));
4210 }
4211 }
4212
4213 // Merge small outputs into MI's def.
4214 if (NumLeftovers) {
4215 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
4216 } else {
4217 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
4218 }
4219
4220 MI.eraseFromParent();
4221 return Legalized;
4222}
4223
4226 unsigned TypeIdx,
4227 LLT NarrowTy) {
4228 const int NumDst = MI.getNumOperands() - 1;
4229 const Register SrcReg = MI.getOperand(NumDst).getReg();
4230 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4231 LLT SrcTy = MRI.getType(SrcReg);
4232
4233 if (TypeIdx != 1 || NarrowTy == DstTy)
4234 return UnableToLegalize;
4235
4236 // Requires compatible types. Otherwise SrcReg should have been defined by a
4237 // merge-like instruction that would get artifact-combined. Most likely the
4238 // instruction that defines SrcReg has to perform a more/fewer-elements
4239 // legalization compatible with NarrowTy.
4240 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
4241 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4242
4243 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
4244 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
4245 return UnableToLegalize;
4246
4247 // This is most likely DstTy (smaller than register size) packed in SrcTy
4248 // (larger than register size); since the unmerge was not combined, it will be
4249 // lowered to bit-sequence extracts from a register. Unpack SrcTy to NarrowTy
4250 // (register size) pieces first, then unpack each NarrowTy piece to DstTy.
4251
4252 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
4253 //
4254 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
4255 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
4256 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
4257 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
4258 const int NumUnmerge = Unmerge->getNumOperands() - 1;
4259 const int PartsPerUnmerge = NumDst / NumUnmerge;
4260
4261 for (int I = 0; I != NumUnmerge; ++I) {
4262 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
4263
4264 for (int J = 0; J != PartsPerUnmerge; ++J)
4265 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
4266 MIB.addUse(Unmerge.getReg(I));
4267 }
4268
4269 MI.eraseFromParent();
4270 return Legalized;
4271}
4272
4275 LLT NarrowTy) {
4276 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
4277 // Requires compatible types. Otherwise the user of DstReg did not perform an
4278 // unmerge that should have been artifact-combined. Most likely the instruction
4279 // that uses DstReg has to do more/fewer-elements legalization compatible with NarrowTy.
4280 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
4281 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4282 if (NarrowTy == SrcTy)
4283 return UnableToLegalize;
4284
4285 // This attempts to lower part of an LCMTy merge/unmerge sequence. Its intended
4286 // use is for old MIR tests. Since the change to more/fewer-elements legalization
4287 // it should no longer be possible to generate MIR like this when starting from
4288 // LLVM IR, because the LCMTy approach was replaced with merge/unmerge to vector elements.
4289 if (TypeIdx == 1) {
4290 assert(SrcTy.isVector() && "Expected vector types");
4291 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4292 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
4293 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
4294 return UnableToLegalize;
4295 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
4296 //
4297 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
4298 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
4299 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
4300 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
4301 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
4302 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
4303
4305 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
4306 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
4307 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
4308 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
4309 Elts.push_back(Unmerge.getReg(j));
4310 }
4311
4312 SmallVector<Register, 8> NarrowTyElts;
4313 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
4314 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
4315 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
4316 ++i, Offset += NumNarrowTyElts) {
4317 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
4318 NarrowTyElts.push_back(
4319 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
4320 }
4321
4322 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
4323 MI.eraseFromParent();
4324 return Legalized;
4325 }
4326
4327 assert(TypeIdx == 0 && "Bad type index");
4328 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
4329 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
4330 return UnableToLegalize;
4331
4332 // This is most likely SrcTy (smaller than register size) packed in DstTy
4333 // (larger than register size); since the merge was not combined, it will be
4334 // lowered to bit-sequence packing into a register. Merge SrcTy to NarrowTy
4335 // (register size) pieces first, then merge each NarrowTy piece to DstTy.
4336
4337 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
4338 //
4339 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
4340 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
4341 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
4342 SmallVector<Register, 8> NarrowTyElts;
4343 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
4344 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
4345 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
4346 for (unsigned i = 0; i < NumParts; ++i) {
4348 for (unsigned j = 0; j < NumElts; ++j)
4349 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
4350 NarrowTyElts.push_back(
4351 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
4352 }
4353
4354 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
4355 MI.eraseFromParent();
4356 return Legalized;
4357}
4358
4361 unsigned TypeIdx,
4362 LLT NarrowVecTy) {
4363 auto [DstReg, SrcVec] = MI.getFirst2Regs();
4364 Register InsertVal;
4365 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
4366
4367 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
4368 if (IsInsert)
4369 InsertVal = MI.getOperand(2).getReg();
4370
4371 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
4372
4373 // TODO: Handle total scalarization case.
4374 if (!NarrowVecTy.isVector())
4375 return UnableToLegalize;
4376
4377 LLT VecTy = MRI.getType(SrcVec);
4378
4379 // If the index is a constant, we can really break this down as you would
4380 // expect, and index into the target size pieces.
4381 int64_t IdxVal;
4382 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
4383 if (MaybeCst) {
4384 IdxVal = MaybeCst->Value.getSExtValue();
4385 // Avoid out of bounds indexing the pieces.
4386 if (IdxVal >= VecTy.getNumElements()) {
4387 MIRBuilder.buildUndef(DstReg);
4388 MI.eraseFromParent();
4389 return Legalized;
4390 }
4391
4392 SmallVector<Register, 8> VecParts;
4393 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
4394
4395 // Build a sequence of NarrowTy pieces in VecParts for this operand.
4396 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
4397 TargetOpcode::G_ANYEXT);
4398
4399 unsigned NewNumElts = NarrowVecTy.getNumElements();
4400
4401 LLT IdxTy = MRI.getType(Idx);
4402 int64_t PartIdx = IdxVal / NewNumElts;
4403 auto NewIdx =
4404 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
4405
4406 if (IsInsert) {
4407 LLT PartTy = MRI.getType(VecParts[PartIdx]);
4408
4409 // Use the adjusted index to insert into one of the subvectors.
4410 auto InsertPart = MIRBuilder.buildInsertVectorElement(
4411 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
4412 VecParts[PartIdx] = InsertPart.getReg(0);
4413
4414 // Recombine the inserted subvector with the others to reform the result
4415 // vector.
4416 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
4417 } else {
4418