LegalizerHelper.cpp
1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
35#include "llvm/Support/Debug.h"
39#include <numeric>
40#include <optional>
41
42#define DEBUG_TYPE "legalizer"
43
44using namespace llvm;
45using namespace LegalizeActions;
46using namespace MIPatternMatch;
47
48/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
49///
50/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
51/// with any leftover piece as type \p LeftoverTy
52///
53/// Returns -1 in the first element of the pair if the breakdown is not
54/// satisfiable.
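/// For example (illustrative, tracing the code below): breaking an s88
/// \p OrigTy into s32 \p NarrowTy pieces returns {2, 1} with \p LeftoverTy
/// set to s24, i.e. two s32 parts plus one s24 leftover piece.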
55static std::pair<int, int>
56getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
57 assert(!LeftoverTy.isValid() && "this is an out argument");
58
59 unsigned Size = OrigTy.getSizeInBits();
60 unsigned NarrowSize = NarrowTy.getSizeInBits();
61 unsigned NumParts = Size / NarrowSize;
62 unsigned LeftoverSize = Size - NumParts * NarrowSize;
63 assert(Size > NarrowSize);
64
65 if (LeftoverSize == 0)
66 return {NumParts, 0};
67
68 if (NarrowTy.isVector()) {
69 unsigned EltSize = OrigTy.getScalarSizeInBits();
70 if (LeftoverSize % EltSize != 0)
71 return {-1, -1};
72 LeftoverTy =
73 LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
74 OrigTy.getElementType());
75 } else {
76 LeftoverTy = LLT::scalar(LeftoverSize);
77 }
78
79 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
80 return std::make_pair(NumParts, NumLeftover);
81}
82
83static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
84
85 if (!Ty.isScalar())
86 return nullptr;
87
88 switch (Ty.getSizeInBits()) {
89 case 16:
90 return Type::getHalfTy(Ctx);
91 case 32:
92 return Type::getFloatTy(Ctx);
93 case 64:
94 return Type::getDoubleTy(Ctx);
95 case 80:
96 return Type::getX86_FP80Ty(Ctx);
97 case 128:
98 return Type::getFP128Ty(Ctx);
99 default:
100 return nullptr;
101 }
102}
103
104LegalizerHelper::LegalizerHelper(MachineFunction &MF,
105 GISelChangeObserver &Observer,
106 MachineIRBuilder &Builder)
107 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
108 LI(*MF.getSubtarget().getLegalizerInfo()),
109 TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
110
111LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
112 GISelChangeObserver &Observer,
113 MachineIRBuilder &B, GISelKnownBits *KB)
114 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
115 TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
116
117LegalizerHelper::LegalizeResult
118LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
119 LostDebugLocObserver &LocObserver) {
120 LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
121
122 MIRBuilder.setInstrAndDebugLoc(MI);
123
124 if (isa<GIntrinsic>(MI))
125 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
126 auto Step = LI.getAction(MI, MRI);
127 switch (Step.Action) {
128 case Legal:
129 LLVM_DEBUG(dbgs() << ".. Already legal\n");
130 return AlreadyLegal;
131 case Libcall:
132 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
133 return libcall(MI, LocObserver);
134 case NarrowScalar:
135 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
136 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
137 case WidenScalar:
138 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
139 return widenScalar(MI, Step.TypeIdx, Step.NewType);
140 case Bitcast:
141 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
142 return bitcast(MI, Step.TypeIdx, Step.NewType);
143 case Lower:
144 LLVM_DEBUG(dbgs() << ".. Lower\n");
145 return lower(MI, Step.TypeIdx, Step.NewType);
146 case FewerElements:
147 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
148 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
149 case MoreElements:
150 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
151 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
152 case Custom:
153 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
154 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
155 : UnableToLegalize;
156 default:
157 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
158 return UnableToLegalize;
159 }
160}
161
162void LegalizerHelper::insertParts(Register DstReg,
163 LLT ResultTy, LLT PartTy,
164 ArrayRef<Register> PartRegs,
165 LLT LeftoverTy,
166 ArrayRef<Register> LeftoverRegs) {
167 if (!LeftoverTy.isValid()) {
168 assert(LeftoverRegs.empty());
169
170 if (!ResultTy.isVector()) {
171 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
172 return;
173 }
174
175 if (PartTy.isVector())
176 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
177 else
178 MIRBuilder.buildBuildVector(DstReg, PartRegs);
179 return;
180 }
181
182 // Merge sub-vectors with different number of elements and insert into DstReg.
183 if (ResultTy.isVector()) {
184 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
185 SmallVector<Register, 8> AllRegs;
186 for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
187 AllRegs.push_back(Reg);
188 return mergeMixedSubvectors(DstReg, AllRegs);
189 }
190
191 SmallVector<Register> GCDRegs;
192 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
193 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
194 extractGCDType(GCDRegs, GCDTy, PartReg);
195 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
196 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
197}
198
199void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
200 Register Reg) {
201 LLT Ty = MRI.getType(Reg);
202 SmallVector<Register, 8> RegElts;
203 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
204 MIRBuilder, MRI);
205 Elts.append(RegElts);
206}
207
208/// Merge \p PartRegs with different types into \p DstReg.
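/// For example (illustrative): a <5 x s16> destination built from two
/// <2 x s16> parts plus an s16 leftover is unmerged to scalar elements here
/// and re-merged into the full vector.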
209void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
210 ArrayRef<Register> PartRegs) {
211 SmallVector<Register, 8> AllElts;
212 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
213 appendVectorElts(AllElts, PartRegs[i]);
214
215 Register Leftover = PartRegs[PartRegs.size() - 1];
216 if (!MRI.getType(Leftover).isVector())
217 AllElts.push_back(Leftover);
218 else
219 appendVectorElts(AllElts, Leftover);
220
221 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
222}
223
224/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
225static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
226 const MachineInstr &MI) {
227 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
228
229 const int StartIdx = Regs.size();
230 const int NumResults = MI.getNumOperands() - 1;
231 Regs.resize(Regs.size() + NumResults);
232 for (int I = 0; I != NumResults; ++I)
233 Regs[StartIdx + I] = MI.getOperand(I).getReg();
234}
235
236void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
237 LLT GCDTy, Register SrcReg) {
238 LLT SrcTy = MRI.getType(SrcReg);
239 if (SrcTy == GCDTy) {
240 // If the source already evenly divides the result type, we don't need to do
241 // anything.
242 Parts.push_back(SrcReg);
243 } else {
244 // Need to split into common type sized pieces.
245 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
246 getUnmergeResults(Parts, *Unmerge);
247 }
248}
249
250LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
251 LLT NarrowTy, Register SrcReg) {
252 LLT SrcTy = MRI.getType(SrcReg);
253 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
254 extractGCDType(Parts, GCDTy, SrcReg);
255 return GCDTy;
256}
257
258LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
259 SmallVectorImpl<Register> &VRegs,
260 unsigned PadStrategy) {
261 LLT LCMTy = getLCMType(DstTy, NarrowTy);
262
263 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
264 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
265 int NumOrigSrc = VRegs.size();
266
267 Register PadReg;
268
269 // Get a value we can use to pad the source value if the sources won't evenly
270 // cover the result type.
271 if (NumOrigSrc < NumParts * NumSubParts) {
272 if (PadStrategy == TargetOpcode::G_ZEXT)
273 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
274 else if (PadStrategy == TargetOpcode::G_ANYEXT)
275 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
276 else {
277 assert(PadStrategy == TargetOpcode::G_SEXT);
278
279 // Shift the sign bit of the low register through the high register.
280 auto ShiftAmt =
281 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
282 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
283 }
284 }
285
286 // Registers for the final merge to be produced.
287 SmallVector<Register, 4> Remerge(NumParts);
288
289 // Registers needed for intermediate merges, which will be merged into a
290 // source for Remerge.
291 SmallVector<Register, 4> SubMerge(NumSubParts);
292
293 // Once we've fully read off the end of the original source bits, we can reuse
294 // the same high bits for remaining padding elements.
295 Register AllPadReg;
296
297 // Build merges to the LCM type to cover the original result type.
298 for (int I = 0; I != NumParts; ++I) {
299 bool AllMergePartsArePadding = true;
300
301 // Build the requested merges to the requested type.
302 for (int J = 0; J != NumSubParts; ++J) {
303 int Idx = I * NumSubParts + J;
304 if (Idx >= NumOrigSrc) {
305 SubMerge[J] = PadReg;
306 continue;
307 }
308
309 SubMerge[J] = VRegs[Idx];
310
311 // There are meaningful bits here we can't reuse later.
312 AllMergePartsArePadding = false;
313 }
314
315 // If we've filled up a complete piece with padding bits, we can directly
316 // emit the natural sized constant if applicable, rather than a merge of
317 // smaller constants.
318 if (AllMergePartsArePadding && !AllPadReg) {
319 if (PadStrategy == TargetOpcode::G_ANYEXT)
320 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
321 else if (PadStrategy == TargetOpcode::G_ZEXT)
322 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
323
324 // If this is a sign extension, we can't materialize a trivial constant
325 // with the right type and have to produce a merge.
326 }
327
328 if (AllPadReg) {
329 // Avoid creating additional instructions if we're just adding additional
330 // copies of padding bits.
331 Remerge[I] = AllPadReg;
332 continue;
333 }
334
335 if (NumSubParts == 1)
336 Remerge[I] = SubMerge[0];
337 else
338 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
339
340 // In the sign extend padding case, re-use the first all-signbit merge.
341 if (AllMergePartsArePadding && !AllPadReg)
342 AllPadReg = Remerge[I];
343 }
344
345 VRegs = std::move(Remerge);
346 return LCMTy;
347}
348
349void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
350 ArrayRef<Register> RemergeRegs) {
351 LLT DstTy = MRI.getType(DstReg);
352
353 // Create the merge to the widened source, and extract the relevant bits into
354 // the result.
355
356 if (DstTy == LCMTy) {
357 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
358 return;
359 }
360
361 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
362 if (DstTy.isScalar() && LCMTy.isScalar()) {
363 MIRBuilder.buildTrunc(DstReg, Remerge);
364 return;
365 }
366
367 if (LCMTy.isVector()) {
368 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
369 SmallVector<Register, 8> UnmergeDefs(NumDefs);
370 UnmergeDefs[0] = DstReg;
371 for (unsigned I = 1; I != NumDefs; ++I)
372 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
373
374 MIRBuilder.buildUnmerge(UnmergeDefs,
375 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
376 return;
377 }
378
379 llvm_unreachable("unhandled case");
380}
381
382static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
383#define RTLIBCASE_INT(LibcallPrefix) \
384 do { \
385 switch (Size) { \
386 case 32: \
387 return RTLIB::LibcallPrefix##32; \
388 case 64: \
389 return RTLIB::LibcallPrefix##64; \
390 case 128: \
391 return RTLIB::LibcallPrefix##128; \
392 default: \
393 llvm_unreachable("unexpected size"); \
394 } \
395 } while (0)
396
397#define RTLIBCASE(LibcallPrefix) \
398 do { \
399 switch (Size) { \
400 case 32: \
401 return RTLIB::LibcallPrefix##32; \
402 case 64: \
403 return RTLIB::LibcallPrefix##64; \
404 case 80: \
405 return RTLIB::LibcallPrefix##80; \
406 case 128: \
407 return RTLIB::LibcallPrefix##128; \
408 default: \
409 llvm_unreachable("unexpected size"); \
410 } \
411 } while (0)
412
413 switch (Opcode) {
414 case TargetOpcode::G_MUL:
415 RTLIBCASE_INT(MUL_I);
416 case TargetOpcode::G_SDIV:
417 RTLIBCASE_INT(SDIV_I);
418 case TargetOpcode::G_UDIV:
419 RTLIBCASE_INT(UDIV_I);
420 case TargetOpcode::G_SREM:
421 RTLIBCASE_INT(SREM_I);
422 case TargetOpcode::G_UREM:
423 RTLIBCASE_INT(UREM_I);
424 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
425 RTLIBCASE_INT(CTLZ_I);
426 case TargetOpcode::G_FADD:
427 RTLIBCASE(ADD_F);
428 case TargetOpcode::G_FSUB:
429 RTLIBCASE(SUB_F);
430 case TargetOpcode::G_FMUL:
431 RTLIBCASE(MUL_F);
432 case TargetOpcode::G_FDIV:
433 RTLIBCASE(DIV_F);
434 case TargetOpcode::G_FEXP:
435 RTLIBCASE(EXP_F);
436 case TargetOpcode::G_FEXP2:
437 RTLIBCASE(EXP2_F);
438 case TargetOpcode::G_FEXP10:
439 RTLIBCASE(EXP10_F);
440 case TargetOpcode::G_FREM:
441 RTLIBCASE(REM_F);
442 case TargetOpcode::G_FPOW:
443 RTLIBCASE(POW_F);
444 case TargetOpcode::G_FPOWI:
445 RTLIBCASE(POWI_F);
446 case TargetOpcode::G_FMA:
447 RTLIBCASE(FMA_F);
448 case TargetOpcode::G_FSIN:
449 RTLIBCASE(SIN_F);
450 case TargetOpcode::G_FCOS:
451 RTLIBCASE(COS_F);
452 case TargetOpcode::G_FLOG10:
453 RTLIBCASE(LOG10_F);
454 case TargetOpcode::G_FLOG:
455 RTLIBCASE(LOG_F);
456 case TargetOpcode::G_FLOG2:
457 RTLIBCASE(LOG2_F);
458 case TargetOpcode::G_FLDEXP:
459 RTLIBCASE(LDEXP_F);
460 case TargetOpcode::G_FCEIL:
461 RTLIBCASE(CEIL_F);
462 case TargetOpcode::G_FFLOOR:
463 RTLIBCASE(FLOOR_F);
464 case TargetOpcode::G_FMINNUM:
465 RTLIBCASE(FMIN_F);
466 case TargetOpcode::G_FMAXNUM:
467 RTLIBCASE(FMAX_F);
468 case TargetOpcode::G_FSQRT:
469 RTLIBCASE(SQRT_F);
470 case TargetOpcode::G_FRINT:
471 RTLIBCASE(RINT_F);
472 case TargetOpcode::G_FNEARBYINT:
473 RTLIBCASE(NEARBYINT_F);
474 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
475 RTLIBCASE(ROUNDEVEN_F);
476 case TargetOpcode::G_INTRINSIC_LRINT:
477 RTLIBCASE(LRINT_F);
478 case TargetOpcode::G_INTRINSIC_LLRINT:
479 RTLIBCASE(LLRINT_F);
480 }
481 llvm_unreachable("Unknown libcall function");
482}
483
484/// True if an instruction is in tail position in its caller. Intended for
485/// legalizing libcalls as tail calls when possible.
486static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
487 MachineInstr &MI,
488 const TargetInstrInfo &TII,
489 MachineRegisterInfo &MRI) {
490 MachineBasicBlock &MBB = *MI.getParent();
491 const Function &F = MBB.getParent()->getFunction();
492
493 // Conservatively require the attributes of the call to match those of
494 // the return. Ignore NoAlias and NonNull because they don't affect the
495 // call sequence.
496 AttributeList CallerAttrs = F.getAttributes();
497 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
498 .removeAttribute(Attribute::NoAlias)
499 .removeAttribute(Attribute::NonNull)
500 .hasAttributes())
501 return false;
502
503 // It's not safe to eliminate the sign / zero extension of the return value.
504 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
505 CallerAttrs.hasRetAttr(Attribute::SExt))
506 return false;
507
508 // Only tail call if the following instruction is a standard return or if we
509 // have a `thisreturn` callee, and a sequence like:
510 //
511 // G_MEMCPY %0, %1, %2
512 // $x0 = COPY %0
513 // RET_ReallyLR implicit $x0
514 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
515 if (Next != MBB.instr_end() && Next->isCopy()) {
516 if (MI.getOpcode() == TargetOpcode::G_BZERO)
517 return false;
518
519 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
520 // memcpy/etc routines return the same parameter. For others it will be the
521 // returned value.
522 Register VReg = MI.getOperand(0).getReg();
523 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
524 return false;
525
526 Register PReg = Next->getOperand(0).getReg();
527 if (!PReg.isPhysical())
528 return false;
529
530 auto Ret = next_nodbg(Next, MBB.instr_end());
531 if (Ret == MBB.instr_end() || !Ret->isReturn())
532 return false;
533
534 if (Ret->getNumImplicitOperands() != 1)
535 return false;
536
537 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
538 return false;
539
540 // Skip over the COPY that we just validated.
541 Next = Ret;
542 }
543
544 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
545 return false;
546
547 return true;
548}
549
550LegalizerHelper::LegalizeResult
551llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
552 const CallLowering::ArgInfo &Result,
553 ArrayRef<CallLowering::ArgInfo> Args,
554 const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
555 MachineInstr *MI) {
556 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
557
558 CallLowering::CallLoweringInfo Info;
559 Info.CallConv = CC;
560 Info.Callee = MachineOperand::CreateES(Name);
561 Info.OrigRet = Result;
562 if (MI)
563 Info.IsTailCall =
564 (Result.Ty->isVoidTy() ||
565 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
566 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
567 *MIRBuilder.getMRI());
568
569 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
570 if (!CLI.lowerCall(MIRBuilder, Info))
571 return LegalizerHelper::UnableToLegalize;
572
573 if (MI && Info.LoweredTailCall) {
574 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
575
576 // Check debug locations before removing the return.
577 LocObserver.checkpoint(true);
578
579 // We must have a return following the call (or debug insts) to get past
580 // isLibCallInTailPosition.
581 do {
582 MachineInstr *Next = MI->getNextNode();
583 assert(Next &&
584 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
585 "Expected instr following MI to be return or debug inst?");
586 // We lowered a tail call, so the call is now the return from the block.
587 // Delete the old return.
588 Next->eraseFromParent();
589 } while (MI->getNextNode());
590
591 // We expect to lose the debug location from the return.
592 LocObserver.checkpoint(false);
593 }
594 return LegalizerHelper::Legalized;
595}
596
597LegalizerHelper::LegalizeResult
598llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
599 const CallLowering::ArgInfo &Result,
600 ArrayRef<CallLowering::ArgInfo> Args,
601 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
602 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
603 const char *Name = TLI.getLibcallName(Libcall);
604 if (!Name)
605 return LegalizerHelper::UnableToLegalize;
606 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
607 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
608}
609
610// Useful for libcalls where all operands have the same type.
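// For example (illustrative): an s64 G_FREM maps to RTLIB::REM_F64 via
// getRTLibDesc and is emitted as a call to fmod(double, double) on typical
// targets.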
611static LegalizerHelper::LegalizeResult
612simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
613 Type *OpType, LostDebugLocObserver &LocObserver) {
614 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
615
616 // FIXME: What does the original arg index mean here?
617 SmallVector<CallLowering::ArgInfo, 3> Args;
618 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
619 Args.push_back({MO.getReg(), OpType, 0});
620 return createLibcall(MIRBuilder, Libcall,
621 {MI.getOperand(0).getReg(), OpType, 0}, Args,
622 LocObserver, &MI);
623}
624
625LegalizerHelper::LegalizeResult
626llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
627 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
628 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
629
630 SmallVector<CallLowering::ArgInfo, 3> Args;
631 // Add all the args, except for the last which is an imm denoting 'tail'.
632 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
633 Register Reg = MI.getOperand(i).getReg();
634
635 // Need to derive an IR type for call lowering.
636 LLT OpLLT = MRI.getType(Reg);
637 Type *OpTy = nullptr;
638 if (OpLLT.isPointer())
639 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
640 else
641 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
642 Args.push_back({Reg, OpTy, 0});
643 }
644
645 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
646 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
647 RTLIB::Libcall RTLibcall;
648 unsigned Opc = MI.getOpcode();
649 switch (Opc) {
650 case TargetOpcode::G_BZERO:
651 RTLibcall = RTLIB::BZERO;
652 break;
653 case TargetOpcode::G_MEMCPY:
654 RTLibcall = RTLIB::MEMCPY;
655 Args[0].Flags[0].setReturned();
656 break;
657 case TargetOpcode::G_MEMMOVE:
658 RTLibcall = RTLIB::MEMMOVE;
659 Args[0].Flags[0].setReturned();
660 break;
661 case TargetOpcode::G_MEMSET:
662 RTLibcall = RTLIB::MEMSET;
663 Args[0].Flags[0].setReturned();
664 break;
665 default:
666 llvm_unreachable("unsupported opcode");
667 }
668 const char *Name = TLI.getLibcallName(RTLibcall);
669
670 // Unsupported libcall on the target.
671 if (!Name) {
672 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
673 << MIRBuilder.getTII().getName(Opc) << "\n");
674 return LegalizerHelper::UnableToLegalize;
675 }
676
677 CallLowering::CallLoweringInfo Info;
678 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
679 Info.Callee = MachineOperand::CreateES(Name);
680 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
681 Info.IsTailCall =
682 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
683 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
684
685 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
686 if (!CLI.lowerCall(MIRBuilder, Info))
687 return LegalizerHelper::UnableToLegalize;
688
689 if (Info.LoweredTailCall) {
690 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
691
692 // Check debug locations before removing the return.
693 LocObserver.checkpoint(true);
694
695 // We must have a return following the call (or debug insts) to get past
696 // isLibCallInTailPosition.
697 do {
698 MachineInstr *Next = MI.getNextNode();
699 assert(Next &&
700 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
701 "Expected instr following MI to be return or debug inst?");
702 // We lowered a tail call, so the call is now the return from the block.
703 // Delete the old return.
704 Next->eraseFromParent();
705 } while (MI.getNextNode());
706
707 // We expect to lose the debug location from the return.
708 LocObserver.checkpoint(false);
709 }
710
711 return LegalizerHelper::Legalized;
712}
713
714static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
715 unsigned Opc = MI.getOpcode();
716 auto &AtomicMI = cast<GMemOperation>(MI);
717 auto &MMO = AtomicMI.getMMO();
718 auto Ordering = MMO.getMergedOrdering();
719 LLT MemType = MMO.getMemoryType();
720 uint64_t MemSize = MemType.getSizeInBytes();
721 if (MemType.isVector())
722 return RTLIB::UNKNOWN_LIBCALL;
723
724#define LCALLS(A, B) \
725 { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
726#define LCALL5(A) \
727 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
728 switch (Opc) {
729 case TargetOpcode::G_ATOMIC_CMPXCHG:
730 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
731 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
732 return getOutlineAtomicHelper(LC, Ordering, MemSize);
733 }
734 case TargetOpcode::G_ATOMICRMW_XCHG: {
735 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
736 return getOutlineAtomicHelper(LC, Ordering, MemSize);
737 }
738 case TargetOpcode::G_ATOMICRMW_ADD:
739 case TargetOpcode::G_ATOMICRMW_SUB: {
740 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
741 return getOutlineAtomicHelper(LC, Ordering, MemSize);
742 }
743 case TargetOpcode::G_ATOMICRMW_AND: {
744 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
745 return getOutlineAtomicHelper(LC, Ordering, MemSize);
746 }
747 case TargetOpcode::G_ATOMICRMW_OR: {
748 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
749 return getOutlineAtomicHelper(LC, Ordering, MemSize);
750 }
751 case TargetOpcode::G_ATOMICRMW_XOR: {
752 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
753 return getOutlineAtomicHelper(LC, Ordering, MemSize);
754 }
755 default:
756 return RTLIB::UNKNOWN_LIBCALL;
757 }
758#undef LCALLS
759#undef LCALL5
760}
761
762LegalizerHelper::LegalizeResult
763llvm::createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
764 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
765
766 Type *RetTy;
767 SmallVector<Register> RetRegs;
768 SmallVector<CallLowering::ArgInfo, 3> Args;
769 unsigned Opc = MI.getOpcode();
770 switch (Opc) {
771 case TargetOpcode::G_ATOMIC_CMPXCHG:
772 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
773 Register Success;
774 LLT SuccessLLT;
775 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
776 MI.getFirst4RegLLTs();
777 RetRegs.push_back(Ret);
778 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
779 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
780 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
781 NewLLT) = MI.getFirst5RegLLTs();
782 RetRegs.push_back(Success);
783 RetTy = StructType::get(
784 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
785 }
786 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
787 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
788 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
789 break;
790 }
791 case TargetOpcode::G_ATOMICRMW_XCHG:
792 case TargetOpcode::G_ATOMICRMW_ADD:
793 case TargetOpcode::G_ATOMICRMW_SUB:
794 case TargetOpcode::G_ATOMICRMW_AND:
795 case TargetOpcode::G_ATOMICRMW_OR:
796 case TargetOpcode::G_ATOMICRMW_XOR: {
797 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
798 RetRegs.push_back(Ret);
799 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
800 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
801 Val =
802 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
803 .getReg(0);
804 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
805 Val =
806 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
807 .getReg(0);
808 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
809 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
810 break;
811 }
812 default:
813 llvm_unreachable("unsupported opcode");
814 }
815
816 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
817 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
818 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
819 const char *Name = TLI.getLibcallName(RTLibcall);
820
821 // Unsupported libcall on the target.
822 if (!Name) {
823 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
824 << MIRBuilder.getTII().getName(Opc) << "\n");
825 return LegalizerHelper::UnableToLegalize;
826 }
827
828 CallLowering::CallLoweringInfo Info;
829 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
830 Info.Callee = MachineOperand::CreateES(Name);
831 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
832
833 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
834 if (!CLI.lowerCall(MIRBuilder, Info))
835 return LegalizerHelper::UnableToLegalize;
836
837 return LegalizerHelper::Legalized;
838}
839
840static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
841 Type *FromType) {
842 auto ToMVT = MVT::getVT(ToType);
843 auto FromMVT = MVT::getVT(FromType);
844
845 switch (Opcode) {
846 case TargetOpcode::G_FPEXT:
847 return RTLIB::getFPEXT(FromMVT, ToMVT);
848 case TargetOpcode::G_FPTRUNC:
849 return RTLIB::getFPROUND(FromMVT, ToMVT);
850 case TargetOpcode::G_FPTOSI:
851 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
852 case TargetOpcode::G_FPTOUI:
853 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
854 case TargetOpcode::G_SITOFP:
855 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
856 case TargetOpcode::G_UITOFP:
857 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
858 }
859 llvm_unreachable("Unsupported libcall function");
860}
861
862static LegalizerHelper::LegalizeResult
863conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
864 Type *FromType, LostDebugLocObserver &LocObserver) {
865 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
866 return createLibcall(
867 MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType, 0},
868 {{MI.getOperand(1).getReg(), FromType, 0}}, LocObserver, &MI);
869}
870
871static RTLIB::Libcall
872getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
873 RTLIB::Libcall RTLibcall;
874 switch (MI.getOpcode()) {
875 case TargetOpcode::G_GET_FPENV:
876 RTLibcall = RTLIB::FEGETENV;
877 break;
878 case TargetOpcode::G_SET_FPENV:
879 case TargetOpcode::G_RESET_FPENV:
880 RTLibcall = RTLIB::FESETENV;
881 break;
882 case TargetOpcode::G_GET_FPMODE:
883 RTLibcall = RTLIB::FEGETMODE;
884 break;
885 case TargetOpcode::G_SET_FPMODE:
886 case TargetOpcode::G_RESET_FPMODE:
887 RTLibcall = RTLIB::FESETMODE;
888 break;
889 default:
890 llvm_unreachable("Unexpected opcode");
891 }
892 return RTLibcall;
893}
894
895// Some library functions that read FP state (fegetmode, fegetenv) write the
896// state into a region in memory. IR intrinsics that do the same operations
897// (get_fpmode, get_fpenv) return the state as an integer value. To implement
898// these intrinsics via the library functions, we need to use a temporary
899// variable, for example:
900//
901// %0:_(s32) = G_GET_FPMODE
902//
903// is transformed to:
904//
905// %1:_(p0) = G_FRAME_INDEX %stack.0
906// BL &fegetmode
907// %0:_(s32) = G_LOAD % 1
908//
909LegalizerHelper::LegalizeResult
910LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
911 MachineInstr &MI,
912 LostDebugLocObserver &LocObserver) {
913 const DataLayout &DL = MIRBuilder.getDataLayout();
914 auto &MF = MIRBuilder.getMF();
915 auto &MRI = *MIRBuilder.getMRI();
916 auto &Ctx = MF.getFunction().getContext();
917
918 // Create temporary, where library function will put the read state.
919 Register Dst = MI.getOperand(0).getReg();
920 LLT StateTy = MRI.getType(Dst);
921 TypeSize StateSize = StateTy.getSizeInBytes();
922 Align TempAlign = getStackTemporaryAlignment(StateTy);
923 MachinePointerInfo TempPtrInfo;
924 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
925
926 // Create a call to library function, with the temporary as an argument.
927 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
928 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
929 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
930 auto Res =
931 createLibcall(MIRBuilder, RTLibcall,
932 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
933 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
934 LocObserver, nullptr);
935 if (Res != LegalizerHelper::Legalized)
936 return Res;
937
938 // Create a load from the temporary.
939 MachineMemOperand *MMO = MF.getMachineMemOperand(
940 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
941 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
942
943 return LegalizerHelper::Legalized;
944}
945
946// Similar to `createGetStateLibcall`, this function calls a library function
947// using transient space on the stack. In this case the library function reads
948// the content of the memory region.
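// For example (sketch, mirroring the G_GET_FPMODE diagram above):
//
//   G_SET_FPMODE %0:_(s32)
//
// becomes a G_STORE of %0 into a stack temporary followed by a call to
// fesetmode with the temporary's address as the argument.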
949LegalizerHelper::LegalizeResult
950LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
951 MachineInstr &MI,
952 LostDebugLocObserver &LocObserver) {
953 const DataLayout &DL = MIRBuilder.getDataLayout();
954 auto &MF = MIRBuilder.getMF();
955 auto &MRI = *MIRBuilder.getMRI();
956 auto &Ctx = MF.getFunction().getContext();
957
958 // Create temporary, where library function will get the new state.
959 Register Src = MI.getOperand(0).getReg();
960 LLT StateTy = MRI.getType(Src);
961 TypeSize StateSize = StateTy.getSizeInBytes();
962 Align TempAlign = getStackTemporaryAlignment(StateTy);
963 MachinePointerInfo TempPtrInfo;
964 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
965
966 // Put the new state into the temporary.
967 MachineMemOperand *MMO = MF.getMachineMemOperand(
968 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
969 MIRBuilder.buildStore(Src, Temp, *MMO);
970
971 // Create a call to library function, with the temporary as an argument.
972 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
973 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
974 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
975 return createLibcall(MIRBuilder, RTLibcall,
976 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
977 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
978 LocObserver, nullptr);
979}
980
981// The function is used to legalize operations that set the default environment
982// state. In the C library a call like `fesetmode(FE_DFL_MODE)` is used for that.
983// On most targets supported in glibc, FE_DFL_MODE is defined as
984// `((const femode_t *) -1)`. That assumption is used here. If for some target
985// it is not true, the target must provide custom lowering.
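// For example (sketch): G_RESET_FPMODE is emitted as roughly
// `fesetmode((const femode_t *) -1)`; the -1 below is built as an integer
// constant and converted to the pointer argument with G_INTTOPTR.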
986LegalizerHelper::LegalizeResult
987LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
988 MachineInstr &MI,
989 LostDebugLocObserver &LocObserver) {
990 const DataLayout &DL = MIRBuilder.getDataLayout();
991 auto &MF = MIRBuilder.getMF();
992 auto &Ctx = MF.getFunction().getContext();
993
994 // Create an argument for the library function.
995 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
996 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
997 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
998 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
999 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1000 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1001 MIRBuilder.buildIntToPtr(Dest, DefValue);
1002
1003 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1004 return createLibcall(MIRBuilder, RTLibcall,
1005 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1006 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1007 LocObserver, &MI);
1008}
1009
1010LegalizerHelper::LegalizeResult
1011LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1012 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1013
1014 switch (MI.getOpcode()) {
1015 default:
1016 return UnableToLegalize;
1017 case TargetOpcode::G_MUL:
1018 case TargetOpcode::G_SDIV:
1019 case TargetOpcode::G_UDIV:
1020 case TargetOpcode::G_SREM:
1021 case TargetOpcode::G_UREM:
1022 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1023 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1024 unsigned Size = LLTy.getSizeInBits();
1025 Type *HLTy = IntegerType::get(Ctx, Size);
1026 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1027 if (Status != Legalized)
1028 return Status;
1029 break;
1030 }
1031 case TargetOpcode::G_FADD:
1032 case TargetOpcode::G_FSUB:
1033 case TargetOpcode::G_FMUL:
1034 case TargetOpcode::G_FDIV:
1035 case TargetOpcode::G_FMA:
1036 case TargetOpcode::G_FPOW:
1037 case TargetOpcode::G_FREM:
1038 case TargetOpcode::G_FCOS:
1039 case TargetOpcode::G_FSIN:
1040 case TargetOpcode::G_FLOG10:
1041 case TargetOpcode::G_FLOG:
1042 case TargetOpcode::G_FLOG2:
1043 case TargetOpcode::G_FLDEXP:
1044 case TargetOpcode::G_FEXP:
1045 case TargetOpcode::G_FEXP2:
1046 case TargetOpcode::G_FEXP10:
1047 case TargetOpcode::G_FCEIL:
1048 case TargetOpcode::G_FFLOOR:
1049 case TargetOpcode::G_FMINNUM:
1050 case TargetOpcode::G_FMAXNUM:
1051 case TargetOpcode::G_FSQRT:
1052 case TargetOpcode::G_FRINT:
1053 case TargetOpcode::G_FNEARBYINT:
1054 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1055 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1056 unsigned Size = LLTy.getSizeInBits();
1057 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1058 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1059 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1060 return UnableToLegalize;
1061 }
1062 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1063 if (Status != Legalized)
1064 return Status;
1065 break;
1066 }
1067 case TargetOpcode::G_INTRINSIC_LRINT:
1068 case TargetOpcode::G_INTRINSIC_LLRINT: {
1069 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1070 unsigned Size = LLTy.getSizeInBits();
1071 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1072 Type *ITy = IntegerType::get(
1073 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1074 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1075 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1076 return UnableToLegalize;
1077 }
1078 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1079 LegalizeResult Status =
1080 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1081 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1082 if (Status != Legalized)
1083 return Status;
1084 MI.eraseFromParent();
1085 return Legalized;
1086 }
1087 case TargetOpcode::G_FPOWI: {
1088 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1089 unsigned Size = LLTy.getSizeInBits();
1090 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1091 Type *ITy = IntegerType::get(
1092 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1093 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1094 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1095 return UnableToLegalize;
1096 }
1097 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1098 std::initializer_list<CallLowering::ArgInfo> Args = {
1099 {MI.getOperand(1).getReg(), HLTy, 0},
1100 {MI.getOperand(2).getReg(), ITy, 1}};
1101 LegalizeResult Status =
1102 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1103 Args, LocObserver, &MI);
1104 if (Status != Legalized)
1105 return Status;
1106 break;
1107 }
1108 case TargetOpcode::G_FPEXT:
1109 case TargetOpcode::G_FPTRUNC: {
1110 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1111 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1112 if (!FromTy || !ToTy)
1113 return UnableToLegalize;
1114 LegalizeResult Status =
1115 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver);
1116 if (Status != Legalized)
1117 return Status;
1118 break;
1119 }
1120 case TargetOpcode::G_FPTOSI:
1121 case TargetOpcode::G_FPTOUI: {
1122 // FIXME: Support other types
1123 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1124 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1125 if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
1126 return UnableToLegalize;
1127 LegalizeResult Status = conversionLibcall(
1128 MI, MIRBuilder,
1129 ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
1130 FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
1131 LocObserver);
1132 if (Status != Legalized)
1133 return Status;
1134 break;
1135 }
1136 case TargetOpcode::G_SITOFP:
1137 case TargetOpcode::G_UITOFP: {
1138 // FIXME: Support other types
1139 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1140 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1141 if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
1142 return UnableToLegalize;
1143 LegalizeResult Status = conversionLibcall(
1144 MI, MIRBuilder,
1145 ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
1146 FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
1147 LocObserver);
1148 if (Status != Legalized)
1149 return Status;
1150 break;
1151 }
1152 case TargetOpcode::G_ATOMICRMW_XCHG:
1153 case TargetOpcode::G_ATOMICRMW_ADD:
1154 case TargetOpcode::G_ATOMICRMW_SUB:
1155 case TargetOpcode::G_ATOMICRMW_AND:
1156 case TargetOpcode::G_ATOMICRMW_OR:
1157 case TargetOpcode::G_ATOMICRMW_XOR:
1158 case TargetOpcode::G_ATOMIC_CMPXCHG:
1159 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1160 auto Status = createAtomicLibcall(MIRBuilder, MI);
1161 if (Status != Legalized)
1162 return Status;
1163 break;
1164 }
1165 case TargetOpcode::G_BZERO:
1166 case TargetOpcode::G_MEMCPY:
1167 case TargetOpcode::G_MEMMOVE:
1168 case TargetOpcode::G_MEMSET: {
1169 LegalizeResult Result =
1170 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1171 if (Result != Legalized)
1172 return Result;
1173 MI.eraseFromParent();
1174 return Result;
1175 }
1176 case TargetOpcode::G_GET_FPENV:
1177 case TargetOpcode::G_GET_FPMODE: {
1178 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1179 if (Result != Legalized)
1180 return Result;
1181 break;
1182 }
1183 case TargetOpcode::G_SET_FPENV:
1184 case TargetOpcode::G_SET_FPMODE: {
1185 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1186 if (Result != Legalized)
1187 return Result;
1188 break;
1189 }
1190 case TargetOpcode::G_RESET_FPENV:
1191 case TargetOpcode::G_RESET_FPMODE: {
1192 LegalizeResult Result =
1193 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1194 if (Result != Legalized)
1195 return Result;
1196 break;
1197 }
1198 }
1199
1200 MI.eraseFromParent();
1201 return Legalized;
1202}
1203
1204LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1205 unsigned TypeIdx,
1206 LLT NarrowTy) {
1207 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1208 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1209
1210 switch (MI.getOpcode()) {
1211 default:
1212 return UnableToLegalize;
1213 case TargetOpcode::G_IMPLICIT_DEF: {
1214 Register DstReg = MI.getOperand(0).getReg();
1215 LLT DstTy = MRI.getType(DstReg);
1216
1217 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1218 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1219 // FIXME: Although this would also be legal for the general case, it causes
1220 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1221 // combines not being hit). This seems to be a problem related to the
1222 // artifact combiner.
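 // For example (illustrative): narrowing an s96 G_IMPLICIT_DEF with
 // NarrowTy == s64 emits %t:_(s64) = G_IMPLICIT_DEF followed by
 // %dst:_(s96) = G_ANYEXT %t.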
1223 if (SizeOp0 % NarrowSize != 0) {
1224 LLT ImplicitTy = NarrowTy;
1225 if (DstTy.isVector())
1226 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
1227
1228 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1229 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1230
1231 MI.eraseFromParent();
1232 return Legalized;
1233 }
1234
1235 int NumParts = SizeOp0 / NarrowSize;
1236
1237 SmallVector<Register, 2> DstRegs;
1238 for (int i = 0; i < NumParts; ++i)
1239 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1240
1241 if (DstTy.isVector())
1242 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1243 else
1244 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1245 MI.eraseFromParent();
1246 return Legalized;
1247 }
1248 case TargetOpcode::G_CONSTANT: {
1249 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1250 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1251 unsigned TotalSize = Ty.getSizeInBits();
1252 unsigned NarrowSize = NarrowTy.getSizeInBits();
1253 int NumParts = TotalSize / NarrowSize;
1254
1255 SmallVector<Register, 4> PartRegs;
1256 for (int I = 0; I != NumParts; ++I) {
1257 unsigned Offset = I * NarrowSize;
1258 auto K = MIRBuilder.buildConstant(NarrowTy,
1259 Val.lshr(Offset).trunc(NarrowSize));
1260 PartRegs.push_back(K.getReg(0));
1261 }
1262
1263 LLT LeftoverTy;
1264 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1265 SmallVector<Register, 1> LeftoverRegs;
1266 if (LeftoverBits != 0) {
1267 LeftoverTy = LLT::scalar(LeftoverBits);
1268 auto K = MIRBuilder.buildConstant(
1269 LeftoverTy,
1270 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1271 LeftoverRegs.push_back(K.getReg(0));
1272 }
1273
1274 insertParts(MI.getOperand(0).getReg(),
1275 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1276
1277 MI.eraseFromParent();
1278 return Legalized;
1279 }
1280 case TargetOpcode::G_SEXT:
1281 case TargetOpcode::G_ZEXT:
1282 case TargetOpcode::G_ANYEXT:
1283 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1284 case TargetOpcode::G_TRUNC: {
1285 if (TypeIdx != 1)
1286 return UnableToLegalize;
1287
1288 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1289 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1290 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1291 return UnableToLegalize;
1292 }
1293
1294 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1295 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1296 MI.eraseFromParent();
1297 return Legalized;
1298 }
1299
1300 case TargetOpcode::G_FREEZE: {
1301 if (TypeIdx != 0)
1302 return UnableToLegalize;
1303
1304 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1305 // Should widen scalar first
1306 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1307 return UnableToLegalize;
1308
1309 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1310 SmallVector<Register, 8> Parts;
1311 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1312 Parts.push_back(
1313 MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
1314 }
1315
1316 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1317 MI.eraseFromParent();
1318 return Legalized;
1319 }
1320 case TargetOpcode::G_ADD:
1321 case TargetOpcode::G_SUB:
1322 case TargetOpcode::G_SADDO:
1323 case TargetOpcode::G_SSUBO:
1324 case TargetOpcode::G_SADDE:
1325 case TargetOpcode::G_SSUBE:
1326 case TargetOpcode::G_UADDO:
1327 case TargetOpcode::G_USUBO:
1328 case TargetOpcode::G_UADDE:
1329 case TargetOpcode::G_USUBE:
1330 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1331 case TargetOpcode::G_MUL:
1332 case TargetOpcode::G_UMULH:
1333 return narrowScalarMul(MI, NarrowTy);
1334 case TargetOpcode::G_EXTRACT:
1335 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1336 case TargetOpcode::G_INSERT:
1337 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1338 case TargetOpcode::G_LOAD: {
1339 auto &LoadMI = cast<GLoad>(MI);
1340 Register DstReg = LoadMI.getDstReg();
1341 LLT DstTy = MRI.getType(DstReg);
1342 if (DstTy.isVector())
1343 return UnableToLegalize;
1344
1345 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1346 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1347 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1348 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1349 LoadMI.eraseFromParent();
1350 return Legalized;
1351 }
1352
1353 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1354 }
1355 case TargetOpcode::G_ZEXTLOAD:
1356 case TargetOpcode::G_SEXTLOAD: {
1357 auto &LoadMI = cast<GExtLoad>(MI);
1358 Register DstReg = LoadMI.getDstReg();
1359 Register PtrReg = LoadMI.getPointerReg();
1360
1361 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1362 auto &MMO = LoadMI.getMMO();
1363 unsigned MemSize = MMO.getSizeInBits().getValue();
1364
1365 if (MemSize == NarrowSize) {
1366 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1367 } else if (MemSize < NarrowSize) {
1368 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1369 } else if (MemSize > NarrowSize) {
1370 // FIXME: Need to split the load.
1371 return UnableToLegalize;
1372 }
1373
1374 if (isa<GZExtLoad>(LoadMI))
1375 MIRBuilder.buildZExt(DstReg, TmpReg);
1376 else
1377 MIRBuilder.buildSExt(DstReg, TmpReg);
1378
1379 LoadMI.eraseFromParent();
1380 return Legalized;
1381 }
1382 case TargetOpcode::G_STORE: {
1383 auto &StoreMI = cast<GStore>(MI);
1384
1385 Register SrcReg = StoreMI.getValueReg();
1386 LLT SrcTy = MRI.getType(SrcReg);
1387 if (SrcTy.isVector())
1388 return UnableToLegalize;
1389
1390 int NumParts = SizeOp0 / NarrowSize;
1391 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1392 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1393 if (SrcTy.isVector() && LeftoverBits != 0)
1394 return UnableToLegalize;
1395
1396 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1397 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1398 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1399 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1400 StoreMI.eraseFromParent();
1401 return Legalized;
1402 }
1403
1404 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1405 }
1406 case TargetOpcode::G_SELECT:
1407 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1408 case TargetOpcode::G_AND:
1409 case TargetOpcode::G_OR:
1410 case TargetOpcode::G_XOR: {
1411 // Legalize bitwise operation:
1412 // A = BinOp<Ty> B, C
1413 // into:
1414 // B1, ..., BN = G_UNMERGE_VALUES B
1415 // C1, ..., CN = G_UNMERGE_VALUES C
1416 // A1 = BinOp<Ty/N> B1, C2
1417 // ...
1418 // AN = BinOp<Ty/N> BN, CN
1419 // A = G_MERGE_VALUES A1, ..., AN
1420 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1421 }
1422 case TargetOpcode::G_SHL:
1423 case TargetOpcode::G_LSHR:
1424 case TargetOpcode::G_ASHR:
1425 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1426 case TargetOpcode::G_CTLZ:
1427 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1428 case TargetOpcode::G_CTTZ:
1429 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1430 case TargetOpcode::G_CTPOP:
1431 if (TypeIdx == 1)
1432 switch (MI.getOpcode()) {
1433 case TargetOpcode::G_CTLZ:
1434 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1435 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1436 case TargetOpcode::G_CTTZ:
1437 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1438 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1439 case TargetOpcode::G_CTPOP:
1440 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1441 default:
1442 return UnableToLegalize;
1443 }
1444
1445 Observer.changingInstr(MI);
1446 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1447 Observer.changedInstr(MI);
1448 return Legalized;
1449 case TargetOpcode::G_INTTOPTR:
1450 if (TypeIdx != 1)
1451 return UnableToLegalize;
1452
1453 Observer.changingInstr(MI);
1454 narrowScalarSrc(MI, NarrowTy, 1);
1455 Observer.changedInstr(MI);
1456 return Legalized;
1457 case TargetOpcode::G_PTRTOINT:
1458 if (TypeIdx != 0)
1459 return UnableToLegalize;
1460
1461 Observer.changingInstr(MI);
1462 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1463 Observer.changedInstr(MI);
1464 return Legalized;
1465 case TargetOpcode::G_PHI: {
1466 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1467 // NarrowSize.
1468 if (SizeOp0 % NarrowSize != 0)
1469 return UnableToLegalize;
1470
1471 unsigned NumParts = SizeOp0 / NarrowSize;
1472 SmallVector<Register, 2> DstRegs(NumParts);
1473 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1474 Observer.changingInstr(MI);
1475 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1476 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1477 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1478 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1479 SrcRegs[i / 2], MIRBuilder, MRI);
1480 }
1481 MachineBasicBlock &MBB = *MI.getParent();
1482 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1483 for (unsigned i = 0; i < NumParts; ++i) {
1484 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1485 MachineInstrBuilder MIB =
1486 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1487 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1488 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1489 }
1490 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1491 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1492 Observer.changedInstr(MI);
1493 MI.eraseFromParent();
1494 return Legalized;
1495 }
1496 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1497 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1498 if (TypeIdx != 2)
1499 return UnableToLegalize;
1500
1501 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1502 Observer.changingInstr(MI);
1503 narrowScalarSrc(MI, NarrowTy, OpIdx);
1504 Observer.changedInstr(MI);
1505 return Legalized;
1506 }
1507 case TargetOpcode::G_ICMP: {
1508 Register LHS = MI.getOperand(2).getReg();
1509 LLT SrcTy = MRI.getType(LHS);
1510 uint64_t SrcSize = SrcTy.getSizeInBits();
1511 CmpInst::Predicate Pred =
1512 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1513
1514 // TODO: Handle the non-equality case for weird sizes.
1515 if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
1516 return UnableToLegalize;
1517
1518 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1519 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1520 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1521 LHSLeftoverRegs, MIRBuilder, MRI))
1522 return UnableToLegalize;
1523
1524 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1525 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1526 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1527 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1528 return UnableToLegalize;
1529
1530 // We now have the LHS and RHS of the compare split into narrow-type
1531 // registers, plus potentially some leftover type.
1532 Register Dst = MI.getOperand(0).getReg();
1533 LLT ResTy = MRI.getType(Dst);
1534 if (ICmpInst::isEquality(Pred)) {
1535 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1536 // them together. For each equal part, the result should be all 0s. For
1537 // each non-equal part, we'll get at least one 1.
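 // E.g. (illustrative) an s64 equality compare narrowed to s32 produces two
 // s32 G_XORs; below they are ORed together and compared against zero.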
1538 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1539 SmallVector<Register, 4> Xors;
1540 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1541 auto LHS = std::get<0>(LHSAndRHS);
1542 auto RHS = std::get<1>(LHSAndRHS);
1543 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1544 Xors.push_back(Xor);
1545 }
1546
1547 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1548 // to the desired narrow type so that we can OR them together later.
1549 SmallVector<Register, 4> WidenedXors;
1550 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1551 auto LHS = std::get<0>(LHSAndRHS);
1552 auto RHS = std::get<1>(LHSAndRHS);
1553 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1554 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1555 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1556 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1557 Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
1558 }
1559
1560 // Now, for each part we broke up, we know if they are equal/not equal
1561 // based off the G_XOR. We can OR these all together and compare against
1562 // 0 to get the result.
1563 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1564 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1565 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1566 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1567 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1568 } else {
1569 // TODO: Handle non-power-of-two types.
1570 assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
1571 assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
1572 Register LHSL = LHSPartRegs[0];
1573 Register LHSH = LHSPartRegs[1];
1574 Register RHSL = RHSPartRegs[0];
1575 Register RHSH = RHSPartRegs[1];
1576 MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1577 MachineInstrBuilder CmpHEQ =
1578 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
1579 MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
1580 ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1581 MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
1582 }
1583 MI.eraseFromParent();
1584 return Legalized;
1585 }
1586 case TargetOpcode::G_FCMP:
1587 if (TypeIdx != 0)
1588 return UnableToLegalize;
1589
1590 Observer.changingInstr(MI);
1591 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1592 Observer.changedInstr(MI);
1593 return Legalized;
1594
1595 case TargetOpcode::G_SEXT_INREG: {
1596 if (TypeIdx != 0)
1597 return UnableToLegalize;
1598
1599 int64_t SizeInBits = MI.getOperand(2).getImm();
1600
1601 // So long as the new type has more bits than the bits we're extending we
1602 // don't need to break it apart.
1603 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1604 Observer.changingInstr(MI);
1605 // We don't lose any non-extension bits by truncating the src and
1606 // sign-extending the dst.
1607 MachineOperand &MO1 = MI.getOperand(1);
1608 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1609 MO1.setReg(TruncMIB.getReg(0));
1610
1611 MachineOperand &MO2 = MI.getOperand(0);
1612 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1613 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1614 MIRBuilder.buildSExt(MO2, DstExt);
1615 MO2.setReg(DstExt);
1616 Observer.changedInstr(MI);
1617 return Legalized;
1618 }
1619
1620 // Break it apart. Components below the extension point are unmodified. The
1621 // component containing the extension point becomes a narrower SEXT_INREG.
1622 // Components above it are ashr'd from the component containing the
1623 // extension point.
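 // E.g. (illustrative) s128 = G_SEXT_INREG %x, 40 narrowed to s32: part 0 is
 // kept as-is, part 1 becomes a G_SEXT_INREG of 8 bits, and parts 2 and 3 are
 // the G_ASHR of part 1 by 31.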
1624 if (SizeOp0 % NarrowSize != 0)
1625 return UnableToLegalize;
1626 int NumParts = SizeOp0 / NarrowSize;
1627
1628 // List the registers where the destination will be scattered.
1629 SmallVector<Register, 2> DstRegs;
1630 // List the registers where the source will be split.
1631 SmallVector<Register, 2> SrcRegs;
1632
1633 // Create all the temporary registers.
1634 for (int i = 0; i < NumParts; ++i) {
1635 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1636
1637 SrcRegs.push_back(SrcReg);
1638 }
1639
1640 // Explode the big arguments into smaller chunks.
1641 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1642
1643 Register AshrCstReg =
1644 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1645 .getReg(0);
1646 Register FullExtensionReg;
1647 Register PartialExtensionReg;
1648
1649 // Do the operation on each small part.
1650 for (int i = 0; i < NumParts; ++i) {
1651 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
1652 DstRegs.push_back(SrcRegs[i]);
1653 PartialExtensionReg = DstRegs.back();
1654 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1655 assert(PartialExtensionReg &&
1656 "Expected to visit partial extension before full");
1657 if (FullExtensionReg) {
1658 DstRegs.push_back(FullExtensionReg);
1659 continue;
1660 }
1661 DstRegs.push_back(
1662 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1663 .getReg(0));
1664 FullExtensionReg = DstRegs.back();
1665 } else {
1666 DstRegs.push_back(
1667 MIRBuilder
1668 .buildInstr(
1669 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1670 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1671 .getReg(0));
1672 PartialExtensionReg = DstRegs.back();
1673 }
1674 }
1675
1676 // Gather the destination registers into the final destination.
1677 Register DstReg = MI.getOperand(0).getReg();
1678 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1679 MI.eraseFromParent();
1680 return Legalized;
1681 }
1682 case TargetOpcode::G_BSWAP:
1683 case TargetOpcode::G_BITREVERSE: {
1684 if (SizeOp0 % NarrowSize != 0)
1685 return UnableToLegalize;
1686
1687 Observer.changingInstr(MI);
1688 SmallVector<Register, 2> SrcRegs, DstRegs;
1689 unsigned NumParts = SizeOp0 / NarrowSize;
1690 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
1691 MIRBuilder, MRI);
1692
1693 for (unsigned i = 0; i < NumParts; ++i) {
1694 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1695 {SrcRegs[NumParts - 1 - i]});
1696 DstRegs.push_back(DstPart.getReg(0));
1697 }
1698
1699 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1700
1701 Observer.changedInstr(MI);
1702 MI.eraseFromParent();
1703 return Legalized;
1704 }
1705 case TargetOpcode::G_PTR_ADD:
1706 case TargetOpcode::G_PTRMASK: {
1707 if (TypeIdx != 1)
1708 return UnableToLegalize;
1709 Observer.changingInstr(MI);
1710 narrowScalarSrc(MI, NarrowTy, 2);
1711 Observer.changedInstr(MI);
1712 return Legalized;
1713 }
1714 case TargetOpcode::G_FPTOUI:
1715 case TargetOpcode::G_FPTOSI:
1716 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1717 case TargetOpcode::G_FPEXT:
1718 if (TypeIdx != 0)
1719 return UnableToLegalize;
1720 Observer.changingInstr(MI);
1721 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1722 Observer.changedInstr(MI);
1723 return Legalized;
1724 case TargetOpcode::G_FLDEXP:
1725 case TargetOpcode::G_STRICT_FLDEXP:
1726 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
1727 case TargetOpcode::G_VSCALE: {
1728 Register Dst = MI.getOperand(0).getReg();
1729 LLT Ty = MRI.getType(Dst);
1730
1731 // Assume VSCALE(1) fits into a legal integer
1732 const APInt One(NarrowTy.getSizeInBits(), 1);
1733 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
1734 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
1735 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
1736 MIRBuilder.buildMul(Dst, ZExt, C);
1737
1738 MI.eraseFromParent();
1739 return Legalized;
1740 }
1741 }
1742}
1743
1744Register LegalizerHelper::coerceToScalar(Register Val) {
1745 LLT Ty = MRI.getType(Val);
1746 if (Ty.isScalar())
1747 return Val;
1748
1749 const DataLayout &DL = MIRBuilder.getDataLayout();
1750 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1751 if (Ty.isPointer()) {
1752 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1753 return Register();
1754 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1755 }
1756
1757 Register NewVal = Val;
1758
1759 assert(Ty.isVector());
1760 if (Ty.isPointerVector())
1761 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1762 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1763}
1764
1765void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1766 unsigned OpIdx, unsigned ExtOpcode) {
1767 MachineOperand &MO = MI.getOperand(OpIdx);
1768 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1769 MO.setReg(ExtB.getReg(0));
1770}
1771
1772void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1773 unsigned OpIdx) {
1774 MachineOperand &MO = MI.getOperand(OpIdx);
1775 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1776 MO.setReg(ExtB.getReg(0));
1777}
1778
1779void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1780 unsigned OpIdx, unsigned TruncOpcode) {
1781 MachineOperand &MO = MI.getOperand(OpIdx);
1782 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1784 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1785 MO.setReg(DstExt);
1786}
1787
1788void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1789 unsigned OpIdx, unsigned ExtOpcode) {
1790 MachineOperand &MO = MI.getOperand(OpIdx);
1791 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1793 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1794 MO.setReg(DstTrunc);
1795}
1796
1798 unsigned OpIdx) {
1799 MachineOperand &MO = MI.getOperand(OpIdx);
1801 Register Dst = MO.getReg();
1802 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1803 MO.setReg(DstExt);
1805}
1806
1808 unsigned OpIdx) {
1809 MachineOperand &MO = MI.getOperand(OpIdx);
1812}
1813
1814void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1815 MachineOperand &Op = MI.getOperand(OpIdx);
1816 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1817}
1818
1819void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1820 MachineOperand &MO = MI.getOperand(OpIdx);
1821 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1823 MIRBuilder.buildBitcast(MO, CastDst);
1824 MO.setReg(CastDst);
1825}
1826
1828LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1829 LLT WideTy) {
1830 if (TypeIdx != 1)
1831 return UnableToLegalize;
1832
1833 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
1834 if (DstTy.isVector())
1835 return UnableToLegalize;
1836
1837 LLT SrcTy = MRI.getType(Src1Reg);
1838 const int DstSize = DstTy.getSizeInBits();
1839 const int SrcSize = SrcTy.getSizeInBits();
1840 const int WideSize = WideTy.getSizeInBits();
1841 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1842
1843 unsigned NumOps = MI.getNumOperands();
1844 unsigned NumSrc = MI.getNumOperands() - 1;
1845 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1846
1847 if (WideSize >= DstSize) {
1848 // Directly pack the bits in the target type.
1849 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
1850
1851 for (unsigned I = 2; I != NumOps; ++I) {
1852 const unsigned Offset = (I - 1) * PartSize;
1853
1854 Register SrcReg = MI.getOperand(I).getReg();
1855 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1856
1857 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1858
1859 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1860 MRI.createGenericVirtualRegister(WideTy);
1861
1862 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1863 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1864 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1865 ResultReg = NextResult;
1866 }
1867
1868 if (WideSize > DstSize)
1869 MIRBuilder.buildTrunc(DstReg, ResultReg);
1870 else if (DstTy.isPointer())
1871 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1872
1873 MI.eraseFromParent();
1874 return Legalized;
1875 }
1876
1877 // Unmerge the original values to the GCD type, and recombine to the next
1878 // multiple greater than the original type.
1879 //
1880 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1881 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1882 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1883 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1884 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1885 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1886 // %12:_(s12) = G_MERGE_VALUES %10, %11
1887 //
1888 // Padding with undef if necessary:
1889 //
1890 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1891 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1892 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1893 // %7:_(s2) = G_IMPLICIT_DEF
1894 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1895 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1896 // %10:_(s12) = G_MERGE_VALUES %8, %9
1897
1898 const int GCD = std::gcd(SrcSize, WideSize);
1899 LLT GCDTy = LLT::scalar(GCD);
1900
1902 SmallVector<Register, 8> NewMergeRegs;
1903 SmallVector<Register, 8> Unmerges;
1904 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1905
1906 // Decompose the original operands if they don't evenly divide.
1907 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
1908 Register SrcReg = MO.getReg();
1909 if (GCD == SrcSize) {
1910 Unmerges.push_back(SrcReg);
1911 } else {
1912 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1913 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1914 Unmerges.push_back(Unmerge.getReg(J));
1915 }
1916 }
1917
1918 // Pad with undef to the next size that is a multiple of the requested size.
1919 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1920 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1921 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1922 Unmerges.push_back(UndefReg);
1923 }
1924
1925 const int PartsPerGCD = WideSize / GCD;
1926
1927 // Build merges of each piece.
1928 ArrayRef<Register> Slicer(Unmerges);
1929 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1930 auto Merge =
1931 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
1932 NewMergeRegs.push_back(Merge.getReg(0));
1933 }
1934
1935 // A truncate may be necessary if the requested type doesn't evenly divide the
1936 // original result type.
1937 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1938 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
1939 } else {
1940 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
1941 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1942 }
1943
1944 MI.eraseFromParent();
1945 return Legalized;
1946}
1947
1949LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1950 LLT WideTy) {
1951 if (TypeIdx != 0)
1952 return UnableToLegalize;
1953
1954 int NumDst = MI.getNumOperands() - 1;
1955 Register SrcReg = MI.getOperand(NumDst).getReg();
1956 LLT SrcTy = MRI.getType(SrcReg);
1957 if (SrcTy.isVector())
1958 return UnableToLegalize;
1959
1960 Register Dst0Reg = MI.getOperand(0).getReg();
1961 LLT DstTy = MRI.getType(Dst0Reg);
1962 if (!DstTy.isScalar())
1963 return UnableToLegalize;
1964
1965 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
1966 if (SrcTy.isPointer()) {
1967 const DataLayout &DL = MIRBuilder.getDataLayout();
1968 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
1969 LLVM_DEBUG(
1970 dbgs() << "Not casting non-integral address space integer\n");
1971 return UnableToLegalize;
1972 }
1973
1974 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
1975 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
1976 }
1977
1978 // Widen SrcTy to WideTy. This does not affect the result, but since the
1979 // user requested this size, it is probably better handled than SrcTy and
1980 // should reduce the total number of legalization artifacts.
1981 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1982 SrcTy = WideTy;
1983 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
1984 }
1985
1986 // There's no unmerge type to target. Directly extract the bits from the
1987 // source type.
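// Illustrative sketch (editorial addition, not in the source): widening the
// result type of
//   %a:_(s16), %b:_(s16) = G_UNMERGE_VALUES %x:_(s32)
// to s64 extracts each piece with shifts and truncates:
//   %wide:_(s64) = G_ANYEXT %x:_(s32)
//   %a:_(s16) = G_TRUNC %wide
//   %c16:_(s64) = G_CONSTANT i64 16
//   %shr:_(s64) = G_LSHR %wide, %c16
//   %b:_(s16) = G_TRUNC %shr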
1988 unsigned DstSize = DstTy.getSizeInBits();
1989
1990 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
1991 for (int I = 1; I != NumDst; ++I) {
1992 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
1993 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
1994 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
1995 }
1996
1997 MI.eraseFromParent();
1998 return Legalized;
1999 }
2000
2001 // Extend the source to a wider type.
2002 LLT LCMTy = getLCMType(SrcTy, WideTy);
2003
2004 Register WideSrc = SrcReg;
2005 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2006 // TODO: If this is an integral address space, cast to integer and anyext.
2007 if (SrcTy.isPointer()) {
2008 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2009 return UnableToLegalize;
2010 }
2011
2012 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2013 }
2014
2015 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2016
2017 // Create a sequence of unmerges and merges to the original results. Since we
2018 // may have widened the source, we will need to pad the results with dead defs
2019 // to cover the source register.
2020 // e.g. widen s48 to s64:
2021 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2022 //
2023 // =>
2024 // %4:_(s192) = G_ANYEXT %0:_(s96)
2025 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2026 // ; unpack to GCD type, with extra dead defs
2027 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2028 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2029 // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
2030 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2031 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2032 const LLT GCDTy = getGCDType(WideTy, DstTy);
2033 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2034 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2035
2036 // Directly unmerge to the destination without going through a GCD type
2037 // if possible
2038 if (PartsPerRemerge == 1) {
2039 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2040
2041 for (int I = 0; I != NumUnmerge; ++I) {
2042 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2043
2044 for (int J = 0; J != PartsPerUnmerge; ++J) {
2045 int Idx = I * PartsPerUnmerge + J;
2046 if (Idx < NumDst)
2047 MIB.addDef(MI.getOperand(Idx).getReg());
2048 else {
2049 // Create dead def for excess components.
2050 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2051 }
2052 }
2053
2054 MIB.addUse(Unmerge.getReg(I));
2055 }
2056 } else {
2057 SmallVector<Register, 16> Parts;
2058 for (int J = 0; J != NumUnmerge; ++J)
2059 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2060
2061 SmallVector<Register, 8> RemergeParts;
2062 for (int I = 0; I != NumDst; ++I) {
2063 for (int J = 0; J < PartsPerRemerge; ++J) {
2064 const int Idx = I * PartsPerRemerge + J;
2065 RemergeParts.emplace_back(Parts[Idx]);
2066 }
2067
2068 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2069 RemergeParts.clear();
2070 }
2071 }
2072
2073 MI.eraseFromParent();
2074 return Legalized;
2075}
2076
2078LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2079 LLT WideTy) {
2080 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2081 unsigned Offset = MI.getOperand(2).getImm();
2082
2083 if (TypeIdx == 0) {
2084 if (SrcTy.isVector() || DstTy.isVector())
2085 return UnableToLegalize;
2086
2087 SrcOp Src(SrcReg);
2088 if (SrcTy.isPointer()) {
2089 // Extracts from pointers can be handled only if they are really just
2090 // simple integers.
2091 const DataLayout &DL = MIRBuilder.getDataLayout();
2092 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2093 return UnableToLegalize;
2094
2095 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2096 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2097 SrcTy = SrcAsIntTy;
2098 }
2099
2100 if (DstTy.isPointer())
2101 return UnableToLegalize;
2102
2103 if (Offset == 0) {
2104 // Avoid a shift in the degenerate case.
2105 MIRBuilder.buildTrunc(DstReg,
2106 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2107 MI.eraseFromParent();
2108 return Legalized;
2109 }
2110
2111 // Do a shift in the source type.
2112 LLT ShiftTy = SrcTy;
2113 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2114 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2115 ShiftTy = WideTy;
2116 }
2117
2118 auto LShr = MIRBuilder.buildLShr(
2119 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2120 MIRBuilder.buildTrunc(DstReg, LShr);
2121 MI.eraseFromParent();
2122 return Legalized;
2123 }
2124
2125 if (SrcTy.isScalar()) {
2127 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2129 return Legalized;
2130 }
2131
2132 if (!SrcTy.isVector())
2133 return UnableToLegalize;
2134
2135 if (DstTy != SrcTy.getElementType())
2136 return UnableToLegalize;
2137
2138 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2139 return UnableToLegalize;
2140
2142 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2143
2144 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2145 Offset);
2146 widenScalarDst(MI, WideTy.getScalarType(), 0);
2148 return Legalized;
2149}
2150
2152LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2153 LLT WideTy) {
2154 if (TypeIdx != 0 || WideTy.isVector())
2155 return UnableToLegalize;
2157 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2158 widenScalarDst(MI, WideTy);
2160 return Legalized;
2161}
2162
2164LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2165 LLT WideTy) {
2166 unsigned Opcode;
2167 unsigned ExtOpcode;
2168 std::optional<Register> CarryIn;
2169 switch (MI.getOpcode()) {
2170 default:
2171 llvm_unreachable("Unexpected opcode!");
2172 case TargetOpcode::G_SADDO:
2173 Opcode = TargetOpcode::G_ADD;
2174 ExtOpcode = TargetOpcode::G_SEXT;
2175 break;
2176 case TargetOpcode::G_SSUBO:
2177 Opcode = TargetOpcode::G_SUB;
2178 ExtOpcode = TargetOpcode::G_SEXT;
2179 break;
2180 case TargetOpcode::G_UADDO:
2181 Opcode = TargetOpcode::G_ADD;
2182 ExtOpcode = TargetOpcode::G_ZEXT;
2183 break;
2184 case TargetOpcode::G_USUBO:
2185 Opcode = TargetOpcode::G_SUB;
2186 ExtOpcode = TargetOpcode::G_ZEXT;
2187 break;
2188 case TargetOpcode::G_SADDE:
2189 Opcode = TargetOpcode::G_UADDE;
2190 ExtOpcode = TargetOpcode::G_SEXT;
2191 CarryIn = MI.getOperand(4).getReg();
2192 break;
2193 case TargetOpcode::G_SSUBE:
2194 Opcode = TargetOpcode::G_USUBE;
2195 ExtOpcode = TargetOpcode::G_SEXT;
2196 CarryIn = MI.getOperand(4).getReg();
2197 break;
2198 case TargetOpcode::G_UADDE:
2199 Opcode = TargetOpcode::G_UADDE;
2200 ExtOpcode = TargetOpcode::G_ZEXT;
2201 CarryIn = MI.getOperand(4).getReg();
2202 break;
2203 case TargetOpcode::G_USUBE:
2204 Opcode = TargetOpcode::G_USUBE;
2205 ExtOpcode = TargetOpcode::G_ZEXT;
2206 CarryIn = MI.getOperand(4).getReg();
2207 break;
2208 }
2209
2210 if (TypeIdx == 1) {
2211 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2212
2214 if (CarryIn)
2215 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2216 widenScalarDst(MI, WideTy, 1);
2217
2219 return Legalized;
2220 }
2221
2222 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2223 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2224 // Do the arithmetic in the larger type.
2225 Register NewOp;
2226 if (CarryIn) {
2227 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2228 NewOp = MIRBuilder
2229 .buildInstr(Opcode, {WideTy, CarryOutTy},
2230 {LHSExt, RHSExt, *CarryIn})
2231 .getReg(0);
2232 } else {
2233 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2234 }
2235 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2236 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2237 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2238 // There is no overflow if the ExtOp is the same as NewOp.
2239 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2240 // Now trunc the NewOp to the original result.
2241 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2242 MI.eraseFromParent();
2243 return Legalized;
2244}
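// Illustrative sketch (editorial addition, not in the source) of the overflow
// widening above, for %r:_(s8), %o:_(s1) = G_SADDO %a:_(s8), %b:_(s8)
// widened to s32:
//   %la:_(s32) = G_SEXT %a
//   %lb:_(s32) = G_SEXT %b
//   %sum:_(s32) = G_ADD %la, %lb
//   %t:_(s8) = G_TRUNC %sum
//   %sx:_(s32) = G_SEXT %t
//   %o:_(s1) = G_ICMP intpred(ne), %sum, %sx
//   %r:_(s8) = G_TRUNC %sum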
2245
2247LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2248 LLT WideTy) {
2249 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2250 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2251 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2252 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2253 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2254 // We can convert this to:
2255 // 1. Any extend iN to iM
2256 // 2. SHL by M-N
2257 // 3. [US][ADD|SUB|SHL]SAT
2258 // 4. L/ASHR by M-N
2259 //
2260 // It may be more efficient to lower this to a min and a max operation in
2261 // the higher precision arithmetic if the promoted operation isn't legal,
2262 // but this decision is up to the target's lowering request.
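// Illustrative sketch (editorial addition, not in the source): widening
// %r:_(s8) = G_SADDSAT %a, %b to s32 keeps the saturation point by working
// in the top bits of the wide type:
//   %la:_(s32) = G_ANYEXT %a:_(s8)
//   %lb:_(s32) = G_ANYEXT %b:_(s8)
//   %k24:_(s32) = G_CONSTANT i32 24
//   %sa:_(s32) = G_SHL %la, %k24
//   %sb:_(s32) = G_SHL %lb, %k24
//   %sat:_(s32) = G_SADDSAT %sa, %sb
//   %shr:_(s32) = G_ASHR %sat, %k24
//   %r:_(s8) = G_TRUNC %shr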
2263 Register DstReg = MI.getOperand(0).getReg();
2264
2265 unsigned NewBits = WideTy.getScalarSizeInBits();
2266 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2267
2268 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2269 // must not left shift the RHS to preserve the shift amount.
2270 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2271 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2272 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2273 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2274 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2275 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2276
2277 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2278 {ShiftL, ShiftR}, MI.getFlags());
2279
2280 // Use a shift that will preserve the number of sign bits when the trunc is
2281 // folded away.
2282 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2283 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2284
2285 MIRBuilder.buildTrunc(DstReg, Result);
2286 MI.eraseFromParent();
2287 return Legalized;
2288}
2289
2291LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2292 LLT WideTy) {
2293 if (TypeIdx == 1) {
2295 widenScalarDst(MI, WideTy, 1);
2297 return Legalized;
2298 }
2299
2300 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2301 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2302 LLT SrcTy = MRI.getType(LHS);
2303 LLT OverflowTy = MRI.getType(OriginalOverflow);
2304 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2305
2306 // To determine if the result overflowed in the larger type, we extend the
2307 // input to the larger type, do the multiply (checking if it overflows),
2308 // then also check the high bits of the result to see if overflow happened
2309 // there.
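// Illustrative sketch (editorial addition, not in the source):
// %r:_(s16), %o:_(s1) = G_UMULO %a, %b widened to s32 (which is >= 2 * 16,
// so the wide multiply itself cannot overflow):
//   %la:_(s32) = G_ZEXT %a:_(s16)
//   %lb:_(s32) = G_ZEXT %b:_(s16)
//   %mul:_(s32) = G_MUL %la, %lb
//   %r:_(s16) = G_TRUNC %mul
//   %mask:_(s32) = G_CONSTANT i32 65535
//   %lo:_(s32) = G_AND %mul, %mask
//   %o:_(s1) = G_ICMP intpred(ne), %mul, %lo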
2310 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2311 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2312 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2313
2314 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2315 // so we don't need to check the overflow result of larger type Mulo.
2316 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2317
2318 unsigned MulOpc =
2319 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2320
2321 MachineInstrBuilder Mulo;
2322 if (WideMulCanOverflow)
2323 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2324 {LeftOperand, RightOperand});
2325 else
2326 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2327
2328 auto Mul = Mulo->getOperand(0);
2329 MIRBuilder.buildTrunc(Result, Mul);
2330
2331 MachineInstrBuilder ExtResult;
2332 // Overflow occurred if it occurred in the larger type, or if the high part
2333 // of the result does not zero/sign-extend the low part. Check this second
2334 // possibility first.
2335 if (IsSigned) {
2336 // For signed, overflow occurred when the high part does not sign-extend
2337 // the low part.
2338 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2339 } else {
2340 // Unsigned overflow occurred when the high part does not zero-extend the
2341 // low part.
2342 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2343 }
2344
2345 if (WideMulCanOverflow) {
2346 auto Overflow =
2347 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2348 // Finally check if the multiplication in the larger type itself overflowed.
2349 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2350 } else {
2351 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2352 }
2353 MI.eraseFromParent();
2354 return Legalized;
2355}
2356
2357LegalizerHelper::LegalizeResult
2358LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2359 switch (MI.getOpcode()) {
2360 default:
2361 return UnableToLegalize;
2362 case TargetOpcode::G_ATOMICRMW_XCHG:
2363 case TargetOpcode::G_ATOMICRMW_ADD:
2364 case TargetOpcode::G_ATOMICRMW_SUB:
2365 case TargetOpcode::G_ATOMICRMW_AND:
2366 case TargetOpcode::G_ATOMICRMW_OR:
2367 case TargetOpcode::G_ATOMICRMW_XOR:
2368 case TargetOpcode::G_ATOMICRMW_MIN:
2369 case TargetOpcode::G_ATOMICRMW_MAX:
2370 case TargetOpcode::G_ATOMICRMW_UMIN:
2371 case TargetOpcode::G_ATOMICRMW_UMAX:
2372 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2374 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2375 widenScalarDst(MI, WideTy, 0);
2377 return Legalized;
2378 case TargetOpcode::G_ATOMIC_CMPXCHG:
2379 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2381 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2382 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2383 widenScalarDst(MI, WideTy, 0);
2385 return Legalized;
2386 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2387 if (TypeIdx == 0) {
2389 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2390 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2391 widenScalarDst(MI, WideTy, 0);
2393 return Legalized;
2394 }
2395 assert(TypeIdx == 1 &&
2396 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2398 widenScalarDst(MI, WideTy, 1);
2400 return Legalized;
2401 case TargetOpcode::G_EXTRACT:
2402 return widenScalarExtract(MI, TypeIdx, WideTy);
2403 case TargetOpcode::G_INSERT:
2404 return widenScalarInsert(MI, TypeIdx, WideTy);
2405 case TargetOpcode::G_MERGE_VALUES:
2406 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2407 case TargetOpcode::G_UNMERGE_VALUES:
2408 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2409 case TargetOpcode::G_SADDO:
2410 case TargetOpcode::G_SSUBO:
2411 case TargetOpcode::G_UADDO:
2412 case TargetOpcode::G_USUBO:
2413 case TargetOpcode::G_SADDE:
2414 case TargetOpcode::G_SSUBE:
2415 case TargetOpcode::G_UADDE:
2416 case TargetOpcode::G_USUBE:
2417 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2418 case TargetOpcode::G_UMULO:
2419 case TargetOpcode::G_SMULO:
2420 return widenScalarMulo(MI, TypeIdx, WideTy);
2421 case TargetOpcode::G_SADDSAT:
2422 case TargetOpcode::G_SSUBSAT:
2423 case TargetOpcode::G_SSHLSAT:
2424 case TargetOpcode::G_UADDSAT:
2425 case TargetOpcode::G_USUBSAT:
2426 case TargetOpcode::G_USHLSAT:
2427 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2428 case TargetOpcode::G_CTTZ:
2429 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2430 case TargetOpcode::G_CTLZ:
2431 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2432 case TargetOpcode::G_CTPOP: {
2433 if (TypeIdx == 0) {
2435 widenScalarDst(MI, WideTy, 0);
2437 return Legalized;
2438 }
2439
2440 Register SrcReg = MI.getOperand(1).getReg();
2441
2442 // First extend the input.
2443 unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
2444 MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
2445 ? TargetOpcode::G_ANYEXT
2446 : TargetOpcode::G_ZEXT;
2447 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2448 LLT CurTy = MRI.getType(SrcReg);
2449 unsigned NewOpc = MI.getOpcode();
2450 if (NewOpc == TargetOpcode::G_CTTZ) {
2451 // The count is the same in the larger type except if the original
2452 // value was zero. This can be handled by setting the bit just off
2453 // the top of the original type.
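// Illustrative sketch (editorial addition, not in the source):
// %c:_(s8) = G_CTTZ %x:_(s8) widened to s32; OR-ing in bit 8 caps the count
// for a zero input at 8:
//   %wide:_(s32) = G_ANYEXT %x:_(s8)
//   %bit8:_(s32) = G_CONSTANT i32 256
//   %nz:_(s32) = G_OR %wide, %bit8
//   %cnt:_(s32) = G_CTTZ_ZERO_UNDEF %nz
//   %c:_(s8) = G_TRUNC %cnt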
2454 auto TopBit =
2455 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2456 MIBSrc = MIRBuilder.buildOr(
2457 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2458 // Now we know the operand is non-zero, use the more relaxed opcode.
2459 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2460 }
2461
2462 // Perform the operation at the larger size.
2463 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2464 // This is already the correct result for CTPOP and CTTZs
2465 if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
2466 MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2467 // The correct result is NewOp - (size difference between WideTy and the current type).
2468 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2469 MIBNewOp = MIRBuilder.buildSub(
2470 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2471 }
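// Illustrative sketch (editorial addition, not in the source): for
// %c:_(s8) = G_CTLZ %x:_(s8) widened to s32, the zero-extended input has
// 32 - 8 = 24 extra leading zeros, so the count is corrected before
// truncating:
//   %z:_(s32) = G_ZEXT %x:_(s8)
//   %cnt:_(s32) = G_CTLZ %z
//   %k24:_(s32) = G_CONSTANT i32 24
//   %adj:_(s32) = G_SUB %cnt, %k24
//   %c:_(s8) = G_TRUNC %adj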
2472
2473 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2474 MI.eraseFromParent();
2475 return Legalized;
2476 }
2477 case TargetOpcode::G_BSWAP: {
2479 Register DstReg = MI.getOperand(0).getReg();
2480
2481 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2482 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2483 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2484 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2485
2486 MI.getOperand(0).setReg(DstExt);
2487
2489
2490 LLT Ty = MRI.getType(DstReg);
2491 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2492 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2493 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2494
2495 MIRBuilder.buildTrunc(DstReg, ShrReg);
2497 return Legalized;
2498 }
2499 case TargetOpcode::G_BITREVERSE: {
2501
2502 Register DstReg = MI.getOperand(0).getReg();
2503 LLT Ty = MRI.getType(DstReg);
2504 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2505
2506 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2507 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2508 MI.getOperand(0).setReg(DstExt);
2510
2511 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2512 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2513 MIRBuilder.buildTrunc(DstReg, Shift);
2515 return Legalized;
2516 }
2517 case TargetOpcode::G_FREEZE:
2519 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2520 widenScalarDst(MI, WideTy);
2522 return Legalized;
2523
2524 case TargetOpcode::G_ABS:
2526 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2527 widenScalarDst(MI, WideTy);
2529 return Legalized;
2530
2531 case TargetOpcode::G_ADD:
2532 case TargetOpcode::G_AND:
2533 case TargetOpcode::G_MUL:
2534 case TargetOpcode::G_OR:
2535 case TargetOpcode::G_XOR:
2536 case TargetOpcode::G_SUB:
2537 case TargetOpcode::G_SHUFFLE_VECTOR:
2538 // Perform operation at larger width (any extension is fine here, high bits
2539 // don't affect the result) and then truncate the result back to the
2540 // original type.
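// Illustrative sketch (editorial addition, not in the source):
// %s:_(s8) = G_ADD %a, %b widened to s32 becomes
//   %la:_(s32) = G_ANYEXT %a:_(s8)
//   %lb:_(s32) = G_ANYEXT %b:_(s8)
//   %ls:_(s32) = G_ADD %la, %lb
//   %s:_(s8) = G_TRUNC %ls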
2542 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2543 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2544 widenScalarDst(MI, WideTy);
2546 return Legalized;
2547
2548 case TargetOpcode::G_SBFX:
2549 case TargetOpcode::G_UBFX:
2551
2552 if (TypeIdx == 0) {
2553 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2554 widenScalarDst(MI, WideTy);
2555 } else {
2556 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2557 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2558 }
2559
2561 return Legalized;
2562
2563 case TargetOpcode::G_SHL:
2565
2566 if (TypeIdx == 0) {
2567 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2568 widenScalarDst(MI, WideTy);
2569 } else {
2570 assert(TypeIdx == 1);
2571 // The "number of bits to shift" operand must preserve its value as an
2572 // unsigned integer:
2573 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2574 }
2575
2577 return Legalized;
2578
2579 case TargetOpcode::G_ROTR:
2580 case TargetOpcode::G_ROTL:
2581 if (TypeIdx != 1)
2582 return UnableToLegalize;
2583
2585 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2587 return Legalized;
2588
2589 case TargetOpcode::G_SDIV:
2590 case TargetOpcode::G_SREM:
2591 case TargetOpcode::G_SMIN:
2592 case TargetOpcode::G_SMAX:
2594 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2595 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2596 widenScalarDst(MI, WideTy);
2598 return Legalized;
2599
2600 case TargetOpcode::G_SDIVREM:
2602 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2603 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2604 widenScalarDst(MI, WideTy);
2605 widenScalarDst(MI, WideTy, 1);
2607 return Legalized;
2608
2609 case TargetOpcode::G_ASHR:
2610 case TargetOpcode::G_LSHR:
2612
2613 if (TypeIdx == 0) {
2614 unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
2615 TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2616
2617 widenScalarSrc(MI, WideTy, 1, CvtOp);
2618 widenScalarDst(MI, WideTy);
2619 } else {
2620 assert(TypeIdx == 1);
2621 // The "number of bits to shift" operand must preserve its value as an
2622 // unsigned integer:
2623 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2624 }
2625
2627 return Legalized;
2628 case TargetOpcode::G_UDIV:
2629 case TargetOpcode::G_UREM:
2630 case TargetOpcode::G_UMIN:
2631 case TargetOpcode::G_UMAX:
2633 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2634 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2635 widenScalarDst(MI, WideTy);
2637 return Legalized;
2638
2639 case TargetOpcode::G_UDIVREM:
2641 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2642 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2643 widenScalarDst(MI, WideTy);
2644 widenScalarDst(MI, WideTy, 1);
2646 return Legalized;
2647
2648 case TargetOpcode::G_SELECT:
2650 if (TypeIdx == 0) {
2651 // Perform operation at larger width (any extension is fine here, high
2652 // bits don't affect the result) and then truncate the result back to the
2653 // original type.
2654 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2655 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2656 widenScalarDst(MI, WideTy);
2657 } else {
2658 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
2659 // Explicit extension is required here since high bits affect the result.
2660 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
2661 }
2663 return Legalized;
2664
2665 case TargetOpcode::G_FPTOSI:
2666 case TargetOpcode::G_FPTOUI:
2667 case TargetOpcode::G_INTRINSIC_LRINT:
2668 case TargetOpcode::G_INTRINSIC_LLRINT:
2669 case TargetOpcode::G_IS_FPCLASS:
2671
2672 if (TypeIdx == 0)
2673 widenScalarDst(MI, WideTy);
2674 else
2675 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2676
2678 return Legalized;
2679 case TargetOpcode::G_SITOFP:
2681
2682 if (TypeIdx == 0)
2683 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2684 else
2685 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2686
2688 return Legalized;
2689 case TargetOpcode::G_UITOFP:
2691
2692 if (TypeIdx == 0)
2693 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2694 else
2695 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2696
2698 return Legalized;
2699 case TargetOpcode::G_LOAD:
2700 case TargetOpcode::G_SEXTLOAD:
2701 case TargetOpcode::G_ZEXTLOAD:
2703 widenScalarDst(MI, WideTy);
2705 return Legalized;
2706
2707 case TargetOpcode::G_STORE: {
2708 if (TypeIdx != 0)
2709 return UnableToLegalize;
2710
2711 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2712 if (!Ty.isScalar())
2713 return UnableToLegalize;
2714
2716
2717 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
2718 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2719 widenScalarSrc(MI, WideTy, 0, ExtType);
2720
2722 return Legalized;
2723 }
2724 case TargetOpcode::G_CONSTANT: {
2725 MachineOperand &SrcMO = MI.getOperand(1);
2727 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
2728 MRI.getType(MI.getOperand(0).getReg()));
2729 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2730 ExtOpc == TargetOpcode::G_ANYEXT) &&
2731 "Illegal Extend");
2732 const APInt &SrcVal = SrcMO.getCImm()->getValue();
2733 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2734 ? SrcVal.sext(WideTy.getSizeInBits())
2735 : SrcVal.zext(WideTy.getSizeInBits());
2737 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
2738
2739 widenScalarDst(MI, WideTy);
2741 return Legalized;
2742 }
2743 case TargetOpcode::G_FCONSTANT: {
2744 // To avoid changing the bits of the constant due to extension to a larger
2745 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
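// Illustrative sketch (editorial addition, not in the source): widening
//   %d:_(s16) = G_FCONSTANT half 1.0
// to s32 keeps the IEEE bit pattern (0x3C00 = 15360) rather than extending
// the floating-point value; after the new G_CONSTANT is itself widened this
// conceptually becomes
//   %wide:_(s32) = G_CONSTANT i32 15360
//   %d:_(s16) = G_TRUNC %wide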
2746 MachineOperand &SrcMO = MI.getOperand(1);
2747 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
2749 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
2750 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
2751 MI.eraseFromParent();
2752 return Legalized;
2753 }
2754 case TargetOpcode::G_IMPLICIT_DEF: {
2756 widenScalarDst(MI, WideTy);
2758 return Legalized;
2759 }
2760 case TargetOpcode::G_BRCOND:
2762 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
2764 return Legalized;
2765
2766 case TargetOpcode::G_FCMP:
2768 if (TypeIdx == 0)
2769 widenScalarDst(MI, WideTy);
2770 else {
2771 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2772 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
2773 }
2775 return Legalized;
2776
2777 case TargetOpcode::G_ICMP:
2779 if (TypeIdx == 0)
2780 widenScalarDst(MI, WideTy);
2781 else {
2782 unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
2783 MI.getOperand(1).getPredicate()))
2784 ? TargetOpcode::G_SEXT
2785 : TargetOpcode::G_ZEXT;
2786 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
2787 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
2788 }
2790 return Legalized;
2791
2792 case TargetOpcode::G_PTR_ADD:
2793 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
2795 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2797 return Legalized;
2798
2799 case TargetOpcode::G_PHI: {
2800 assert(TypeIdx == 0 && "Expecting only Idx 0");
2801
2803 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
2804 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2806 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
2807 }
2808
2809 MachineBasicBlock &MBB = *MI.getParent();
2811 widenScalarDst(MI, WideTy);
2813 return Legalized;
2814 }
2815 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2816 if (TypeIdx == 0) {
2817 Register VecReg = MI.getOperand(1).getReg();
2818 LLT VecTy = MRI.getType(VecReg);
2820
2821 widenScalarSrc(
2822 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
2823 TargetOpcode::G_ANYEXT);
2824
2825 widenScalarDst(MI, WideTy, 0);
2827 return Legalized;
2828 }
2829
2830 if (TypeIdx != 2)
2831 return UnableToLegalize;
2833 // TODO: Probably should be zext
2834 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2836 return Legalized;
2837 }
2838 case TargetOpcode::G_INSERT_VECTOR_ELT: {
2839 if (TypeIdx == 0) {
2841 const LLT WideEltTy = WideTy.getElementType();
2842
2843 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2844 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
2845 widenScalarDst(MI, WideTy, 0);
2847 return Legalized;
2848 }
2849
2850 if (TypeIdx == 1) {
2852
2853 Register VecReg = MI.getOperand(1).getReg();
2854 LLT VecTy = MRI.getType(VecReg);
2855 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
2856
2857 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
2858 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2859 widenScalarDst(MI, WideVecTy, 0);
2861 return Legalized;
2862 }
2863
2864 if (TypeIdx == 2) {
2866 // TODO: Probably should be zext
2867 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2869 return Legalized;
2870 }
2871
2872 return UnableToLegalize;
2873 }
2874 case TargetOpcode::G_FADD:
2875 case TargetOpcode::G_FMUL:
2876 case TargetOpcode::G_FSUB:
2877 case TargetOpcode::G_FMA:
2878 case TargetOpcode::G_FMAD:
2879 case TargetOpcode::G_FNEG:
2880 case TargetOpcode::G_FABS:
2881 case TargetOpcode::G_FCANONICALIZE:
2882 case TargetOpcode::G_FMINNUM:
2883 case TargetOpcode::G_FMAXNUM:
2884 case TargetOpcode::G_FMINNUM_IEEE:
2885 case TargetOpcode::G_FMAXNUM_IEEE:
2886 case TargetOpcode::G_FMINIMUM:
2887 case TargetOpcode::G_FMAXIMUM:
2888 case TargetOpcode::G_FDIV:
2889 case TargetOpcode::G_FREM:
2890 case TargetOpcode::G_FCEIL:
2891 case TargetOpcode::G_FFLOOR:
2892 case TargetOpcode::G_FCOS:
2893 case TargetOpcode::G_FSIN:
2894 case TargetOpcode::G_FLOG10:
2895 case TargetOpcode::G_FLOG:
2896 case TargetOpcode::G_FLOG2:
2897 case TargetOpcode::G_FRINT:
2898 case TargetOpcode::G_FNEARBYINT:
2899 case TargetOpcode::G_FSQRT:
2900 case TargetOpcode::G_FEXP:
2901 case TargetOpcode::G_FEXP2:
2902 case TargetOpcode::G_FEXP10:
2903 case TargetOpcode::G_FPOW:
2904 case TargetOpcode::G_INTRINSIC_TRUNC:
2905 case TargetOpcode::G_INTRINSIC_ROUND:
2906 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2907 assert(TypeIdx == 0);
2909
2910 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
2911 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
2912
2913 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2915 return Legalized;
2916 case TargetOpcode::G_FPOWI:
2917 case TargetOpcode::G_FLDEXP:
2918 case TargetOpcode::G_STRICT_FLDEXP: {
2919 if (TypeIdx == 0) {
2920 if (MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP)
2921 return UnableToLegalize;
2922
2924 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2925 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2927 return Legalized;
2928 }
2929
2930 if (TypeIdx == 1) {
2931 // For some reason SelectionDAG tries to promote to a libcall without
2932 // actually changing the integer type for promotion.
2934 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2936 return Legalized;
2937 }
2938
2939 return UnableToLegalize;
2940 }
2941 case TargetOpcode::G_FFREXP: {
2943
2944 if (TypeIdx == 0) {
2945 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2946 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2947 } else {
2948 widenScalarDst(MI, WideTy, 1);
2949 }
2950
2952 return Legalized;
2953 }
2954 case TargetOpcode::G_INTTOPTR:
2955 if (TypeIdx != 1)
2956 return UnableToLegalize;
2957
2959 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2961 return Legalized;
2962 case TargetOpcode::G_PTRTOINT:
2963 if (TypeIdx != 0)
2964 return UnableToLegalize;
2965
2967 widenScalarDst(MI, WideTy, 0);
2969 return Legalized;
2970 case TargetOpcode::G_BUILD_VECTOR: {
2972
2973 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
2974 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
2975 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
2976
2977 // Avoid changing the result vector type if the source element type was
2978 // requested.
2979 if (TypeIdx == 1) {
2980 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
2981 } else {
2982 widenScalarDst(MI, WideTy, 0);
2983 }
2984
2986 return Legalized;
2987 }
2988 case TargetOpcode::G_SEXT_INREG:
2989 if (TypeIdx != 0)
2990 return UnableToLegalize;
2991
2993 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2994 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
2996 return Legalized;
2997 case TargetOpcode::G_PTRMASK: {
2998 if (TypeIdx != 1)
2999 return UnableToLegalize;
3001 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3003 return Legalized;
3004 }
3005 case TargetOpcode::G_VECREDUCE_FADD:
3006 case TargetOpcode::G_VECREDUCE_FMUL:
3007 case TargetOpcode::G_VECREDUCE_FMIN:
3008 case TargetOpcode::G_VECREDUCE_FMAX:
3009 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3010 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3011 if (TypeIdx != 0)
3012 return UnableToLegalize;
3014 Register VecReg = MI.getOperand(1).getReg();
3015 LLT VecTy = MRI.getType(VecReg);
3016 LLT WideVecTy = VecTy.isVector()
3017 ? LLT::vector(VecTy.getElementCount(), WideTy)
3018 : WideTy;
3019 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3020 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3022 return Legalized;
3023 }
3024 case TargetOpcode::G_VSCALE: {
3025 MachineOperand &SrcMO = MI.getOperand(1);
3027 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3028 // The CImm is always a signed value
3029 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3031 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3032 widenScalarDst(MI, WideTy);
3034 return Legalized;
3035 }
3036 case TargetOpcode::G_SPLAT_VECTOR: {
3037 if (TypeIdx != 1)
3038 return UnableToLegalize;
3039
3041 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3043 return Legalized;
3044 }
3045 }
3046}
3047
3048static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3049 MachineIRBuilder &B, Register Src, LLT Ty) {
3050 auto Unmerge = B.buildUnmerge(Ty, Src);
3051 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3052 Pieces.push_back(Unmerge.getReg(I));
3053}
3054
3055static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3056 MachineIRBuilder &MIRBuilder) {
3057 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3058 MachineFunction &MF = MIRBuilder.getMF();
3059 const DataLayout &DL = MIRBuilder.getDataLayout();
3060 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3061 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3062 LLT DstLLT = MRI.getType(DstReg);
3063
3064 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3065
3066 auto Addr = MIRBuilder.buildConstantPool(
3067 AddrPtrTy,
3068 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3069
3070 MachineMemOperand *MMO =
3071 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3072 MachineMemOperand::MOLoad, DstLLT, Alignment);
3073
3074 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3075}
3076
3077LegalizerHelper::LegalizeResult
3078LegalizerHelper::lowerConstant(MachineInstr &MI) {
3079 const MachineOperand &ConstOperand = MI.getOperand(1);
3080 const Constant *ConstantVal = ConstOperand.getCImm();
3081
3082 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3083 MI.eraseFromParent();
3084
3085 return Legalized;
3086}
3087
3088LegalizerHelper::LegalizeResult
3089LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3090 const MachineOperand &ConstOperand = MI.getOperand(1);
3091 const Constant *ConstantVal = ConstOperand.getFPImm();
3092
3093 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3094 MI.eraseFromParent();
3095
3096 return Legalized;
3097}
3098
3099LegalizerHelper::LegalizeResult
3100LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3101 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3102 if (SrcTy.isVector()) {
3103 LLT SrcEltTy = SrcTy.getElementType();
3104 SmallVector<Register, 8> SrcRegs;
3105
3106 if (DstTy.isVector()) {
3107 int NumDstElt = DstTy.getNumElements();
3108 int NumSrcElt = SrcTy.getNumElements();
3109
3110 LLT DstEltTy = DstTy.getElementType();
3111 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3112 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3113
3114 // If there's an element size mismatch, insert intermediate casts to match
3115 // the result element type.
3116 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3117 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3118 //
3119 // =>
3120 //
3121 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3122 // %4:_(<2 x s8>) = G_BITCAST %2
3123 // %5:_(<2 x s8>) = G_BITCAST %3
3124 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3125 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
3126 SrcPartTy = SrcEltTy;
3127 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3128 //
3129 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3130 //
3131 // =>
3132 //
3133 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3134 // %4:_(s16) = G_BITCAST %2
3135 // %5:_(s16) = G_BITCAST %3
3136 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3137 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
3138 DstCastTy = DstEltTy;
3139 }
3140
3141 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3142 for (Register &SrcReg : SrcRegs)
3143 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3144 } else
3145 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3146
3147 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3148 MI.eraseFromParent();
3149 return Legalized;
3150 }
3151
3152 if (DstTy.isVector()) {
3153 SmallVector<Register, 8> SrcRegs;
3154 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3155 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3156 MI.eraseFromParent();
3157 return Legalized;
3158 }
3159
3160 return UnableToLegalize;
3161}
3162
3163/// Figure out the bit offset into a register when coercing a vector index for
3164/// the wide element type. This is only for the case when promoting vector to
3165/// one with larger elements.
3166///
3167///
3168/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3169/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3171 Register Idx,
3172 unsigned NewEltSize,
3173 unsigned OldEltSize) {
3174 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3175 LLT IdxTy = B.getMRI()->getType(Idx);
3176
3177 // Now figure out the amount we need to shift to get the target bits.
3178 auto OffsetMask = B.buildConstant(
3179 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3180 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3181 return B.buildShl(IdxTy, OffsetIdx,
3182 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3183}
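// Worked example (editorial addition, not in the source): viewing s8
// elements through s32 ones (NewEltSize / OldEltSize = 4), index 5 gives
//   %offset_idx  = 5 & (4 - 1) = 1
//   %offset_bits = 1 << Log2(8) = 8
// i.e. the s8 element occupies bits [8, 16) of wide element 5 / 4 = 1.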
3184
3185/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3186/// is casting to a vector with a smaller element size, perform multiple element
3187/// extracts and merge the results. If this is coercing to a vector with larger
3188/// elements, index the bitcasted vector and extract the target element with bit
3189/// operations. This is intended to force the indexing in the native register
3190/// size for architectures that can dynamically index the register file.
3191LegalizerHelper::LegalizeResult
3192LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3193 LLT CastTy) {
3194 if (TypeIdx != 1)
3195 return UnableToLegalize;
3196
3197 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3198
3199 LLT SrcEltTy = SrcVecTy.getElementType();
3200 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3201 unsigned OldNumElts = SrcVecTy.getNumElements();
3202
3203 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3204 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3205
3206 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3207 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3208 if (NewNumElts > OldNumElts) {
3209 // Decreasing the vector element size
3210 //
3211 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3212 // =>
3213 // v4i32:castx = bitcast x:v2i64
3214 //
3215 // i64 = bitcast
3216 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3217 // (i32 (extract_vector_elt castx, (2 * y + 1)))
3218 //
3219 if (NewNumElts % OldNumElts != 0)
3220 return UnableToLegalize;
3221
3222 // Type of the intermediate result vector.
3223 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3224 LLT MidTy =
3225 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3226
3227 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3228
3229 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3230 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3231
3232 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3233 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3234 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3235 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3236 NewOps[I] = Elt.getReg(0);
3237 }
3238
3239 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3240 MIRBuilder.buildBitcast(Dst, NewVec);
3241 MI.eraseFromParent();
3242 return Legalized;
3243 }
3244
3245 if (NewNumElts < OldNumElts) {
3246 if (NewEltSize % OldEltSize != 0)
3247 return UnableToLegalize;
3248
3249 // This only depends on powers of 2 because we use bit tricks to figure out
3250 // the bit offset we need to shift to get the target element. A general
3251 // expansion could emit division/multiply.
3252 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3253 return UnableToLegalize;
3254
3255 // Increasing the vector element size.
3256 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3257 //
3258 // =>
3259 //
3260 // %cast = G_BITCAST %vec
3261 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3262 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3263 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3264 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3265 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3266 // %elt = G_TRUNC %elt_bits
3267
3268 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3269 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3270
3271 // Divide to get the index in the wider element type.
3272 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3273
3274 Register WideElt = CastVec;
3275 if (CastTy.isVector()) {
3276 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3277 ScaledIdx).getReg(0);
3278 }
3279
3280 // Compute the bit offset into the register of the target element.
3281 Register OffsetBits = getBitcastWiderVectorElementOffset(
3282 MIRBuilder, Idx, NewEltSize, OldEltSize);
3283
3284 // Shift the wide element to get the target element.
3285 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3286 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3287 MI.eraseFromParent();
3288 return Legalized;
3289 }
3290
3291 return UnableToLegalize;
3292}
3293
3294/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits in \p
3295/// TargetReg, while preserving other bits in \p TargetReg.
3296///
3297/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
3298static Register buildBitFieldInsert(MachineIRBuilder &B,
3299 Register TargetReg, Register InsertReg,
3300 Register OffsetBits) {
3301 LLT TargetTy = B.getMRI()->getType(TargetReg);
3302 LLT InsertTy = B.getMRI()->getType(InsertReg);
3303 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3304 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3305
3306 // Produce a bitmask of the value to insert
3307 auto EltMask = B.buildConstant(
3308 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3309 InsertTy.getSizeInBits()));
3310 // Shift it into position
3311 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3312 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3313
3314 // Clear out the bits in the wide element
3315 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3316
3317 // The value to insert has all zeros already, so stick it into the masked
3318 // wide element.
3319 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3320}
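// Worked example (editorial addition, not in the source): inserting an s8
// value V at OffsetBits = 8 of an s32 value T computes
//   (T & ~(0xff << 8)) | (zext(V) << 8)
// which is exactly the mask, clear, and OR sequence built above.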
3321
3322/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3323/// is increasing the element size, perform the indexing in the target element
3324/// type, and use bit operations to insert at the element position. This is
3325/// intended for architectures that can dynamically index the register file and
3326/// want to force indexing in the native register size.
3327LegalizerHelper::LegalizeResult
3328LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3329 LLT CastTy) {
3330 if (TypeIdx != 0)
3331 return UnableToLegalize;
3332
3333 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3334 MI.getFirst4RegLLTs();
3335 LLT VecTy = DstTy;
3336
3337 LLT VecEltTy = VecTy.getElementType();
3338 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3339 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3340 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3341
3342 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3343 unsigned OldNumElts = VecTy.getNumElements();
3344
3345 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3346 if (NewNumElts < OldNumElts) {
3347 if (NewEltSize % OldEltSize != 0)
3348 return UnableToLegalize;
3349
3350 // This only depends on powers of 2 because we use bit tricks to figure out
3351 // the bit offset we need to shift to get the target element. A general
3352 // expansion could emit division/multiply.
3353 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3354 return UnableToLegalize;
3355
3356 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3357 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3358
3359 // Divide to get the index in the wider element type.
3360 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3361
3362 Register ExtractedElt = CastVec;
3363 if (CastTy.isVector()) {
3364 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3365 ScaledIdx).getReg(0);
3366 }
3367
3368 // Compute the bit offset into the register of the target element.
3369 Register OffsetBits = getBitcastWiderVectorElementOffset(
3370 MIRBuilder, Idx, NewEltSize, OldEltSize);
3371
3372 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3373 Val, OffsetBits);
3374 if (CastTy.isVector()) {
3375 InsertedElt = MIRBuilder.buildInsertVectorElement(
3376 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3377 }
3378
3379 MIRBuilder.buildBitcast(Dst, InsertedElt);
3380 MI.eraseFromParent();
3381 return Legalized;
3382 }
3383
3384 return UnableToLegalize;
3385}
3386
3387LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
3388 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
3389 Register DstReg = LoadMI.getDstReg();
3390 Register PtrReg = LoadMI.getPointerReg();
3391 LLT DstTy = MRI.getType(DstReg);
3392 MachineMemOperand &MMO = LoadMI.getMMO();
3393 LLT MemTy = MMO.getMemoryType();
3394 MachineFunction &MF = MIRBuilder.getMF();
3395
3396 unsigned MemSizeInBits = MemTy.getSizeInBits();
3397 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
3398
3399 if (MemSizeInBits != MemStoreSizeInBits) {
3400 if (MemTy.isVector())
3401 return UnableToLegalize;
3402
3403 // Promote to a byte-sized load if not loading an integral number of
3404 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
3405 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
3406 MachineMemOperand *NewMMO =
3407 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
3408
3409 Register LoadReg = DstReg;
3410 LLT LoadTy = DstTy;
3411
3412 // If this wasn't already an extending load, we need to widen the result
3413 // register to avoid creating a load with a narrower result than the source.
3414 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
3415 LoadTy = WideMemTy;
3416 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
3417 }
3418
3419 if (isa<GSExtLoad>(LoadMI)) {
3420 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3421 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
3422 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
3423 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3424 // The extra bits are guaranteed to be zero, since we stored them that
3425 // way. A zext load from Wide thus automatically gives zext from MemVT.
3426 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
3427 } else {
3428 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
3429 }
3430
3431 if (DstTy != LoadTy)
3432 MIRBuilder.buildTrunc(DstReg, LoadReg);
3433
3434 LoadMI.eraseFromParent();
3435 return Legalized;
3436 }
3437
3438 // Big endian lowering not implemented.
3439 if (MIRBuilder.getDataLayout().isBigEndian())
3440 return UnableToLegalize;
3441
3442 // This load needs splitting into power of 2 sized loads.
3443 //
3444 // Our strategy here is to generate anyextending loads for the smaller
3445 // types up to next power-2 result type, and then combine the two larger
3446 // result values together, before truncating back down to the non-pow-2
3447 // type.
3448 // E.g. v1 = i24 load =>
3449 // v2 = i32 zextload (2 byte)
3450 // v3 = i32 load (1 byte)
3451 // v4 = i32 shl v3, 16
3452 // v5 = i32 or v4, v2
3453 // v1 = i24 trunc v5
3454 // By doing this we generate the correct truncate which should get
3455 // combined away as an artifact with a matching extend.
3456
3457 uint64_t LargeSplitSize, SmallSplitSize;
3458
3459 if (!isPowerOf2_32(MemSizeInBits)) {
3460 // This load needs splitting into power of 2 sized loads.
3461 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
3462 SmallSplitSize = MemSizeInBits - LargeSplitSize;
3463 } else {
3464 // This is already a power of 2, but we still need to split this in half.
3465 //
3466 // Assume we're being asked to decompose an unaligned load.
3467 // TODO: If this requires multiple splits, handle them all at once.
3468 auto &Ctx = MF.getFunction().getContext();
3469 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3470 return UnableToLegalize;
3471
3472 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3473 }
3474
3475 if (MemTy.isVector()) {
3476 // TODO: Handle vector extloads
3477 if (MemTy != DstTy)
3478 return UnableToLegalize;
3479
3480 // TODO: We can do better than scalarizing the vector and at least split it
3481 // in half.
3482 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
3483 }
3484
3485 MachineMemOperand *LargeMMO =
3486 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3487 MachineMemOperand *SmallMMO =
3488 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3489
3490 LLT PtrTy = MRI.getType(PtrReg);
3491 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
3492 LLT AnyExtTy = LLT::scalar(AnyExtSize);
3493 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
3494 PtrReg, *LargeMMO);
3495
3496 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
3497 LargeSplitSize / 8);
3498 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
3499 auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
3500 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
3501 SmallPtr, *SmallMMO);
3502
3503 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
3504 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
3505
3506 if (AnyExtTy == DstTy)
3507 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
3508 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
3509 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3510 MIRBuilder.buildTrunc(DstReg, {Or});
3511 } else {
3512 assert(DstTy.isPointer() && "expected pointer");
3513 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3514
3515 // FIXME: We currently consider this to be illegal for non-integral address
3516 // spaces, but we still need a way to reinterpret the bits.
3517 MIRBuilder.buildIntToPtr(DstReg, Or);
3518 }
3519
3520 LoadMI.eraseFromParent();
3521 return Legalized;
3522}
3523
3524 LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
3525 // Lower a non-power of 2 store into multiple pow-2 stores.
3526 // E.g. split an i24 store into an i16 store + i8 store.
3527 // We do this by first extending the stored value to the next largest power
3528 // of 2 type, and then using truncating stores to store the components.
3529 // By doing this, likewise with G_LOAD, generate an extend that can be
3530 // artifact-combined away instead of leaving behind extracts.
3531 Register SrcReg = StoreMI.getValueReg();
3532 Register PtrReg = StoreMI.getPointerReg();
3533 LLT SrcTy = MRI.getType(SrcReg);
3534 MachineFunction &MF = MIRBuilder.getMF();
3535 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
3536 LLT MemTy = MMO.getMemoryType();
3537
3538 unsigned StoreWidth = MemTy.getSizeInBits();
3539 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
3540
3541 if (StoreWidth != StoreSizeInBits) {
3542 if (SrcTy.isVector())
3543 return UnableToLegalize;
3544
3545 // Promote to a byte-sized store with upper bits zero if not
3546 // storing an integral number of bytes. For example, promote
3547 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
3548 LLT WideTy = LLT::scalar(StoreSizeInBits);
3549
3550 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
3551 // Avoid creating a store with a narrower source than result.
3552 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
3553 SrcTy = WideTy;
3554 }
3555
3556 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
3557
3558 MachineMemOperand *NewMMO =
3559 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
3560 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
3561 StoreMI.eraseFromParent();
3562 return Legalized;
3563 }
3564
3565 if (MemTy.isVector()) {
3566 // TODO: Handle vector trunc stores
3567 if (MemTy != SrcTy)
3568 return UnableToLegalize;
3569
3570 // TODO: We can do better than scalarizing the vector and at least split it
3571 // in half.
3572 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
3573 }
3574
3575 unsigned MemSizeInBits = MemTy.getSizeInBits();
3576 uint64_t LargeSplitSize, SmallSplitSize;
3577
3578 if (!isPowerOf2_32(MemSizeInBits)) {
3579 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
3580 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
3581 } else {
3582 auto &Ctx = MF.getFunction().getContext();
3583 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3584 return UnableToLegalize; // Don't know what we're being asked to do.
3585
3586 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3587 }
3588
3589 // Extend to the next pow-2. If this store was itself the result of lowering,
3590 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
3591 // that's wider than the stored size.
3592 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
3593 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
3594
3595 if (SrcTy.isPointer()) {
3596 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
3597 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
3598 }
3599
3600 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
3601
3602 // Obtain the smaller value by shifting away the larger value.
3603 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
3604 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
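  // e.g. for an s24 store split as s16 + s8, SmallVal = ExtVal >> 16 holds the
  // top 8 bits and is stored at byte offset 2.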
3605
3606 // Generate the PtrAdd and truncating stores.
3607 LLT PtrTy = MRI.getType(PtrReg);
3608 auto OffsetCst = MIRBuilder.buildConstant(
3609 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
3610 auto SmallPtr =
3611 MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
3612
3613 MachineMemOperand *LargeMMO =
3614 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3615 MachineMemOperand *SmallMMO =
3616 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3617 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
3618 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
3619 StoreMI.eraseFromParent();
3620 return Legalized;
3621}
3622
3623 LegalizerHelper::LegalizeResult
3624 LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
3625 switch (MI.getOpcode()) {
3626 case TargetOpcode::G_LOAD: {
3627 if (TypeIdx != 0)
3628 return UnableToLegalize;
3629 MachineMemOperand &MMO = **MI.memoperands_begin();
3630
3631 // Not sure how to interpret a bitcast of an extending load.
3632 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3633 return UnableToLegalize;
3634
3636 bitcastDst(MI, CastTy, 0);
3637 MMO.setType(CastTy);
3639 return Legalized;
3640 }
3641 case TargetOpcode::G_STORE: {
3642 if (TypeIdx != 0)
3643 return UnableToLegalize;
3644
3645 MachineMemOperand &MMO = **MI.memoperands_begin();
3646
3647 // Not sure how to interpret a bitcast of a truncating store.
3648 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3649 return UnableToLegalize;
3650
3652 bitcastSrc(MI, CastTy, 0);
3653 MMO.setType(CastTy);
3655 return Legalized;
3656 }
3657 case TargetOpcode::G_SELECT: {
3658 if (TypeIdx != 0)
3659 return UnableToLegalize;
3660
3661 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
3662 LLVM_DEBUG(
3663 dbgs() << "bitcast action not implemented for vector select\n");
3664 return UnableToLegalize;
3665 }
3666
3668 bitcastSrc(MI, CastTy, 2);
3669 bitcastSrc(MI, CastTy, 3);
3670 bitcastDst(MI, CastTy, 0);
3672 return Legalized;
3673 }
3674 case TargetOpcode::G_AND:
3675 case TargetOpcode::G_OR:
3676 case TargetOpcode::G_XOR: {
3678 bitcastSrc(MI, CastTy, 1);
3679 bitcastSrc(MI, CastTy, 2);
3680 bitcastDst(MI, CastTy, 0);
3682 return Legalized;
3683 }
3684 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3685 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
3686 case TargetOpcode::G_INSERT_VECTOR_ELT:
3687 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
3688 default:
3689 return UnableToLegalize;
3690 }
3691}
3692
3693// Legalize an instruction by changing the opcode in place.
3694void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
3696 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
3698}
3699
3700 LegalizerHelper::LegalizeResult
3701 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
3702 using namespace TargetOpcode;
3703
3704 switch(MI.getOpcode()) {
3705 default:
3706 return UnableToLegalize;
3707 case TargetOpcode::G_FCONSTANT:
3708 return lowerFConstant(MI);
3709 case TargetOpcode::G_BITCAST:
3710 return lowerBitcast(MI);
3711 case TargetOpcode::G_SREM:
3712 case TargetOpcode::G_UREM: {
3713 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
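    // Expand the remainder as LHS - (LHS / RHS) * RHS, using the matching
    // signed or unsigned division.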
3714 auto Quot =
3715 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
3716 {MI.getOperand(1), MI.getOperand(2)});
3717
3718 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
3719 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
3720 MI.eraseFromParent();
3721 return Legalized;
3722 }
3723 case TargetOpcode::G_SADDO:
3724 case TargetOpcode::G_SSUBO:
3725 return lowerSADDO_SSUBO(MI);
3726 case TargetOpcode::G_UMULH:
3727 case TargetOpcode::G_SMULH:
3728 return lowerSMULH_UMULH(MI);
3729 case TargetOpcode::G_SMULO:
3730 case TargetOpcode::G_UMULO: {
3731 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
3732 // result.
3733 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
3734 LLT Ty = MRI.getType(Res);
3735
3736 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
3737 ? TargetOpcode::G_SMULH
3738 : TargetOpcode::G_UMULH;
3739
3741 const auto &TII = MIRBuilder.getTII();
3742 MI.setDesc(TII.get(TargetOpcode::G_MUL));
3743 MI.removeOperand(1);
3745
3746 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
3747 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3748
3749 // Move insert point forward so we can use the Res register if needed.
3750 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3751
3752 // For *signed* multiply, overflow is detected by checking:
3753 // (hi != (lo >> bitwidth-1))
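    // e.g. for s32, if the full product fits, the high half is just the
    // sign-extension of the low half, so it must equal (lo >> 31); any
    // mismatch means the multiply overflowed.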
3754 if (Opcode == TargetOpcode::G_SMULH) {
3755 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
3756 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
3757 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
3758 } else {
3759 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
3760 }
3761 return Legalized;
3762 }
3763 case TargetOpcode::G_FNEG: {
3764 auto [Res, SubByReg] = MI.getFirst2Regs();
3765 LLT Ty = MRI.getType(Res);
3766
3767 // TODO: Handle vector types once we are able to
3768 // represent them.
3769 if (Ty.isVector())
3770 return UnableToLegalize;
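    // Negation only flips the sign bit, so XOR with a mask that has just the
    // MSB set (e.g. 0x80000000 for s32).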
3771 auto SignMask =
3772 MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
3773 MIRBuilder.buildXor(Res, SubByReg, SignMask);
3774 MI.eraseFromParent();
3775 return Legalized;
3776 }
3777 case TargetOpcode::G_FSUB:
3778 case TargetOpcode::G_STRICT_FSUB: {
3779 auto [Res, LHS, RHS] = MI.getFirst3Regs();
3780 LLT Ty = MRI.getType(Res);
3781
3782 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
3783 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
3784
3785 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
3786 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
3787 else
3788 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3789
3790 MI.eraseFromParent();
3791 return Legalized;
3792 }
3793 case TargetOpcode::G_FMAD:
3794 return lowerFMad(MI);
3795 case TargetOpcode::G_FFLOOR:
3796 return lowerFFloor(MI);
3797 case TargetOpcode::G_INTRINSIC_ROUND:
3798 return lowerIntrinsicRound(MI);
3799 case TargetOpcode::G_FRINT: {
3800 // Since round even is the assumed rounding mode for unconstrained FP
3801 // operations, rint and roundeven are the same operation.
3802 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
3803 return Legalized;
3804 }
3805 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
3806 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
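    // Lower to a plain G_ATOMIC_CMPXCHG and recompute the success flag by
    // comparing the loaded value against the expected value.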
3807 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
3808 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
3809 **MI.memoperands_begin());
3810 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
3811 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
3812 MI.eraseFromParent();
3813 return Legalized;
3814 }
3815 case TargetOpcode::G_LOAD:
3816 case TargetOpcode::G_SEXTLOAD:
3817 case TargetOpcode::G_ZEXTLOAD:
3818 return lowerLoad(cast<GAnyLoad>(MI));
3819 case TargetOpcode::G_STORE:
3820 return lowerStore(cast<GStore>(MI));
3821 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3822 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3823 case TargetOpcode::G_CTLZ:
3824 case TargetOpcode::G_CTTZ:
3825 case TargetOpcode::G_CTPOP:
3826 return lowerBitCount(MI);
3827 case G_UADDO: {
3828 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
3829
3830 Register NewRes = MRI.cloneVirtualRegister(Res);
3831
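    // Lower to a plain add plus an unsigned compare: the addition wrapped
    // exactly when the truncated sum is less than either operand, e.g. for s8,
    // 200 + 100 = 44 and 44 < 100 signals the carry.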
3832 MIRBuilder.buildAdd(NewRes, LHS, RHS);
3833 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
3834
3835 MIRBuilder.buildCopy(Res, NewRes);
3836
3837 MI.eraseFromParent();
3838 return Legalized;
3839 }
3840 case G_UADDE: {
3841 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
3842 const LLT CondTy = MRI.getType(CarryOut);
3843 const LLT Ty = MRI.getType(Res);
3844
3845 Register NewRes = MRI.cloneVirtualRegister(Res);
3846
3847 // Initial add of the two operands.
3848 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
3849
3850 // Initial check for carry.
3851 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
3852
3853 // Add the sum and the carry.
3854 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
3855 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
3856
3857 // Second check for carry. We can only carry if the initial sum is all 1s
3858 // and the carry is set, resulting in a new sum of 0.
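    // e.g. for s8: LHS + RHS = 255 with CarryIn = 1 wraps NewRes to 0, a carry
    // the first compare alone would not see.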
3859 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3860 auto ResEqZero =
3861 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
3862 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
3863 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
3864
3865 MIRBuilder.buildCopy(Res, NewRes);
3866
3867 MI.eraseFromParent();
3868 return Legalized;
3869 }
3870 case G_USUBO: {
3871 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
3872
3873 MIRBuilder.buildSub(Res, LHS, RHS);
3874 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
3875
3876 MI.eraseFromParent();
3877 return Legalized;
3878 }
3879 case G_USUBE: {
3880 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
3881 const LLT CondTy = MRI.getType(BorrowOut);
3882 const LLT Ty = MRI.getType(Res);
3883
3884 // Initial subtract of the two operands.
3885 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3886
3887 // Initial check for borrow.
3888 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
3889
3890 // Subtract the borrow from the first subtract.
3891 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
3892 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
3893
3894 // Second check for borrow. We can only borrow if the initial difference is
3895 // 0 and the borrow is set, resulting in a new difference of all 1s.
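    // e.g. for s8: LHS == RHS gives TmpRes = 0; subtracting BorrowIn = 1 then
    // wraps to 255, a borrow the first compare alone would not see.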
3896 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3897 auto TmpResEqZero =
3898 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
3899 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
3900 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
3901
3902 MI.eraseFromParent();
3903 return Legalized;
3904 }
3905 case G_UITOFP:
3906 return lowerUITOFP(MI);
3907 case G_SITOFP:
3908 return lowerSITOFP(MI);
3909 case G_FPTOUI:
3910 return lowerFPTOUI(MI);
3911 case G_FPTOSI:
3912 return lowerFPTOSI(MI);
3913 case G_FPTRUNC:
3914 return lowerFPTRUNC(MI);
3915 case G_FPOWI:
3916 return lowerFPOWI(MI);
3917 case G_SMIN:
3918 case G_SMAX:
3919 case G_UMIN:
3920 case G_UMAX:
3921 return lowerMinMax(MI);
3922 case G_FCOPYSIGN:
3923 return lowerFCopySign(MI);
3924 case G_FMINNUM:
3925 case G_FMAXNUM:
3926 return lowerFMinNumMaxNum(MI);
3927 case G_MERGE_VALUES:
3928 return lowerMergeValues(MI);
3929 case G_UNMERGE_VALUES:
3930 return lowerUnmergeValues(MI);
3931 case TargetOpcode::G_SEXT_INREG: {
3932 assert(MI.getOperand(2).isImm() && "Expected immediate");
3933 int64_t SizeInBits = MI.getOperand(2).getImm();
3934
3935 auto [DstReg, SrcReg] = MI.getFirst2Regs();
3936 LLT DstTy = MRI.getType(DstReg);
3937 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
3938
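    // Shift the SizeInBits-wide field to the top of the register, then
    // arithmetic-shift it back down so its sign bit fills the high bits, e.g.
    // G_SEXT_INREG %x, 8 on s32 becomes (ashr (shl %x, 24), 24).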
3939 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
3940 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
3941 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
3942 MI.eraseFromParent();
3943 return Legalized;
3944 }
3945 case G_EXTRACT_VECTOR_ELT:
3946 case G_INSERT_VECTOR_ELT:
3947 return lowerExtractInsertVectorElt(MI);
3948 case G_SHUFFLE_VECTOR:
3949 return lowerShuffleVector(MI);
3950 case G_DYN_STACKALLOC:
3951 return lowerDynStackAlloc(MI);
3952 case G_STACKSAVE:
3953 return lowerStackSave(MI);
3954 case G_STACKRESTORE:
3955 return lowerStackRestore(MI);
3956 case G_EXTRACT:
3957 return lowerExtract(MI);
3958 case G_INSERT:
3959 return lowerInsert(MI);
3960 case G_BSWAP:
3961 return lowerBswap(MI);
3962 case G_BITREVERSE:
3963 return lowerBitreverse(MI);
3964 case G_READ_REGISTER:
3965 case G_WRITE_REGISTER:
3966 return lowerReadWriteRegister(MI);
3967 case G_UADDSAT:
3968 case G_USUBSAT: {
3969 // Try to make a reasonable guess about which lowering strategy to use. The
3970 // target can override this with custom lowering and calling the
3971 // implementation functions.
3972 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3973 if (LI.isLegalOrCustom({G_UMIN, Ty}))
3974 return lowerAddSubSatToMinMax(MI);
3975 return lowerAddSubSatToAddoSubo(MI);
3976 }
3977 case G_SADDSAT:
3978 case G_SSUBSAT: {
3979 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3980
3981 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3982 // since it's a shorter expansion. However, we would need to figure out the
3983 // preferred boolean type for the carry out for the query.
3984 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3985 return lowerAddSubSatToMinMax(MI);
3986 return lowerAddSubSatToAddoSubo(MI);
3987 }
3988 case G_SSHLSAT:
3989 case G_USHLSAT:
3990 return lowerShlSat(MI);
3991 case G_ABS:
3992 return lowerAbsToAddXor(MI);
3993 case G_SELECT:
3994 return lowerSelect(MI);
3995 case G_IS_FPCLASS:
3996 return lowerISFPCLASS(MI);
3997 case G_SDIVREM:
3998 case G_UDIVREM:
3999 return lowerDIVREM(MI);
4000 case G_FSHL:
4001 case G_FSHR:
4002 return lowerFunnelShift(MI);
4003 case G_ROTL:
4004 case G_ROTR:
4005 return lowerRotate(MI);
4006 case G_MEMSET:
4007 case G_MEMCPY:
4008 case G_MEMMOVE:
4009 return lowerMemCpyFamily(MI);
4010 case G_MEMCPY_INLINE:
4011 return lowerMemcpyInline(MI);
4012 case G_ZEXT:
4013 case G_SEXT:
4014 case G_ANYEXT:
4015 return lowerEXT(MI);
4016 case G_TRUNC:
4017 return lowerTRUNC(MI);
4018 GISEL_VECREDUCE_CASES_NONSEQ
4019 return lowerVectorReduction(MI);
4020 case G_VAARG:
4021 return lowerVAArg(MI);
4022 }
4023}
4024
4025 Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4026 Align MinAlign) const {
4027 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4028 // datalayout for the preferred alignment. Also there should be a target hook
4029 // for this to allow targets to reduce the alignment and ignore the
4030 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4031 // the type.
4032 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4033}
4034
4035 MachineInstrBuilder
4036 LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4037 MachinePointerInfo &PtrInfo) {
4038 MachineFunction &MF = MIRBuilder.getMF();
4039 const DataLayout &DL = MIRBuilder.getDataLayout();
4040 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4041
4042 unsigned AddrSpace = DL.getAllocaAddrSpace();
4043 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4044
4045 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4046 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4047}
4048
4050 LLT VecTy) {
4051 LLT IdxTy = B.getMRI()->getType(IdxReg);
4052 unsigned NElts = VecTy.getNumElements();
4053
4054 int64_t IdxVal;
4055 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4056 if (IdxVal < VecTy.getNumElements())
4057 return IdxReg;
4058 // If a constant index would be out of bounds, clamp it as well.
4059 }
4060
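  // Clamp a variable index into range: for a power-of-2 element count an AND
  // with NElts - 1 suffices (e.g. Idx & 3 for 4 elements); otherwise fall back
  // to umin(Idx, NElts - 1).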
4061 if (isPowerOf2_32(NElts)) {
4062 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4063 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4064 }
4065
4066 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4067 .getReg(0);
4068}
4069
4070 Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4071 Register Index) {
4072 LLT EltTy = VecTy.getElementType();
4073
4074 // Calculate the element offset and add it to the pointer.
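  // e.g. for a <4 x s32> vector in memory, element 3 lives at
  // VecPtr + 3 * 4 bytes, so the index is scaled by the element size in bytes.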
4075 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4076 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4077 "Converting bits to bytes lost precision");
4078
4080
4081 // Convert index to the correct size for the address space.
4082 const DataLayout &DL = MIRBuilder.getDataLayout();
4083 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4084 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4085 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4086 if (IdxTy != MRI.getType(Index))
4087 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4088
4089 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4090 MIRBuilder.buildConstant(IdxTy, EltSize));
4091
4092 LLT PtrTy = MRI.getType(VecPtr);
4093 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4094}
4095
4096#ifndef NDEBUG
4097 /// Check that all vector operands have the same number of elements. Other
4098 /// operands should be listed in NonVecOpIndices.
4099 static bool hasSameNumEltsOnAllVectorOperands(
4100 GenericMachineInstr &MI, MachineRegisterInfo &MRI,
4101 std::initializer_list<unsigned> NonVecOpIndices) {
4102 if (MI.getNumMemOperands() != 0)
4103 return false;
4104
4105 LLT VecTy = MRI.getType(MI.getReg(0));
4106 if (!VecTy.isVector())
4107 return false;
4108 unsigned NumElts = VecTy.getNumElements();
4109
4110 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4111 MachineOperand &Op = MI.getOperand(OpIdx);
4112 if (!Op.isReg()) {
4113 if (!is_contained(NonVecOpIndices, OpIdx))
4114 return false;
4115 continue;
4116 }
4117
4118 LLT Ty = MRI.getType(Op.getReg());
4119 if (!Ty.isVector()) {
4120 if (!is_contained(NonVecOpIndices, OpIdx))
4121 return false;
4122 continue;
4123 }
4124
4125 if (Ty.getNumElements() != NumElts)
4126 return false;
4127 }
4128
4129 return true;
4130}
4131#endif
4132
4133 /// Fill \p DstOps with DstOps that have the same total number of elements as
4134 /// \p Ty. These DstOps have either scalar type when \p NumElts = 1 or are
4135 /// vectors with \p NumElts elements. When Ty.getNumElements() is not a multiple
4136 /// of \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
4137static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
4138 unsigned NumElts) {
4139 LLT LeftoverTy;
4140 assert(Ty.isVector() && "Expected vector type");
4141 LLT EltTy = Ty.getElementType();
4142 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
4143 int NumParts, NumLeftover;
4144 std::tie(NumParts, NumLeftover) =
4145 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
4146
4147 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
4148 for (int i = 0; i < NumParts; ++i) {
4149 DstOps.push_back(NarrowTy);
4150 }
4151
4152 if (LeftoverTy.isValid()) {
4153 assert(NumLeftover == 1 && "expected exactly one leftover");
4154 DstOps.push_back(LeftoverTy);
4155 }
4156}
4157
4158/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
4159/// made from \p Op depending on operand type.
4160static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
4161 MachineOperand &Op) {
4162 for (unsigned i = 0; i < N; ++i) {
4163 if (Op.isReg())
4164 Ops.push_back(Op.getReg());
4165 else if (Op.isImm())
4166 Ops.push_back(Op.getImm());
4167 else if (Op.isPredicate())
4168 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
4169 else
4170 llvm_unreachable("Unsupported type");
4171 }
4172}
4173
4174// Handle splitting vector operations which need to have the same number of
4175// elements in each type index, but each type index may have a different element
4176// type.
4177//
4178// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
4179// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4180// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4181//
4182// Also handles some irregular breakdown cases, e.g.
4183// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
4184// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4185// s64 = G_SHL s64, s32
4186 LegalizerHelper::LegalizeResult
4187 LegalizerHelper::fewerElementsVectorMultiEltType(
4188 GenericMachineInstr &MI, unsigned NumElts,
4189 std::initializer_list<unsigned> NonVecOpIndices) {
4190 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
4191 "Non-compatible opcode or not specified non-vector operands");
4192 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4193
4194 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4195 unsigned NumDefs = MI.getNumDefs();
4196
4197 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
4198 // Build instructions with DstOps to use instruction found by CSE directly.
4199 // CSE copies found instruction into given vreg when building with vreg dest.
4200 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
4201 // Output registers will be taken from created instructions.
4202 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
4203 for (unsigned i = 0; i < NumDefs; ++i) {
4204 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
4205 }
4206
4207 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
4208 // Operands listed in NonVecOpIndices will be used as is without splitting;
4209 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
4210 // scalar condition (op 1), immediate in sext_inreg (op 2).
4211 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
4212 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4213 ++UseIdx, ++UseNo) {
4214 if (is_contained(NonVecOpIndices, UseIdx)) {
4215 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
4216 MI.getOperand(UseIdx));
4217 } else {
4218 SmallVector<Register, 8> SplitPieces;
4219 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
4220 MRI);
4221 for (auto Reg : SplitPieces)
4222 InputOpsPieces[UseNo].push_back(Reg);
4223 }
4224 }
4225
4226 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4227
4228 // Take i-th piece of each input operand split and build sub-vector/scalar
4229 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
4230 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4231 SmallVector<DstOp, 8> Defs;
4232 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4233 Defs.push_back(OutputOpsPieces[DstNo][i]);
4234
4235 SmallVector<SrcOp, 8> Uses;
4236 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
4236 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
4237 Uses.push_back(InputOpsPieces[InputNo][i]);
4238
4239 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
4240 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4241 OutputRegs[DstNo].push_back(I.getReg(DstNo));
4242 }
4243
4244 // Merge small outputs into MI's output for each def operand.
4245 if (NumLeftovers) {
4246 for (unsigned i = 0; i < NumDefs; ++i)
4247 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
4248 } else {
4249 for (unsigned i = 0; i < NumDefs; ++i)
4250 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
4251 }
4252
4253 MI.eraseFromParent();
4254 return Legalized;
4255}
4256
4257 LegalizerHelper::LegalizeResult
4258 LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
4259 unsigned NumElts) {
4260 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4261
4262 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4263 unsigned NumDefs = MI.getNumDefs();
4264
4265 SmallVector<DstOp, 8> OutputOpsPieces;
4266 SmallVector<Register, 8> OutputRegs;
4267 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
4268
4269 // Instructions that perform register split will be inserted in basic block
4270 // where register is defined (basic block is in the next operand).
4271 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
4272 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4273 UseIdx += 2, ++UseNo) {
4274 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
4276 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
4277 MIRBuilder, MRI);
4278 }
4279
4280 // Build PHIs with fewer elements.
4281 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4282 MIRBuilder.setInsertPt(*MI.getParent(), MI);
4283 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4284 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
4285 Phi.addDef(
4286 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
4287 OutputRegs.push_back(Phi.getReg(0));
4288
4289 for (unsigned j = 0; j < NumInputs / 2; ++j) {
4290 Phi.addUse(InputOpsPieces[j][i]);
4291 Phi.add(MI.getOperand(1 + j * 2 + 1));
4292 }
4293 }
4294
4295 // Set the insert point after the existing PHIs
4296 MachineBasicBlock &MBB = *MI.getParent();
4297 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
4298
4299 // Merge small outputs into MI's def.
4300 if (NumLeftovers) {
4301 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
4302 } else {
4303 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
4304 }
4305
4306 MI.eraseFromParent();
4307 return Legalized;
4308}
4309
4310 LegalizerHelper::LegalizeResult
4311 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
4312 unsigned TypeIdx,
4313 LLT NarrowTy) {
4314 const int NumDst = MI.getNumOperands() - 1;
4315 const Register SrcReg = MI.getOperand(NumDst).getReg();
4316 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4317 LLT SrcTy = MRI.getType(SrcReg);
4318
4319 if (TypeIdx != 1 || NarrowTy == DstTy)
4320 return UnableToLegalize;
4321
4322 // Requires compatible types. Otherwise SrcReg should have been defined by a
4323 // merge-like instruction that would get artifact-combined. Most likely the
4324 // instruction that defines SrcReg has to perform more/fewer-elements
4325 // legalization compatible with NarrowTy.
4326 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
4327 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4328
4329 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
4330 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
4331 return UnableToLegalize;
4332
4333 // This is most likely DstTy (smaller than register size) packed in SrcTy
4334 // (larger than register size), and since the unmerge was not combined it will
4335 // be lowered to bit sequence extracts from a register. Unpack SrcTy to NarrowTy
4336 // (register size) pieces first, then unpack each of the NarrowTy pieces to DstTy.
4337
4338 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
4339 //
4340 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
4341 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
4342 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
4343 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
4344 const int NumUnmerge = Unmerge->getNumOperands() - 1;
4345 const int PartsPerUnmerge = NumDst / NumUnmerge;
4346
4347 for (int I = 0; I != NumUnmerge; ++I) {
4348 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
4349
4350 for (int J = 0; J != PartsPerUnmerge; ++J)
4351 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
4352 MIB.addUse(Unmerge.getReg(I));
4353 }
4354
4355 MI.eraseFromParent();
4356 return Legalized;
4357}
4358
4359 LegalizerHelper::LegalizeResult
4360 LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
4361 LLT NarrowTy) {
4362 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
4363 // Requires compatible types. Otherwise the user of DstReg did not perform the
4364 // unmerge that should have been artifact-combined. Most likely the instruction
4365 // that uses DstReg has to do more/fewer-elements legalization compatible with NarrowTy.
4366 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
4367 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4368 if (NarrowTy == SrcTy)
4369 return UnableToLegalize;
4370
4371 // This attempts to lower part of an LCMTy merge/unmerge sequence. It is
4372 // intended for old MIR tests. Since the change to more/fewer-elements
4373 // legalization, it should no longer be possible to generate MIR like this
4374 // from llvm-ir, because the LCMTy approach was replaced with merge/unmerge to vector elements.
4375 if (TypeIdx == 1) {
4376 assert(SrcTy.isVector() && "Expected vector types");
4377 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4378 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
4379 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
4380 return UnableToLegalize;
4381 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
4382 //
4383 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
4384 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
4385 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
4386 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
4387 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
4388 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
4389
4390 SmallVector<Register, 8> Elts;
4391 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
4392 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
4393 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
4394 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
4395 Elts.push_back(Unmerge.getReg(j));
4396 }
4397
4398 SmallVector<Register, 8> NarrowTyElts;
4399 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
4400 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
4401 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
4402 ++i, Offset += NumNarrowTyElts) {
4403 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
4404 NarrowTyElts.push_back(
4405 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
4406 }
4407
4408 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
4409 MI.eraseFromParent();
4410 return Legalized;
4411 }
4412
4413 assert(TypeIdx == 0 && "Bad type index");
4414 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
4415 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
4416 return UnableToLegalize;
4417
4418 // This is most likely SrcTy (smaller than register size) packed in DstTy
4419 // (larger than register size), and since the merge was not combined it will be
4420 // lowered to bit sequence packing into a register. Merge SrcTy to NarrowTy
4421 // (register size) pieces first, then merge each of the NarrowTy pieces to DstTy.
4422
4423 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
4424 //
4425 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
4426 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
4427 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
4428 SmallVector<Register, 8> NarrowTyElts;
4429 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
4430 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
4431 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
4432 for (unsigned i = 0; i < NumParts; ++i) {
4433 SmallVector<Register, 8> Sources;
4434 for (unsigned j = 0; j < NumElts; ++j)
4435 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
4436 NarrowTyElts.push_back(
4437 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
4438 }
4439
4440 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
4441 MI.eraseFromParent();
4442 return Legalized;
4443}
4444
4445 LegalizerHelper::LegalizeResult
4446 LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
4447 unsigned TypeIdx,
4448 LLT NarrowVecTy) {
4449 auto [DstReg, SrcVec] = MI.getFirst2Regs();
4450 Register InsertVal;
4451 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
4452
4453 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
4454 if (IsInsert)
4455 InsertVal = MI.getOperand(2).getReg();
4456
4457 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
4458
4459 // TODO: Handle total scalarization case.
4460 if (!NarrowVecTy.isVector())
4461 return UnableToLegalize;
4462
4463 LLT VecTy = MRI.getType(SrcVec);
4464
4465 // If the index is a constant, we can really break this down as you would
4466 // expect, and index into the target size pieces.
4467 int64_t IdxVal;
4468 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
4469 if (MaybeCst) {
4470 IdxVal = MaybeCst->Value.getSExtValue();
4471 // Avoid out of bounds indexing the pieces.
4472 if (IdxVal >= VecTy.getNumElements()) {
4473 MIRBuilder.buildUndef(DstReg);
4474 MI.eraseFromParent();
4475 return Legalized;
4476 }
4477
4478 SmallVector<Register, 8> VecParts;
4479 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
4480
4481 // Build a sequence of NarrowTy pieces in VecParts for this operand.
4482 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
4483 TargetOpcode::G_ANYEXT);
4484
4485 unsigned NewNumElts = NarrowVecTy.getNumElements();
4486
4487 LLT IdxTy = MRI.getType(Idx);
4488 int64_t PartIdx = IdxVal / NewNumElts;
4489 auto NewIdx =
4490 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
4491
4492 if (IsInsert) {
4493 LLT PartTy = MRI.getType(VecParts[PartIdx]);
4494
4495 // Use the adjusted index to insert into one of the subvectors.
4496 auto InsertPart = MIRBuilder.buildInsertVectorElement(
4497 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
4498 VecParts[PartIdx] = InsertPart.getReg(0);
4499
4500 // Recombine the inserted subvector with the others to reform the result
4501 // vector.
4502 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
4503 } else {
4504 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
4505 }
4506
4507 MI.eraseFromParent();
4508 return Legalized;
4509 }
4510
4511 // With a variable index, we can't perform the operation in a smaller type, so
4512 // we're forced to expand this.
4513 //
4514 // TODO: We could emit a chain of compare/select to figure out which piece to
4515 // index.
4516 return lowerExtractInsertVectorElt(MI);
4517}
4518
4519 LegalizerHelper::LegalizeResult
4520 LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
4521 LLT NarrowTy) {
4522 // FIXME: Don't know how to handle secondary types yet.
4523 if (TypeIdx != 0)
4524 return UnableToLegalize;
4525
4526 // This implementation doesn't work for atomics. Give up instead of doing
4527 // something invalid.
4528 if (LdStMI.isAtomic())
4529 return UnableToLegalize;
4530
4531 bool IsLoad = isa<GLoad>(LdStMI);
4532 Register ValReg = LdStMI.getReg(0);
4533 Register AddrReg = LdStMI.getPointerReg();
4534 LLT ValTy = MRI.getType(ValReg);
4535
4536 // FIXME: Do we need a distinct NarrowMemory legalize action?
4537 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
4538 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
4539 return UnableToLegalize;
4540 }
4541
4542 int NumParts = -1;
4543 int NumLeftover = -1;
4544 LLT LeftoverTy;
4545 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
4546 if (IsLoad) {
4547 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
4548 } else {
4549 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
4550 NarrowLeftoverRegs, MIRBuilder, MRI)) {
4551 NumParts = NarrowRegs.size();
4552 NumLeftover = NarrowLeftoverRegs.size();
4553 }
4554 }
4555
4556 if (NumParts == -1)
4557 return UnableToLegalize;
4558
4559 LLT PtrTy = MRI.getType(AddrReg);
4560 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
4561
4562 unsigned TotalSize = ValTy.getSizeInBits();
4563
4564 // Split the load/store into PartTy sized pieces starting at Offset. If this
4565 // is a load, return the new registers in ValRegs. For a store, each element
4566 // of ValRegs should be PartTy. Returns the next offset that needs to be
4567 // handled.
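  // e.g. a little-endian s96 value split with an s32 NarrowTy becomes three
  // s32 accesses at byte offsets 0, 4 and 8 from the original address.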
4568 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
4569 auto MMO = LdStMI.getMMO();
4570 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
4571 unsigned NumParts, unsigned Offset) -> unsigned {
4572 MachineFunction &MF = MIRBuilder.getMF();
4573 unsigned PartSize = PartTy.getSizeInBits();
4574 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
4575 ++Idx) {
4576 unsigned ByteOffset = Offset / 8;
4577 Register NewAddrReg;
4578
4579 MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
4580
4581 MachineMemOperand *NewMMO =
4582 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
4583
4584 if (IsLoad) {
4585 Register Dst = MRI.createGenericVirtualRegister(PartTy);
4586 ValRegs.push_back(Dst);
4587 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
4588 } else {
4589 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
4590 }
4591 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
4592 }
4593
4594 return Offset;
4595 };
4596
4597 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
4598 unsigned HandledOffset =
4599 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
4600
4601 // Handle the rest of the register if this isn't an even type breakdown.
4602 if (LeftoverTy.isValid())
4603 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
4604
4605 if (IsLoad) {
4606 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
4607 LeftoverTy, NarrowLeftoverRegs);
4608 }
4609
4610 LdStMI.eraseFromParent();
4611 return Legalized;
4612}
4613
4614 LegalizerHelper::LegalizeResult
4615 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
4616 LLT NarrowTy) {
4617 using namespace TargetOpcode;
4618 GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
4619 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
4620
4621 switch (MI.getOpcode()) {
4622 case G_IMPLICIT_DEF:
4623 case G_TRUNC:
4624 case G_AND:
4625 case G_OR:
4626 case G_XOR:
4627 case G_ADD:
4628 case G_SUB:
4629 case G_MUL:
4630 case G_PTR_ADD:
4631 case G_SMULH:
4632 case G_UMULH:
4633 case G_FADD:
4634 case G_FMUL:
4635 case G_FSUB:
4636 case G_FNEG:
4637 case G_FABS:
4638 case G_FCANONICALIZE:
4639 case G_FDIV:
4640 case G_FREM:
4641 case G_FMA:
4642 case G_FMAD:
4643 case G_FPOW:
4644 case G_FEXP:
4645 case G_FEXP2:
4646 case G_FEXP10:
4647 case G_FLOG:
4648 case G_FLOG2:
4649 case G_FLOG10:
4650 case G_FLDEXP:
4651 case G_FNEARBYINT:
4652 case G_FCEIL:
4653 case G_FFLOOR:
4654 case G_FRINT:
4655 case G_INTRINSIC_ROUND:
4656 case G_INTRINSIC_ROUNDEVEN:
4657 case G_INTRINSIC_TRUNC:
4658 case G_FCOS:
4659 case G_FSIN:
4660 case G_FSQRT:
4661 case G_BSWAP:
4662 case G_BITREVERSE:
4663 case G_SDIV:
4664 case G_UDIV:
4665 case G_SREM:
4666 case G_UREM:
4667 case G_SDIVREM:
4668 case G_UDIVREM:
4669 case G_SMIN:
4670 case G_SMAX:
4671 case G_UMIN:
4672 case G_UMAX:
4673 case G_ABS:
4674 case G_FMINNUM:
4675 case G_FMAXNUM:
4676 case G_FMINNUM_IEEE:
4677 case G_FMAXNUM_IEEE:
4678 case G_FMINIMUM:
4679 case G_FMAXIMUM:
4680 case G_FSHL:
4681 case G_FSHR:
4682 case G_ROTL:
4683 case G_ROTR:
4684 case G_FREEZE:
4685 case G_SADDSAT:
4686 case G_SSUBSAT:
4687 case G_UADDSAT:
4688 case G_USUBSAT:
4689 case G_UMULO:
4690 case G_SMULO:
4691 case G_SHL:
4692 case G_LSHR:
4693 case G_ASHR:
4694 case G_SSHLSAT:
4695 case G_USHLSAT:
4696 case G_CTLZ:
4697 case G_CTLZ_ZERO_UNDEF:
4698 case G_CTTZ:
4699 case G_CTTZ_ZERO_UNDEF:
4700 case G_CTPOP:
4701 case G_FCOPYSIGN:
4702 case G_ZEXT:
4703 case G_SEXT:
4704 case G_ANYEXT:
4705 case G_FPEXT:
4706 case G_FPTRUNC:
4707 case G_SITOFP:
4708 case G_UITOFP:
4709 case G_FPTOSI:
4710 case G_FPTOUI:
4711 case G_INTTOPTR:
4712 case G_PTRTOINT:
4713 case G_ADDRSPACE_CAST:
4714 case G_UADDO:
4715 case G_USUBO:
4716 case G_UADDE:
4717 case G_USUBE:
4718 case G_SADDO:
4719 case G_SSUBO:
4720 case G_SADDE:
4721 case G_SSUBE:
4722 case G_STRICT_FADD:
4723 case G_STRICT_FSUB:
4724 case G_STRICT_FMUL:
4725 case G_STRICT_FMA:
4726 case G_STRICT_FLDEXP:
4727 case G_FFREXP:
4728 return fewerElementsVectorMultiEltType(GMI, NumElts);
4729 case G_ICMP:
4730 case G_FCMP:
4731 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
4732 case G_IS_FPCLASS:
4733 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
4734 case G_SELECT:
4735 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
4736 return fewerElementsVectorMultiEltType(GMI, NumElts);
4737 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
4738 case G_PHI:
4739 return fewerElementsVectorPhi(GMI, NumElts);
4740 case G_UNMERGE_VALUES:
4741 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
4742 case G_BUILD_VECTOR:
4743 assert(TypeIdx == 0 && "not a vector type index");
4744 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4745 case G_CONCAT_VECTORS:
4746 if (TypeIdx != 1) // TODO: This probably does work as expected already.
4747 return UnableToLegalize;
4748 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4749 case G_EXTRACT_VECTOR_ELT:
4750 case G_INSERT_VECTOR_ELT:
4751 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
4752 case G_LOAD:
4753 case G_STORE:
4754 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
4755 case G_SEXT_INREG:
4756 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
4757 GISEL_VECREDUCE_CASES_NONSEQ
4758 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
4759 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
4760 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
4761 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
4762 case G_SHUFFLE_VECTOR:
4763 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
4764 case G_FPOWI:
4765 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
4766 case G_BITCAST:
4767 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
4768 case G_INTRINSIC_FPTRUNC_ROUND:
4769 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
4770 default:
4771 return UnableToLegalize;
4772 }
4773}
4774
4775 LegalizerHelper::LegalizeResult
4776 LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx,
4777 LLT NarrowTy) {
4778 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
4779 "Not a bitcast operation");
4780
4781 if (TypeIdx != 0)
4782 return UnableToLegalize;
4783
4784 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
4785
4786 unsigned SrcScalSize = SrcTy.getScalarSizeInBits();
4787 LLT SrcNarrowTy =
4788 LLT::fixed_vector(NarrowTy.getSizeInBits() / SrcScalSize, SrcScalSize);
4789
4790 // Split the Src and Dst Reg into smaller registers
4791 SmallVector<Register> SrcVRegs, BitcastVRegs;
4792 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
4793 return UnableToLegalize;
4794
4795 // Build new smaller bitcast instructions
4796 // Not supporting Leftover types for now but will have to
4797 for (unsigned i = 0; i < SrcVRegs.size(); i++)
4798 BitcastVRegs.push_back(
4799 MIRBuilder.buildBitcast(NarrowTy, SrcVRegs[i]).getReg(0));
4800
4801 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
4802 MI.eraseFromParent();
4803 return Legalized;
4804}
4805
4806 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
4807 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4808 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
4809 if (TypeIdx != 0)
4810 return UnableToLegalize;
4811
4812 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
4813 MI.getFirst3RegLLTs();
4814 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
4815 // The shuffle should be canonicalized by now.
4816 if (DstTy != Src1Ty)
4817 return UnableToLegalize;
4818 if (DstTy != Src2Ty)
4819 return UnableToLegalize;
4820
4821 if (!isPowerOf2_32(DstTy.getNumElements()))
4822 return UnableToLegalize;
4823
4824 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
4825 // Further legalization attempts will be needed to split further.
4826 NarrowTy =
4827 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
4828 unsigned NewElts = NarrowTy.getNumElements();
4829
4830 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
4831 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
4832 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
4833 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
4834 SplitSrc2Regs[1]};
4835
4836 Register Hi, Lo;
4837
4838 // If Lo or Hi uses elements from at most two of the four input vectors, then
4839 // express it as a vector shuffle of those two inputs. Otherwise extract the
4840 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
4841 SmallVector<int, 16> Ops;
4842 for (unsigned High = 0; High < 2; ++High) {
4843 Register &Output = High ? Hi : Lo;
4844
4845 // Build a shuffle mask for the output, discovering on the fly which
4846 // input vectors to use as shuffle operands (recorded in InputUsed).
4847 // If building a suitable shuffle vector proves too hard, then bail
4848 // out with useBuildVector set.
4849 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
4850 unsigned FirstMaskIdx = High * NewElts;
4851 bool UseBuildVector = false;
4852 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4853 // The mask element. This indexes into the input.
4854 int Idx = Mask[FirstMaskIdx + MaskOffset];
4855
4856 // The input vector this mask element indexes into.
4857 unsigned Input = (unsigned)Idx / NewElts;
4858
4859 if (Input >= std::size(Inputs)) {
4860 // The mask element does not index into any input vector.
4861 Ops.push_back(-1);
4862 continue;
4863 }
4864
4865 // Turn the index into an offset from the start of the input vector.
4866 Idx -= Input * NewElts;
4867
4868 // Find or create a shuffle vector operand to hold this input.
4869 unsigned OpNo;
4870 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
4871 if (InputUsed[OpNo] == Input) {
4872 // This input vector is already an operand.
4873 break;
4874 } else if (InputUsed[OpNo] == -1U) {
4875 // Create a new operand for this input vector.
4876 InputUsed[OpNo] = Input;
4877 break;
4878 }
4879 }
4880
4881 if (OpNo >= std::size(InputUsed)) {
4882 // More than two input vectors used! Give up on trying to create a
4883 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
4884 UseBuildVector = true;
4885 break;
4886 }
4887
4888 // Add the mask index for the new shuffle vector.
4889 Ops.push_back(Idx + OpNo * NewElts);
4890 }
4891
4892 if (UseBuildVector) {
4893 LLT EltTy = NarrowTy.getElementType();
4894 SmallVector<Register, 16> SVOps;
4895
4896 // Extract the input elements by hand.
4897 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4898 // The mask element. This indexes into the input.
4899 int Idx = Mask[FirstMaskIdx + MaskOffset];
4900
4901 // The input vector this mask element indexes into.
4902 unsigned Input = (unsigned)Idx / NewElts;
4903
4904 if (Input >= std::size(Inputs)) {
4905 // The mask element is "undef" or indexes off the end of the input.
4906 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
4907 continue;
4908 }
4909
4910 // Turn the index into an offset from the start of the input vector.
4911 Idx -= Input * NewElts;
4912
4913 // Extract the vector element by hand.
4914 SVOps.push_back(MIRBuilder
4915 .buildExtractVectorElement(
4916 EltTy, Inputs[Input],
4917 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
4918 .getReg(0));
4919 }
4920
4921 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
4922 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
4923 } else if (InputUsed[0] == -1U) {
4924 // No input vectors were used! The result is undefined.
4925 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
4926 } else {
4927 Register Op0 = Inputs[InputUsed[0]];
4928 // If only one input was used, use an undefined vector for the other.
4929 Register Op1 = InputUsed[1] == -1U
4930 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
4931 : Inputs[InputUsed[1]];
4932 // At least one input vector was used. Create a new shuffle vector.
4933 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
4934 }
4935
4936 Ops.clear();
4937 }
4938
4939 MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
4940 MI.eraseFromParent();
4941 return Legalized;
4942}
4943
4944 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
4945 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4946 auto &RdxMI = cast<GVecReduce>(MI);
4947
4948 if (TypeIdx != 1)
4949 return UnableToLegalize;
4950
4951 // The semantics of the normal non-sequential reductions allow us to freely
4952 // re-associate the operation.
4953 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
4954
4955 if (NarrowTy.isVector() &&
4956 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
4957 return UnableToLegalize;
4958
4959 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
4960 SmallVector<Register> SplitSrcs;
4961 // If NarrowTy is a scalar then we're being asked to scalarize.
4962 const unsigned NumParts =
4963 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
4964 : SrcTy.getNumElements();
4965
4966 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
4967 if (NarrowTy.isScalar()) {
4968 if (DstTy != NarrowTy)
4969 return UnableToLegalize; // FIXME: handle implicit extensions.
4970
4971 if (isPowerOf2_32(NumParts)) {
4972 // Generate a tree of scalar operations to reduce the critical path.
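      // e.g. 8 pieces reduce as ((p0 op p1) op (p2 op p3)) op ((p4 op p5) op
      // (p6 op p7)): three levels instead of seven sequential operations.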
4973 SmallVector<Register> PartialResults;
4974 unsigned NumPartsLeft = NumParts;
4975 while (NumPartsLeft > 1) {
4976 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
4977 PartialResults.emplace_back(
4978 MIRBuilder
4979 .buildInstr(ScalarOpc, {NarrowTy},
4980 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
4981 .getReg(0));
4982 }
4983 SplitSrcs = PartialResults;
4984 PartialResults.clear();
4985 NumPartsLeft = SplitSrcs.size();
4986 }
4987 assert(SplitSrcs.size() == 1);
4988 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
4989 MI.eraseFromParent();
4990 return Legalized;
4991 }
4992 // If we can't generate a tree, then just do sequential operations.
4993 Register Acc = SplitSrcs[0];
4994 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
4995 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
4996 .getReg(0);
4997 MIRBuilder.buildCopy(DstReg, Acc);
4998 MI.eraseFromParent();
4999 return Legalized;
5000 }
5001 SmallVector<Register> PartialReductions;
5002 for (unsigned Part = 0; Part < NumParts; ++Part) {
5003 PartialReductions.push_back(
5004 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5005 .getReg(0));
5006 }
5007
5008 // If the types involved are powers of 2, we can generate intermediate vector
5009 // ops, before generating a final reduction operation.
5010 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5011 isPowerOf2_32(NarrowTy.getNumElements())) {
5012 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5013 }
5014
5015 Register Acc = PartialReductions[0];
5016 for (unsigned Part = 1; Part < NumParts; ++Part) {
5017 if (Part == NumParts - 1) {
5018 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5019 {Acc, PartialReductions[Part]});
5020 } else {
5021 Acc = MIRBuilder
5022 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5023 .getReg(0);
5024 }
5025 }
5026 MI.eraseFromParent();
5027 return Legalized;
5028}
5029
5030 LegalizerHelper::LegalizeResult
5031 LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
5032 unsigned int TypeIdx,
5033 LLT NarrowTy) {
5034 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5035 MI.getFirst3RegLLTs();
5036 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5037 DstTy != NarrowTy)
5038 return UnableToLegalize;
5039
5040 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5041 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5042 "Unexpected vecreduce opcode");
5043 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5044 ? TargetOpcode::G_FADD
5045 : TargetOpcode::G_FMUL;
5046
5047 SmallVector<Register> SplitSrcs;
5048 unsigned NumParts = SrcTy.getNumElements();
5049 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5050 Register Acc = ScalarReg;
5051 for (unsigned i = 0; i < NumParts; i++)
5052 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5053 .getReg(0);
5054
5055 MIRBuilder.buildCopy(DstReg, Acc);
5056 MI.eraseFromParent();
5057 return Legalized;
5058}
5059
5060 LegalizerHelper::LegalizeResult
5061 LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5062 LLT SrcTy, LLT NarrowTy,
5063 unsigned ScalarOpc) {
5064 SmallVector<Register> SplitSrcs;
5065 // Split the sources into NarrowTy size pieces.
5066 extractParts(SrcReg, NarrowTy,
5067 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5068 MIRBuilder, MRI);
5069 // We're going to do a tree reduction using vector operations until we have
5070 // one NarrowTy size value left.
5071 while (SplitSrcs.size() > 1) {
5072 SmallVector<Register> PartialRdxs;
5073 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5074 Register LHS = SplitSrcs[Idx];
5075 Register RHS = SplitSrcs[Idx + 1];
5076 // Create the intermediate vector op.
5077 Register Res =
5078 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
5079 PartialRdxs.push_back(Res);
5080 }
5081 SplitSrcs = std::move(PartialRdxs);
5082 }
5083 // Finally generate the requested NarrowTy based reduction.
5085 MI.getOperand(1).setReg(SplitSrcs[0]);
5087 return Legalized;
5088}
5089
5090 LegalizerHelper::LegalizeResult
5091 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
5092 const LLT HalfTy, const LLT AmtTy) {
5093
5094 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5095 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5096 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5097
5098 if (Amt.isZero()) {
5099 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
5100 MI.eraseFromParent();
5101 return Legalized;
5102 }
5103
5104 LLT NVT = HalfTy;
5105 unsigned NVTBits = HalfTy.getSizeInBits();
5106 unsigned VTBits = 2 * NVTBits;
5107
5108 SrcOp Lo(Register(0)), Hi(Register(0));
5109 if (MI.getOpcode() == TargetOpcode::G_SHL) {
5110 if (Amt.ugt(VTBits)) {
5111 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5112 } else if (Amt.ugt(NVTBits)) {
5113 Lo = MIRBuilder.buildConstant(NVT, 0);
5114 Hi = MIRBuilder.buildShl(NVT, InL,
5115 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5116 } else if (Amt == NVTBits) {
5117 Lo = MIRBuilder.buildConstant(NVT, 0);
5118 Hi = InL;
5119 } else {
5120 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
5121 auto OrLHS =
5122 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
5123 auto OrRHS = MIRBuilder.buildLShr(
5124 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5125 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5126 }
5127 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5128 if (Amt.ugt(VTBits)) {
5129 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5130 } else if (Amt.ugt(NVTBits)) {
5131 Lo = MIRBuilder.buildLShr(NVT, InH,
5132 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5133 Hi = MIRBuilder.buildConstant(NVT, 0);
5134 } else if (Amt == NVTBits) {
5135 Lo = InH;
5136 Hi = MIRBuilder.buildConstant(NVT, 0);
5137 } else {
5138 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5139
5140 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5141 auto OrRHS = MIRBuilder.buildShl(
5142 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5143
5144 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5145 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
5146 }
5147 } else {
5148 if (Amt.ugt(VTBits)) {
5149 Hi = Lo = MIRBuilder.buildAShr(
5150 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5151 } else if (Amt.ugt(NVTBits)) {
5152 Lo = MIRBuilder.buildAShr(NVT, InH,
5153 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5154 Hi = MIRBuilder.buildAShr(NVT, InH,
5155 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5156 } else if (Amt == NVTBits) {
5157 Lo = InH;
5158 Hi = MIRBuilder.buildAShr(NVT, InH,
5159 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5160 } else {
5161 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5162
5163 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5164 auto OrRHS = MIRBuilder.buildShl(
5165 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5166
5167 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5168 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
5169 }
5170 }
5171
5172 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
5173 MI.eraseFromParent();
5174
5175 return Legalized;
5176}
5177
5178// TODO: Optimize if constant shift amount.
5179LegalizerHelper::LegalizeResult
5180LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
5181 LLT RequestedTy) {
5182 if (TypeIdx == 1) {
5184 narrowScalarSrc(MI, RequestedTy, 2);
5186 return Legalized;
5187 }
5188
5189 Register DstReg = MI.getOperand(0).getReg();
5190 LLT DstTy = MRI.getType(DstReg);
5191 if (DstTy.isVector())
5192 return UnableToLegalize;
5193
5194 Register Amt = MI.getOperand(2).getReg();
5195 LLT ShiftAmtTy = MRI.getType(Amt);
5196 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
5197 if (DstEltSize % 2 != 0)
5198 return UnableToLegalize;
5199
5200 // Ignore the input type. We can only go to exactly half the size of the
5201 // input. If that isn't small enough, the resulting pieces will be further
5202 // legalized.
5203 const unsigned NewBitSize = DstEltSize / 2;
5204 const LLT HalfTy = LLT::scalar(NewBitSize);
5205 const LLT CondTy = LLT::scalar(1);
5206
5207 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
5208 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
5209 ShiftAmtTy);
5210 }
5211
5212 // TODO: Expand with known bits.
5213
5214 // Handle the fully general expansion by an unknown amount.
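 // With the value split as (InL, InH) and NewBitSize = N, a G_SHL by Amt
 // yields Lo = InL << Amt, Hi = (InH << Amt) | (InL >> (N - Amt)) when
 // Amt < N, and Lo = 0, Hi = InL << (Amt - N) otherwise; the selects built
 // below pick between the two forms at run time (Amt == 0 keeps InH as-is).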
5215 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
5216
5217 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5218 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5219 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5220
5221 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
5222 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
5223
5224 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
5225 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
5226 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
5227
5228 Register ResultRegs[2];
5229 switch (MI.getOpcode()) {
5230 case TargetOpcode::G_SHL: {
5231 // Short: ShAmt < NewBitSize
5232 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
5233
5234 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
5235 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
5236 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
5237
5238 // Long: ShAmt >= NewBitSize
5239 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
5240 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
5241
5242 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
5243 auto Hi = MIRBuilder.buildSelect(
5244 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
5245
5246 ResultRegs[0] = Lo.getReg(0);
5247 ResultRegs[1] = Hi.getReg(0);
5248 break;
5249 }
5250 case TargetOpcode::G_LSHR:
5251 case TargetOpcode::G_ASHR: {
5252 // Short: ShAmt < NewBitSize
5253 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
5254
5255 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
5256 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
5257 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
5258
5259 // Long: ShAmt >= NewBitSize
5261 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5262 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
5263 } else {
5264 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
5265 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
5266 }
5267 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
5268 {InH, AmtExcess}); // Lo from Hi part.
5269
5270 auto Lo = MIRBuilder.buildSelect(
5271 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
5272
5273 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
5274
5275 ResultRegs[0] = Lo.getReg(0);
5276 ResultRegs[1] = Hi.getReg(0);
5277 break;
5278 }
5279 default:
5280 llvm_unreachable("not a shift");
5281 }
5282
5283 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
5284 MI.eraseFromParent();
5285 return Legalized;
5286}
5287
5288LegalizerHelper::LegalizeResult
5289LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
5290 LLT MoreTy) {
5291 assert(TypeIdx == 0 && "Expecting only Idx 0");
5292
5294 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
5295 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
5297 moreElementsVectorSrc(MI, MoreTy, I);
5298 }
5299
5300 MachineBasicBlock &MBB = *MI.getParent();
5302 moreElementsVectorDst(MI, MoreTy, 0);
5304 return Legalized;
5305}
5306
5307MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
5308 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
5309 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
5310
5311 switch (Opcode) {
5312 default:
5314 "getNeutralElementForVecReduce called with invalid opcode!");
5315 case TargetOpcode::G_VECREDUCE_ADD:
5316 case TargetOpcode::G_VECREDUCE_OR:
5317 case TargetOpcode::G_VECREDUCE_XOR:
5318 case TargetOpcode::G_VECREDUCE_UMAX:
5319 return MIRBuilder.buildConstant(Ty, 0);
5320 case TargetOpcode::G_VECREDUCE_MUL:
5321 return MIRBuilder.buildConstant(Ty, 1);
5322 case TargetOpcode::G_VECREDUCE_AND:
5323 case TargetOpcode::G_VECREDUCE_UMIN:
5324 return MIRBuilder.buildConstant(Ty,
5325 APInt::getAllOnes(Ty.getSizeInBits()));
5326 case TargetOpcode::G_VECREDUCE_SMAX:
5327 return MIRBuilder.buildConstant(Ty,
5328 APInt::getSignedMinValue(Ty.getSizeInBits()));
5329 case TargetOpcode::G_VECREDUCE_SMIN:
5330 return MIRBuilder.buildConstant(Ty,
5331 APInt::getSignedMaxValue(Ty.getSizeInBits()));
5332 case TargetOpcode::G_VECREDUCE_FADD:
5333 return MIRBuilder.buildFConstant(Ty, -0.0);
5334 case TargetOpcode::G_VECREDUCE_FMUL:
5335 return MIRBuilder.buildFConstant(Ty, 1.0);
5336 case TargetOpcode::G_VECREDUCE_FMINIMUM:
5337 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
5338 assert(false && "getNeutralElementForVecReduce unimplemented for "
5339 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
5340 }
5341 llvm_unreachable("switch expected to return!");
5342}
5343
5344LegalizerHelper::LegalizeResult
5345LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
5346 LLT MoreTy) {
5347 unsigned Opc = MI.getOpcode();
5348 switch (Opc) {
5349 case TargetOpcode::G_IMPLICIT_DEF:
5350 case TargetOpcode::G_LOAD: {
5351 if (TypeIdx != 0)
5352 return UnableToLegalize;
5354 moreElementsVectorDst(MI, MoreTy, 0);
5356 return Legalized;
5357 }
5358 case TargetOpcode::G_STORE:
5359 if (TypeIdx != 0)
5360 return UnableToLegalize;
5362 moreElementsVectorSrc(MI, MoreTy, 0);
5364 return Legalized;
5365 case TargetOpcode::G_AND:
5366 case TargetOpcode::G_OR:
5367 case TargetOpcode::G_XOR:
5368 case TargetOpcode::G_ADD:
5369 case TargetOpcode::G_SUB:
5370 case TargetOpcode::G_MUL:
5371 case TargetOpcode::G_FADD:
5372 case TargetOpcode::G_FSUB:
5373 case TargetOpcode::G_FMUL:
5374 case TargetOpcode::G_FDIV:
5375 case TargetOpcode::G_FCOPYSIGN:
5376 case TargetOpcode::G_UADDSAT:
5377 case TargetOpcode::G_USUBSAT:
5378 case TargetOpcode::G_SADDSAT:
5379 case TargetOpcode::G_SSUBSAT:
5380 case TargetOpcode::G_SMIN:
5381 case TargetOpcode::G_SMAX:
5382 case TargetOpcode::G_UMIN:
5383 case TargetOpcode::G_UMAX:
5384 case TargetOpcode::G_FMINNUM:
5385 case TargetOpcode::G_FMAXNUM:
5386 case TargetOpcode::G_FMINNUM_IEEE:
5387 case TargetOpcode::G_FMAXNUM_IEEE:
5388 case TargetOpcode::G_FMINIMUM:
5389 case TargetOpcode::G_FMAXIMUM:
5390 case TargetOpcode::G_STRICT_FADD:
5391 case TargetOpcode::G_STRICT_FSUB:
5392 case TargetOpcode::G_STRICT_FMUL:
5393 case TargetOpcode::G_SHL:
5394 case TargetOpcode::G_ASHR:
5395 case TargetOpcode::G_LSHR: {
5397 moreElementsVectorSrc(MI, MoreTy, 1);
5398 moreElementsVectorSrc(MI, MoreTy, 2);
5399 moreElementsVectorDst(MI, MoreTy, 0);
5401 return Legalized;
5402 }
5403 case TargetOpcode::G_FMA:
5404 case TargetOpcode::G_STRICT_FMA:
5405 case TargetOpcode::G_FSHR:
5406 case TargetOpcode::G_FSHL: {
5408 moreElementsVectorSrc(MI, MoreTy, 1);
5409 moreElementsVectorSrc(MI, MoreTy, 2);
5410 moreElementsVectorSrc(MI, MoreTy, 3);
5411 moreElementsVectorDst(MI, MoreTy, 0);
5413 return Legalized;
5414 }
5415 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
5416 case TargetOpcode::G_EXTRACT:
5417 if (TypeIdx != 1)
5418 return UnableToLegalize;
5420 moreElementsVectorSrc(MI, MoreTy, 1);
5422 return Legalized;
5423 case TargetOpcode::G_INSERT:
5424 case TargetOpcode::G_INSERT_VECTOR_ELT:
5425 case TargetOpcode::G_FREEZE:
5426 case TargetOpcode::G_FNEG:
5427 case TargetOpcode::G_FABS:
5428 case TargetOpcode::G_FSQRT:
5429 case TargetOpcode::G_FCEIL:
5430 case TargetOpcode::G_FFLOOR:
5431 case TargetOpcode::G_FNEARBYINT:
5432 case TargetOpcode::G_FRINT:
5433 case TargetOpcode::G_INTRINSIC_ROUND:
5434 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
5435 case TargetOpcode::G_INTRINSIC_TRUNC:
5436 case TargetOpcode::G_BSWAP:
5437 case TargetOpcode::G_FCANONICALIZE:
5438 case TargetOpcode::G_SEXT_INREG:
5439 case TargetOpcode::G_ABS:
5440 if (TypeIdx != 0)
5441 return UnableToLegalize;
5443 moreElementsVectorSrc(MI, MoreTy, 1);
5444 moreElementsVectorDst(MI, MoreTy, 0);
5446 return Legalized;
5447 case TargetOpcode::G_SELECT: {
5448 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
5449 if (TypeIdx == 1) {
5450 if (!CondTy.isScalar() ||
5451 DstTy.getElementCount() != MoreTy.getElementCount())
5452 return UnableToLegalize;
5453
5454 // This is turning a scalar select of vectors into a vector
5455 // select. Broadcast the select condition.
5456 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
5458 MI.getOperand(1).setReg(ShufSplat.getReg(0));
5460 return Legalized;
5461 }
5462
5463 if (CondTy.isVector())
5464 return UnableToLegalize;
5465
5467 moreElementsVectorSrc(MI, MoreTy, 2);
5468 moreElementsVectorSrc(MI, MoreTy, 3);
5469 moreElementsVectorDst(MI, MoreTy, 0);
5471 return Legalized;
5472 }
5473 case TargetOpcode::G_UNMERGE_VALUES:
5474 return UnableToLegalize;
5475 case TargetOpcode::G_PHI:
5476 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
5477 case TargetOpcode::G_SHUFFLE_VECTOR:
5478 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
5479 case TargetOpcode::G_BUILD_VECTOR: {
5480 SmallVector<Register, 8> Elts;
5481 for (auto Op : MI.uses()) {
5482 Elts.push_back(Op.getReg());
5483 }
5484
5485 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
5486 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()).getReg(0));
5487 }
5488
5489 MIRBuilder.buildDeleteTrailingVectorElements(
5490 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
5491 MI.eraseFromParent();
5492 return Legalized;
5493 }
5494 case TargetOpcode::G_SEXT:
5495 case TargetOpcode::G_ZEXT:
5496 case TargetOpcode::G_ANYEXT:
5497 case TargetOpcode::G_TRUNC:
5498 case TargetOpcode::G_FPTRUNC:
5499 case TargetOpcode::G_FPEXT:
5500 case TargetOpcode::G_FPTOSI:
5501 case TargetOpcode::G_FPTOUI:
5502 case TargetOpcode::G_SITOFP:
5503 case TargetOpcode::G_UITOFP: {
5505 LLT SrcExtTy;
5506 LLT DstExtTy;
5507 if (TypeIdx == 0) {
5508 DstExtTy = MoreTy;
5509 SrcExtTy = LLT::fixed_vector(
5510 MoreTy.getNumElements(),
5511 MRI.getType(MI.getOperand(1).getReg()).getElementType());
5512 } else {
5513 DstExtTy = LLT::fixed_vector(
5514 MoreTy.getNumElements(),
5515 MRI.getType(MI.getOperand(0).getReg()).getElementType());
5516 SrcExtTy = MoreTy;
5517 }
5518 moreElementsVectorSrc(MI, SrcExtTy, 1);
5519 moreElementsVectorDst(MI, DstExtTy, 0);
5521 return Legalized;
5522 }
5523 case TargetOpcode::G_ICMP:
5524 case TargetOpcode::G_FCMP: {
5525 if (TypeIdx != 1)
5526 return UnableToLegalize;
5527
5529 moreElementsVectorSrc(MI, MoreTy, 2);
5530 moreElementsVectorSrc(MI, MoreTy, 3);
5531 LLT CondTy = LLT::fixed_vector(
5532 MoreTy.getNumElements(),
5533 MRI.getType(MI.getOperand(0).getReg()).getElementType());
5534 moreElementsVectorDst(MI, CondTy, 0);
5536 return Legalized;
5537 }
5538 case TargetOpcode::G_BITCAST: {
5539 if (TypeIdx != 0)
5540 return UnableToLegalize;
5541
5542 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
5543 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5544
5545 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
5546 if (coefficient % DstTy.getNumElements() != 0)
5547 return UnableToLegalize;
5548
5549 coefficient = coefficient / DstTy.getNumElements();
5550
5551 LLT NewTy = SrcTy.changeElementCount(
5552 ElementCount::get(coefficient, MoreTy.isScalable()));
5554 moreElementsVectorSrc(MI, NewTy, 1);
5555 moreElementsVectorDst(MI, MoreTy, 0);
5557 return Legalized;
5558 }
5559 case TargetOpcode::G_VECREDUCE_FADD:
5560 case TargetOpcode::G_VECREDUCE_FMUL:
5561 case TargetOpcode::G_VECREDUCE_ADD:
5562 case TargetOpcode::G_VECREDUCE_MUL:
5563 case TargetOpcode::G_VECREDUCE_AND:
5564 case TargetOpcode::G_VECREDUCE_OR:
5565 case TargetOpcode::G_VECREDUCE_XOR:
5566 case TargetOpcode::G_VECREDUCE_SMAX:
5567 case TargetOpcode::G_VECREDUCE_SMIN:
5568 case TargetOpcode::G_VECREDUCE_UMAX:
5569 case TargetOpcode::G_VECREDUCE_UMIN: {
5570 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
5571 MachineOperand &MO = MI.getOperand(1);
5572 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
5573 auto NeutralElement = getNeutralElementForVecReduce(
5574 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
5575
5576 LLT IdxTy(TLI.getVectorIdxTy(MIRBuilder.getDataLayout()));
5577 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
5578 i != e; i++) {
5579 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
5580 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
5581 NeutralElement, Idx);
5582 }
5583
5585 MO.setReg(NewVec.getReg(0));
5587 return Legalized;
5588 }
5589
5590 default:
5591 return UnableToLegalize;
5592 }
5593}
5594
5595LegalizerHelper::LegalizeResult
5596LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
5597 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5598 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5599 unsigned MaskNumElts = Mask.size();
5600 unsigned SrcNumElts = SrcTy.getNumElements();
5601 LLT DestEltTy = DstTy.getElementType();
5602
5603 if (MaskNumElts == SrcNumElts)
5604 return Legalized;
5605
5606 if (MaskNumElts < SrcNumElts) {
5607 // Extend mask to match new destination vector size with
5608 // undef values.
5609 SmallVector<int, 16> NewMask(Mask);
5610 for (unsigned I = MaskNumElts; I < SrcNumElts; ++I)
5611 NewMask.push_back(-1);
5612
5613 moreElementsVectorDst(MI, SrcTy, 0);
5615 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
5616 MI.getOperand(1).getReg(),
5617 MI.getOperand(2).getReg(), NewMask);
5618 MI.eraseFromParent();
5619
5620 return Legalized;
5621 }
5622
5623 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
5624 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
5625 LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
5626
5627 // Create new source vectors by concatenating the initial
5628 // source vectors with undefined vectors of the same size.
5629 auto Undef = MIRBuilder.buildUndef(SrcTy);
5630 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
5631 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
5632 MOps1[0] = MI.getOperand(1).getReg();
5633 MOps2[0] = MI.getOperand(2).getReg();
5634
5635 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
5636 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
5637
5638 // Readjust mask for new input vector length.
5639 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
5640 for (unsigned I = 0; I != MaskNumElts; ++I) {
5641 int Idx = Mask[I];
5642 if (Idx >= static_cast<int>(SrcNumElts))
5643 Idx += PaddedMaskNumElts - SrcNumElts;
5644 MappedOps[I] = Idx;
5645 }
5646
5647 // If we got more elements than required, extract subvector.
5648 if (MaskNumElts != PaddedMaskNumElts) {
5649 auto Shuffle =
5650 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
5651
5652 SmallVector<Register, 16> Elts(MaskNumElts);
5653 for (unsigned I = 0; I < MaskNumElts; ++I) {
5654 Elts[I] =
5655 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
5656 .getReg(0);
5657 }
5658 MIRBuilder.buildBuildVector(DstReg, Elts);
5659 } else {
5660 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
5661 }
5662
5663 MI.eraseFromParent();
5664 return Legalized;
5665}
5666
5667LegalizerHelper::LegalizeResult
5668LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
5669 unsigned int TypeIdx, LLT MoreTy) {
5670 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
5671 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5672 unsigned NumElts = DstTy.getNumElements();
5673 unsigned WidenNumElts = MoreTy.getNumElements();
5674
5675 if (DstTy.isVector() && Src1Ty.isVector() &&
5676 DstTy.getNumElements() != Src1Ty.getNumElements()) {
5677 return equalizeVectorShuffleLengths(MI);
5678 }
5679
5680 if (TypeIdx != 0)
5681 return UnableToLegalize;
5682
5683 // Expect a canonicalized shuffle.
5684 if (DstTy != Src1Ty || DstTy != Src2Ty)
5685 return UnableToLegalize;
5686
5687 moreElementsVectorSrc(MI, MoreTy, 1);
5688 moreElementsVectorSrc(MI, MoreTy, 2);
5689
5690 // Adjust mask based on new input vector length.
5691 SmallVector<int, 16> NewMask;
5692 for (unsigned I = 0; I != NumElts; ++I) {
5693 int Idx = Mask[I];
5694 if (Idx < static_cast<int>(NumElts))
5695 NewMask.push_back(Idx);
5696 else
5697 NewMask.push_back(Idx - NumElts + WidenNumElts);
5698 }
5699 for (unsigned I = NumElts; I != WidenNumElts; ++I)
5700 NewMask.push_back(-1);
5701 moreElementsVectorDst(MI, MoreTy, 0);
5703 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
5704 MI.getOperand(1).getReg(),
5705 MI.getOperand(2).getReg(), NewMask);
5706 MI.eraseFromParent();
5707 return Legalized;
5708}
5709
5710void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
5711 ArrayRef<Register> Src1Regs,
5712 ArrayRef<Register> Src2Regs,
5713 LLT NarrowTy) {
5714 MachineIRBuilder &B = MIRBuilder;
5715 unsigned SrcParts = Src1Regs.size();
5716 unsigned DstParts = DstRegs.size();
5717
5718 unsigned DstIdx = 0; // Low bits of the result.
5719 Register FactorSum =
5720 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
5721 DstRegs[DstIdx] = FactorSum;
5722
5723 unsigned CarrySumPrevDstIdx;
5724 SmallVector<Register, 4> Factors;
5725
5726 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
5727 // Collect low parts of muls for DstIdx.
5728 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
5729 i <= std::min(DstIdx, SrcParts - 1); ++i) {
5730 MachineInstrBuilder Mul =
5731 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
5732 Factors.push_back(Mul.getReg(0));
5733 }
5734 // Collect high parts of muls from previous DstIdx.
5735 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
5736 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
5737 MachineInstrBuilder Umulh =
5738 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
5739 Factors.push_back(Umulh.getReg(0));
5740 }
5741 // Add CarrySum from additions calculated for previous DstIdx.
5742 if (DstIdx != 1) {
5743 Factors.push_back(CarrySumPrevDstIdx);
5744 }
5745
5746 Register CarrySum;
5747 // Add all factors and accumulate all carries into CarrySum.
5748 if (DstIdx != DstParts - 1) {
5749 MachineInstrBuilder Uaddo =
5750 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
5751 FactorSum = Uaddo.getReg(0);
5752 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
5753 for (unsigned i = 2; i < Factors.size(); ++i) {
5754 MachineInstrBuilder Uaddo =
5755 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
5756 FactorSum = Uaddo.getReg(0);
5757 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
5758 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
5759 }
5760 } else {
5761 // Since value for the next index is not calculated, neither is CarrySum.
5762 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
5763 for (unsigned i = 2; i < Factors.size(); ++i)
5764 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
5765 }
5766
5767 CarrySumPrevDstIdx = CarrySum;
5768 DstRegs[DstIdx] = FactorSum;
5769 Factors.clear();
5770 }
5771}
5772
5773LegalizerHelper::LegalizeResult
5774LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
5775 LLT NarrowTy) {
5776 if (TypeIdx != 0)
5777 return UnableToLegalize;
5778
5779 Register DstReg = MI.getOperand(0).getReg();
5780 LLT DstType = MRI.getType(DstReg);
5781 // FIXME: add support for vector types
5782 if (DstType.isVector())
5783 return UnableToLegalize;
5784
5785 unsigned Opcode = MI.getOpcode();
5786 unsigned OpO, OpE, OpF;
5787 switch (Opcode) {
5788 case TargetOpcode::G_SADDO:
5789 case TargetOpcode::G_SADDE:
5790 case TargetOpcode::G_UADDO:
5791 case TargetOpcode::G_UADDE:
5792 case TargetOpcode::G_ADD:
5793 OpO = TargetOpcode::G_UADDO;
5794 OpE = TargetOpcode::G_UADDE;
5795 OpF = TargetOpcode::G_UADDE;
5796 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
5797 OpF = TargetOpcode::G_SADDE;
5798 break;
5799 case TargetOpcode::G_SSUBO:
5800 case TargetOpcode::G_SSUBE:
5801 case TargetOpcode::G_USUBO:
5802 case TargetOpcode::G_USUBE:
5803 case TargetOpcode::G_SUB:
5804 OpO = TargetOpcode::G_USUBO;
5805 OpE = TargetOpcode::G_USUBE;
5806 OpF = TargetOpcode::G_USUBE;
5807 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
5808 OpF = TargetOpcode::G_SSUBE;
5809 break;
5810 default:
5811 llvm_unreachable("Unexpected add/sub opcode!");
5812 }
5813
5814 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
5815 unsigned NumDefs = MI.getNumExplicitDefs();
5816 Register Src1 = MI.getOperand(NumDefs).getReg();
5817 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
5818 Register CarryDst, CarryIn;
5819 if (NumDefs == 2)
5820 CarryDst = MI.getOperand(1).getReg();
5821 if (MI.getNumOperands() == NumDefs + 3)
5822 CarryIn = MI.getOperand(NumDefs + 2).getReg();
5823
5824 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
5825 LLT LeftoverTy, DummyTy;
5826 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
5827 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
5828 MIRBuilder, MRI);
5829 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
5830 MRI);
5831
5832 int NarrowParts = Src1Regs.size();
5833 for (int I = 0, E = Src1Left.size(); I != E; ++I) {
5834 Src1Regs.push_back(Src1Left[I]);
5835 Src2Regs.push_back(Src2Left[I]);
5836 }
5837 DstRegs.reserve(Src1Regs.size());
5838
5839 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
5840 Register DstReg =
5841 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
5842 Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
5843 // Forward the final carry-out to the destination register
5844 if (i == e - 1 && CarryDst)
5845 CarryOut = CarryDst;
5846
5847 if (!CarryIn) {
5848 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
5849 {Src1Regs[i], Src2Regs[i]});
5850 } else if (i == e - 1) {
5851 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
5852 {Src1Regs[i], Src2Regs[i], CarryIn});
5853 } else {
5854 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
5855 {Src1Regs[i], Src2Regs[i], CarryIn});
5856 }
5857
5858 DstRegs.push_back(DstReg);
5859 CarryIn = CarryOut;
5860 }
5861 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
5862 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
5863 ArrayRef(DstRegs).drop_front(NarrowParts));
5864
5865 MI.eraseFromParent();
5866 return Legalized;
5867}
5868
5869LegalizerHelper::LegalizeResult
5870LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
5871 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
5872
5873 LLT Ty = MRI.getType(DstReg);
5874 if (Ty.isVector())
5875 return UnableToLegalize;
5876
5877 unsigned Size = Ty.getSizeInBits();
5878 unsigned NarrowSize = NarrowTy.getSizeInBits();
5879 if (Size % NarrowSize != 0)
5880 return UnableToLegalize;
5881
5882 unsigned NumParts = Size / NarrowSize;
5883 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
5884 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
5885
5886 SmallVector<Register, 2> Src1Parts, Src2Parts;
5887 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
5888 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
5889 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
5890 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
5891
5892 // Take only high half of registers if this is high mul.
5893 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
5894 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
5895 MI.eraseFromParent();
5896 return Legalized;
5897}
5898
5899LegalizerHelper::LegalizeResult
5900LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
5901 LLT NarrowTy) {
5902 if (TypeIdx != 0)
5903 return UnableToLegalize;
5904
5905 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
5906
5907 Register Src = MI.getOperand(1).getReg();
5908 LLT SrcTy = MRI.getType(Src);
5909
5910 // If all finite floats fit into the narrowed integer type, we can just swap
5911 // out the result type. This is practically only useful for conversions from
5912 // half to at least 16-bits, so just handle the one case.
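 // Note: the largest finite half (fp16) value is 65504, so every finite half
 // fits in 16 unsigned or 17 signed integer bits; that is where the 17/16-bit
 // threshold below comes from.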
5913 if (SrcTy.getScalarType() != LLT::scalar(16) ||
5914 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
5915 return UnableToLegalize;
5916
5918 narrowScalarDst(MI, NarrowTy, 0,
5919 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
5921 return Legalized;
5922}
5923
5924LegalizerHelper::LegalizeResult
5925LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
5926 LLT NarrowTy) {
5927 if (TypeIdx != 1)
5928 return UnableToLegalize;
5929
5930 uint64_t NarrowSize = NarrowTy.getSizeInBits();
5931
5932 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
5933 // FIXME: add support for when SizeOp1 isn't an exact multiple of
5934 // NarrowSize.
5935 if (SizeOp1 % NarrowSize != 0)
5936 return UnableToLegalize;
5937 int NumParts = SizeOp1 / NarrowSize;
5938
5939 SmallVector<Register, 2> SrcRegs, DstRegs;
5941 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
5942 MIRBuilder, MRI);
5943
5944 Register OpReg = MI.getOperand(0).getReg();
5945 uint64_t OpStart = MI.getOperand(2).getImm();
5946 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
5947 for (int i = 0; i < NumParts; ++i) {
5948 unsigned SrcStart = i * NarrowSize;
5949
5950 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
5951 // No part of the extract uses this subregister, ignore it.
5952 continue;
5953 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
5954 // The entire subregister is extracted, forward the value.
5955 DstRegs.push_back(SrcRegs[i]);
5956 continue;
5957 }
5958
5959 // OpSegStart is where this destination segment would start in OpReg if it
5960 // extended infinitely in both directions.
5961 int64_t ExtractOffset;
5962 uint64_t SegSize;
5963 if (OpStart < SrcStart) {
5964 ExtractOffset = 0;
5965 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
5966 } else {
5967 ExtractOffset = OpStart - SrcStart;
5968 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
5969 }
5970
5971 Register SegReg = SrcRegs[i];
5972 if (ExtractOffset != 0 || SegSize != NarrowSize) {
5973 // A genuine extract is needed.
5974 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
5975 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
5976 }
5977
5978 DstRegs.push_back(SegReg);
5979 }
5980
5981 Register DstReg = MI.getOperand(0).getReg();
5982 if (MRI.getType(DstReg).isVector())
5983 MIRBuilder.buildBuildVector(DstReg, DstRegs);
5984 else if (DstRegs.size() > 1)
5985 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
5986 else
5987 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
5988 MI.eraseFromParent();
5989 return Legalized;
5990}
5991
5992LegalizerHelper::LegalizeResult
5993LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
5994 LLT NarrowTy) {
5995 // FIXME: Don't know how to handle secondary types yet.
5996 if (TypeIdx != 0)
5997 return UnableToLegalize;
5998
5999 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
6001 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
6002 LLT LeftoverTy;
6003 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
6004 LeftoverRegs, MIRBuilder, MRI);
6005
6006 for (Register Reg : LeftoverRegs)
6007 SrcRegs.push_back(Reg);
6008
6009 uint64_t NarrowSize = NarrowTy.getSizeInBits();
6010 Register OpReg = MI.getOperand(2).getReg();
6011 uint64_t OpStart = MI.getOperand(3).getImm();
6012 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
6013 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
6014 unsigned DstStart = I * NarrowSize;
6015
6016 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6017 // The entire subregister is defined by this insert, forward the new
6018 // value.
6019 DstRegs.push_back(OpReg);
6020 continue;
6021 }
6022
6023 Register SrcReg = SrcRegs[I];
6024 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
6025 // The leftover reg is smaller than NarrowTy, so we need to extend it.
6026 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
6027 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
6028 }
6029
6030 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
6031 // No part of the insert affects this subregister, forward the original.
6032 DstRegs.push_back(SrcReg);
6033 continue;
6034 }
6035
6036 // OpSegStart is where this destination segment would start in OpReg if it
6037 // extended infinitely in both directions.
6038 int64_t ExtractOffset, InsertOffset;
6039 uint64_t SegSize;
6040 if (OpStart < DstStart) {
6041 InsertOffset = 0;
6042 ExtractOffset = DstStart - OpStart;
6043 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
6044 } else {
6045 InsertOffset = OpStart - DstStart;
6046 ExtractOffset = 0;
6047 SegSize =
6048 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
6049 }
6050
6051 Register SegReg = OpReg;
6052 if (ExtractOffset != 0 || SegSize != OpSize) {
6053 // A genuine extract is needed.
6054 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
6055 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
6056 }
6057
6058 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
6059 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
6060 DstRegs.push_back(DstReg);
6061 }
6062
6063 uint64_t WideSize = DstRegs.size() * NarrowSize;
6064 Register DstReg = MI.getOperand(0).getReg();
6065 if (WideSize > RegTy.getSizeInBits()) {
6066 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
6067 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
6068 MIRBuilder.buildTrunc(DstReg, MergeReg);
6069 } else
6070 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6071
6072 MI.eraseFromParent();
6073 return Legalized;
6074}
6075
6076LegalizerHelper::LegalizeResult
6077LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
6078 LLT NarrowTy) {
6079 Register DstReg = MI.getOperand(0).getReg();
6080 LLT DstTy = MRI.getType(DstReg);
6081
6082 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
6083
6084 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6085 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
6086 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6087 LLT LeftoverTy;
6088 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
6089 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
6090 return UnableToLegalize;
6091
6092 LLT Unused;
6093 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
6094 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6095 llvm_unreachable("inconsistent extractParts result");
6096
6097 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6098 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
6099 {Src0Regs[I], Src1Regs[I]});
6100 DstRegs.push_back(Inst.getReg(0));
6101 }
6102
6103 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6104 auto Inst = MIRBuilder.buildInstr(
6105 MI.getOpcode(),
6106 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
6107 DstLeftoverRegs.push_back(Inst.getReg(0));
6108 }
6109
6110 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6111 LeftoverTy, DstLeftoverRegs);
6112
6113 MI.eraseFromParent();
6114 return Legalized;
6115}
6116
6117LegalizerHelper::LegalizeResult
6118LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
6119 LLT NarrowTy) {
6120 if (TypeIdx != 0)
6121 return UnableToLegalize;
6122
6123 auto [DstReg, SrcReg] = MI.getFirst2Regs();
6124
6125 LLT DstTy = MRI.getType(DstReg);
6126 if (DstTy.isVector())
6127 return UnableToLegalize;
6128
6129 SmallVector<Register, 8> Parts;
6130 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
6131 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
6132 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
6133
6134 MI.eraseFromParent();
6135 return Legalized;
6136}
6137
6138LegalizerHelper::LegalizeResult
6139LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
6140 LLT NarrowTy) {
6141 if (TypeIdx != 0)
6142 return UnableToLegalize;
6143
6144 Register CondReg = MI.getOperand(1).getReg();
6145 LLT CondTy = MRI.getType(CondReg);
6146 if (CondTy.isVector()) // TODO: Handle vselect
6147 return UnableToLegalize;
6148
6149 Register DstReg = MI.getOperand(0).getReg();
6150 LLT DstTy = MRI.getType(DstReg);
6151
6152 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6153 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6154 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
6155 LLT LeftoverTy;
6156 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
6157 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6158 return UnableToLegalize;
6159
6160 LLT Unused;
6161 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
6162 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
6163 llvm_unreachable("inconsistent extractParts result");
6164
6165 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6166 auto Select = MIRBuilder.buildSelect(NarrowTy,
6167 CondReg, Src1Regs[I], Src2Regs[I]);
6168 DstRegs.push_back(Select.getReg(0));
6169 }
6170
6171 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6172 auto Select = MIRBuilder.buildSelect(
6173 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
6174 DstLeftoverRegs.push_back(Select.getReg(0));
6175 }
6176
6177 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6178 LeftoverTy, DstLeftoverRegs);
6179
6180 MI.eraseFromParent();
6181 return Legalized;
6182}
6183
6184LegalizerHelper::LegalizeResult
6185LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
6186 LLT NarrowTy) {
6187 if (TypeIdx != 1)
6188 return UnableToLegalize;
6189
6190 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6191 unsigned NarrowSize = NarrowTy.getSizeInBits();
6192
6193 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6194 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
6195
6197 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6198 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
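 // E.g. for a 64-bit source split into 32-bit halves,
 // ctlz(0x0000000000000001) = 32 + ctlz(0x00000001) = 32 + 31 = 63,
 // taking the Hi == 0 arm of the select built below.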
6199 auto C_0 = B.buildConstant(NarrowTy, 0);
6200 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
6201 UnmergeSrc.getReg(1), C_0);
6202 auto LoCTLZ = IsUndef ?
6203 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
6204 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
6205 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
6206 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
6207 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
6208 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
6209
6210 MI.eraseFromParent();
6211 return Legalized;
6212 }
6213
6214 return UnableToLegalize;
6215}
6216
6217LegalizerHelper::LegalizeResult
6218LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
6219 LLT NarrowTy) {
6220 if (TypeIdx != 1)
6221 return UnableToLegalize;
6222
6223 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6224 unsigned NarrowSize = NarrowTy.getSizeInBits();
6225
6226 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6227 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
6228
6230 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6231 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
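 // E.g. for a 64-bit source split into 32-bit halves,
 // cttz(0x0000000100000000) = cttz(0x00000001) + 32 = 0 + 32 = 32,
 // taking the Lo == 0 arm of the select built below.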
6232 auto C_0 = B.buildConstant(NarrowTy, 0);
6233 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
6234 UnmergeSrc.getReg(0), C_0);
6235 auto HiCTTZ = IsUndef ?
6236 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
6237 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
6238 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
6239 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
6240 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
6241 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
6242
6243 MI.eraseFromParent();
6244 return Legalized;
6245 }
6246
6247 return UnableToLegalize;
6248}
6249
6250LegalizerHelper::LegalizeResult
6251LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
6252 LLT NarrowTy) {
6253 if (TypeIdx != 1)
6254 return UnableToLegalize;
6255
6256 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6257 unsigned NarrowSize = NarrowTy.getSizeInBits();
6258
6259 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6260 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
6261
6262 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
6263 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
6264 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
6265
6266 MI.eraseFromParent();
6267 return Legalized;
6268 }
6269
6270 return UnableToLegalize;
6271}
6272
6273LegalizerHelper::LegalizeResult
6274LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
6275 LLT NarrowTy) {
6276 if (TypeIdx != 1)
6277 return UnableToLegalize;
6278
6280 Register ExpReg = MI.getOperand(2).getReg();
6281 LLT ExpTy = MRI.getType(ExpReg);
6282
6283 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
6284
6285 // Clamp the exponent to the range of the target type.
6286 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
6287 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
6288 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
6289 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
6290
6291 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
6293 MI.getOperand(2).setReg(Trunc.getReg(0));
6295 return Legalized;
6296}
6297
6298LegalizerHelper::LegalizeResult
6299LegalizerHelper::lowerBitCount(MachineInstr &MI) {
6300 unsigned Opc = MI.getOpcode();
6301 const auto &TII = MIRBuilder.getTII();
6302 auto isSupported = [this](const LegalityQuery &Q) {
6303 auto QAction = LI.getAction(Q).Action;
6304 return QAction == Legal || QAction == Libcall || QAction == Custom;
6305 };
6306 switch (Opc) {
6307 default:
6308 return UnableToLegalize;
6309 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
6310 // This trivially expands to CTLZ.
6312 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
6314 return Legalized;
6315 }
6316 case TargetOpcode::G_CTLZ: {
6317 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6318 unsigned Len = SrcTy.getSizeInBits();
6319
6320 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6321 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
6322 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
6323 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
6324 auto ICmp = MIRBuilder.buildICmp(
6325 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
6326 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
6327 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
6328 MI.eraseFromParent();
6329 return Legalized;
6330 }
6331 // for now, we do this:
6332 // NewLen = NextPowerOf2(Len);
6333 // x = x | (x >> 1);
6334 // x = x | (x >> 2);
6335 // ...
6336 // x = x | (x >>16);
6337 // x = x | (x >>32); // for 64-bit input
6338 // Up to NewLen/2
6339 // return Len - popcount(x);
6340 //
6341 // Ref: "Hacker's Delight" by Henry Warren
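 // Worked example for an s8 source x = 0b00010000: the or/shift steps smear
 // the leading one, giving x = 0b00011111, popcount(x) = 5, and
 // ctlz = 8 - 5 = 3.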
6342 Register Op = SrcReg;
6343 unsigned NewLen = PowerOf2Ceil(Len);
6344 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
6345 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
6346 auto MIBOp = MIRBuilder.buildOr(
6347 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
6348 Op = MIBOp.getReg(0);
6349 }
6350 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
6351 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
6352 MIBPop);
6353 MI.eraseFromParent();
6354 return Legalized;
6355 }
6356 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
6357 // This trivially expands to CTTZ.
6359 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
6361 return Legalized;
6362 }
6363 case TargetOpcode::G_CTTZ: {
6364 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6365
6366 unsigned Len = SrcTy.getSizeInBits();
6367 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6368 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
6369 // zero.
6370 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
6371 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
6372 auto ICmp = MIRBuilder.buildICmp(
6373 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
6374 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
6375 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
6376 MI.eraseFromParent();
6377 return Legalized;
6378 }
6379 // for now, we use: { return popcount(~x & (x - 1)); }
6380 // unless the target has ctlz but not ctpop, in which case we use:
6381 // { return 32 - nlz(~x & (x-1)); }
6382 // Ref: "Hacker's Delight" by Henry Warren
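 // Worked example for an s8 source x = 0b00101000: ~x & (x - 1) = 0b00000111,
 // whose popcount of 3 equals the number of trailing zeros of x.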
6383 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
6384 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
6385 auto MIBTmp = MIRBuilder.buildAnd(
6386 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
6387 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
6388 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
6389 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
6390 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
6391 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
6392 MI.eraseFromParent();
6393 return Legalized;
6394 }
6396 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
6397 MI.getOperand(1).setReg(MIBTmp.getReg(0));
6399 return Legalized;
6400 }
6401 case TargetOpcode::G_CTPOP: {
6402 Register SrcReg = MI.getOperand(1).getReg();
6403 LLT Ty = MRI.getType(SrcReg);
6404 unsigned Size = Ty.getSizeInBits();
6406
6407 // Count set bits in blocks of 2 bits. Default approach would be
6408 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
6409 // We use following formula instead:
6410 // B2Count = val - { (val >> 1) & 0x55555555 }
6411 // since it gives same result in blocks of 2 with one instruction less.
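 // E.g. a 2-bit block holding 0b11 becomes 3 - 1 = 2 and 0b10 becomes
 // 2 - 1 = 1, so each block ends up holding its own population count.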
6412 auto C_1 = B.buildConstant(Ty, 1);
6413 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
6414 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
6415 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
6416 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
6417 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
6418
6419 // In order to get count in blocks of 4 add values from adjacent block of 2.
6420 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
6421 auto C_2 = B.buildConstant(Ty, 2);
6422 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
6423 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
6424 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
6425 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
6426 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
6427 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
6428
6429 // For count in blocks of 8 bits we don't have to mask high 4 bits before
6430 // addition since count value sits in range {0,...,8} and 4 bits are enough
6431 // to hold such binary values. After addition high 4 bits still hold count
6432 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
6433 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
6434 auto C_4 = B.buildConstant(Ty, 4);
6435 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
6436 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
6437 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
6438 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
6439 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
6440
6441 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
6442 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
6443 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
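 // E.g. for a 32-bit value, multiplying B8Count by 0x01010101 sums the four
 // byte counts into the most significant byte; the shift by Size - 8 below
 // then moves that total down to the low bits.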
6444 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
6445
6446 // Shift count result from 8 high bits to low bits.
6447 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
6448
6449 auto IsMulSupported = [this](const LLT Ty) {
6450 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
6451 return Action == Legal || Action == WidenScalar || Action == Custom;
6452 };
6453 if (IsMulSupported(Ty)) {
6454 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
6455 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
6456 } else {
6457 auto ResTmp = B8Count;
6458 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
6459 auto ShiftC = B.buildConstant(Ty, Shift);
6460 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
6461 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
6462 }
6463 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
6464 }
6465 MI.eraseFromParent();
6466 return Legalized;
6467 }
6468 }
6469}
6470
6471// Check that (every element of) Reg is undef or not an exact multiple of BW.
6472static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
6473 Register Reg, unsigned BW) {
6474 return matchUnaryPredicate(
6475 MRI, Reg,
6476 [=](const Constant *C) {
6477 // Null constant here means an undef.
6478 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
6479 return !CI || CI->getValue().urem(BW) != 0;
6480 },
6481 /*AllowUndefs*/ true);
6482}
6483
6484LegalizerHelper::LegalizeResult
6485LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
6486 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
6487 LLT Ty = MRI.getType(Dst);
6488 LLT ShTy = MRI.getType(Z);
6489
6490 unsigned BW = Ty.getScalarSizeInBits();
6491
6492 if (!isPowerOf2_32(BW))
6493 return UnableToLegalize;
6494
6495 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6496 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6497
6498 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
6499 // fshl X, Y, Z -> fshr X, Y, -Z
6500 // fshr X, Y, Z -> fshl X, Y, -Z
6501 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
6502 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
6503 } else {
6504 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
6505 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
6506 auto One = MIRBuilder.buildConstant(ShTy, 1);
6507 if (IsFSHL) {
6508 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
6509 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
6510 } else {
6511 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
6512 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
6513 }
6514
6515 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
6516 }
6517
6518 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
6519 MI.eraseFromParent();
6520 return Legalized;
6521}
6522
6523LegalizerHelper::LegalizeResult
6524LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
6525 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
6526 LLT Ty = MRI.getType(Dst);
6527 LLT ShTy = MRI.getType(Z);
6528
6529 const unsigned BW = Ty.getScalarSizeInBits();
6530 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6531
6532 Register ShX, ShY;
6533 Register ShAmt, InvShAmt;
6534
6535 // FIXME: Emit optimized urem by constant instead of letting it expand later.
6536 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
6537 // fshl: X << C | Y >> (BW - C)
6538 // fshr: X << (BW - C) | Y >> C
6539 // where C = Z % BW is not zero
6540 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
6541 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
6542 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
6543 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
6544 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
6545 } else {
6546 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
6547 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
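 // Splitting the second shift into a fixed shift by 1 followed by a shift by
 // BW - 1 - (Z % BW) keeps every shift amount strictly below BW, so the
 // expansion stays well defined even when Z % BW == 0.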
6548 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
6549 if (isPowerOf2_32(BW)) {
6550 // Z % BW -> Z & (BW - 1)
6551 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
6552 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
6553 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
6554 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
6555 } else {
6556 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
6557 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
6558 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
6559 }
6560
6561 auto One = MIRBuilder.buildConstant(ShTy, 1);
6562 if (IsFSHL) {
6563 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
6564 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
6565 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
6566 } else {
6567 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
6568 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
6569 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
6570 }
6571 }
6572
6573 MIRBuilder.buildOr(Dst, ShX, ShY);
6574 MI.eraseFromParent();
6575 return Legalized;
6576}
6577
6578LegalizerHelper::LegalizeResult
6579LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
6580 // These operations approximately do the following (while avoiding undefined
6581 // shifts by BW):
6582 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
6583 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
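 // For example, with BW = 8 and Z = 3, G_FSHL computes (X << 3) | (Y >> 5):
 // the top five result bits come from X and the low three from Y.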
6584 Register Dst = MI.getOperand(0).getReg();
6585 LLT Ty = MRI.getType(Dst);
6586 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
6587
6588 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6589 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6590
6591 // TODO: Use smarter heuristic that accounts for vector legalization.
6592 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
6593 return lowerFunnelShiftAsShifts(MI);
6594
6595 // This only works for powers of 2, fallback to shifts if it fails.
6596 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
6597 if (Result == UnableToLegalize)
6598 return lowerFunnelShiftAsShifts(MI);
6599 return Result;
6600}
6601
6602LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
6603 auto [Dst, Src] = MI.getFirst2Regs();
6604 LLT DstTy = MRI.getType(Dst);
6605 LLT SrcTy = MRI.getType(Src);
6606
6607 uint32_t DstTySize = DstTy.getSizeInBits();
6608 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
6609 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
6610
6611 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
6612 !isPowerOf2_32(SrcTyScalarSize))
6613 return UnableToLegalize;
6614
6615 // The step between extend is too large, split it by creating an intermediate
6616 // extend instruction
6617 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
6618 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
6619 // If the destination type is illegal, split it into multiple statements
6620 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
6621 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
6622 // Unmerge the vector
6623 LLT EltTy = MidTy.changeElementCount(
6624 MidTy.getElementCount().divideCoefficientBy(2));
6625 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
6626
6627 // ZExt the vectors
6628 LLT ZExtResTy = DstTy.changeElementCount(
6629 DstTy.getElementCount().divideCoefficientBy(2));
6630 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
6631 {UnmergeSrc.getReg(0)});
6632 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
6633 {UnmergeSrc.getReg(1)});
6634
6635 // Merge the ending vectors
6636 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
6637
6638 MI.eraseFromParent();
6639 return Legalized;
6640 }
6641 return UnableToLegalize;
6642}
6643
6644LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
6645 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
6647 // Similar to how operand splitting is done in SelectionDAG, we can handle
6648 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
6649 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
6650 // %lo16(<4 x s16>) = G_TRUNC %inlo
6651 // %hi16(<4 x s16>) = G_TRUNC %inhi
6652 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
6653 // %res(<8 x s8>) = G_TRUNC %in16
6654
6655 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
6656
6657 Register DstReg = MI.getOperand(0).getReg();
6658 Register SrcReg = MI.getOperand(1).getReg();
6659 LLT DstTy = MRI.getType(DstReg);
6660 LLT SrcTy = MRI.getType(SrcReg);
6661
6662 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
6663 isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
6664 isPowerOf2_32(SrcTy.getNumElements()) &&
6665 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
6666 // Split input type.
6667 LLT SplitSrcTy = SrcTy.changeElementCount(
6668 SrcTy.getElementCount().divideCoefficientBy(2));
6669
6670 // First, split the source into two smaller vectors.
6671 SmallVector<Register, 2> SplitSrcs;
6672 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
6673
6674 // Truncate the splits into intermediate narrower elements.
6675 LLT InterTy;
6676 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
6677 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
6678 else
6679 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
6680 for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
6681 SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
6682 }
6683
6684 // Combine the new truncates into one vector
6685 auto Merge = MIRBuilder.buildMergeLikeInstr(
6686 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
6687
6688 // Truncate the new vector to the final result type
6689 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
6690 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
6691 else
6692 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
6693
6694 MI.eraseFromParent();
6695
6696 return Legalized;
6697 }
6698 return UnableToLegalize;
6699}
6700
6701LegalizerHelper::LegalizeResult
6702LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
6703 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
6704 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
6705 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
6706 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6707 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
6708 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
6709 MI.eraseFromParent();
6710 return Legalized;
6711}
6712
6713LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
6714 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
6715
6716 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
6717 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
6718
6720
6721 // If a rotate in the other direction is supported, use it.
6722 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6723 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
6724 isPowerOf2_32(EltSizeInBits))
6725 return lowerRotateWithReverseRotate(MI);
6726
6727 // If a funnel shift is supported, use it.
6728 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6729 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6730 bool IsFShLegal = false;
6731 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
6732 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
6733 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
6734 Register R3) {
6735 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
6736 MI.eraseFromParent();
6737 return Legalized;
6738 };
6739 // If a funnel shift in the other direction is supported, use it.
6740 if (IsFShLegal) {
6741 return buildFunnelShift(FShOpc, Dst, Src, Amt);
6742 } else if (isPowerOf2_32(EltSizeInBits)) {
6743 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
6744 return buildFunnelShift(RevFsh, Dst, Src, Amt);
6745 }
6746 }
6747
6748 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
6749 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
6750 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
6751 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
6752 Register ShVal;
6753 Register RevShiftVal;
6754 if (isPowerOf2_32(EltSizeInBits)) {
6755 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
6756 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
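 // E.g. rotl of an 8-bit value by c = 3 becomes (x << 3) | (x >> 5), since
 // -3 & 7 == 5; the masks keep both shift amounts within [0, w - 1].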
6757 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
6758 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
6759 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
6760 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
6761 RevShiftVal =
6762 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
6763 } else {
6764 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
6765 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
6766 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
6767 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
6768 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
6769 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
6770 auto One = MIRBuilder.buildConstant(AmtTy, 1);
6771 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
6772 RevShiftVal =
6773 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
6774 }
6775 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
6776 MI.eraseFromParent();
6777 return Legalized;
6778}
6779
6780// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
6781// representation.
6782LegalizerHelper::LegalizeResult
6783LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
6784 auto [Dst, Src] = MI.getFirst2Regs();
6785 const LLT S64 = LLT::scalar(64);
6786 const LLT S32 = LLT::scalar(32);
6787 const LLT S1 = LLT::scalar(1);
6788
6789 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
6790
6791 // unsigned cul2f(ulong u) {
6792 // uint lz = clz(u);
6793 // uint e = (u != 0) ? 127U + 63U - lz : 0;
6794 // u = (u << lz) & 0x7fffffffffffffffUL;
6795 // ulong t = u & 0xffffffffffUL;
6796 // uint v = (e << 23) | (uint)(u >> 40);
6797 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
6798 // return as_float(v + r);
6799 // }
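 // Worked example of cul2f (illustrative only): for u = 1, lz = 63, so
 // e = 127 + 63 - 63 = 127; (u << 63) masked with 0x7fffffffffffffff is 0,
 // hence t = 0 and v = 127 << 23 = 0x3f800000; the rounding term r is 0, so
 // the result is as_float(0x3f800000) = 1.0f, as expected.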
6800
6801 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
6802 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
6803
6804 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
6805
6806 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
6807 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
6808
6809 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
6810 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
6811
6812 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
6813 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
6814
6815 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
6816
6817 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
6818 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
6819
6820 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
6821 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
6822 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
6823
6824 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
6825 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
6826 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
6827 auto One = MIRBuilder.buildConstant(S32, 1);
6828
6829 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
6830 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
6831 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
6832 MIRBuilder.buildAdd(Dst, V, R);
6833
6834 MI.eraseFromParent();
6835 return Legalized;
6836}
6837
6838LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
6839 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6840
6841 if (SrcTy == LLT::scalar(1)) {
6842 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
6843 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
6844 MIRBuilder.buildSelect(Dst, Src, True, False);
6845 MI.eraseFromParent();
6846 return Legalized;
6847 }
6848
6849 if (SrcTy != LLT::scalar(64))
6850 return UnableToLegalize;
6851
6852 if (DstTy == LLT::scalar(32)) {
6853 // TODO: SelectionDAG has several alternative expansions to port which may
6854 // be more reasonable depending on the available instructions. If a target
6855 // has sitofp, does not have CTLZ, or can efficiently use f64 as an
6856 // intermediate type, this is probably worse.
6857 return lowerU64ToF32BitOps(MI);
6858 }
6859
6860 return UnableToLegalize;
6861}
6862
6863LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
6864 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6865
6866 const LLT S64 = LLT::scalar(64);
6867 const LLT S32 = LLT::scalar(32);
6868 const LLT S1 = LLT::scalar(1);
6869
6870 if (SrcTy == S1) {
6871 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
6872 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
6873 MIRBuilder.buildSelect(Dst, Src, True, False);
6874 MI.eraseFromParent();
6875 return Legalized;
6876 }
6877
6878 if (SrcTy != S64)
6879 return UnableToLegalize;
6880
6881 if (DstTy == S32) {
6882 // signed cl2f(long l) {
6883 // long s = l >> 63;
6884 // float r = cul2f((l + s) ^ s);
6885 // return s ? -r : r;
6886 // }
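 // Worked example (illustrative only): for l = -1, s = -1, so
 // (l + s) ^ s = (-2) ^ (-1) = 1 and r = cul2f(1) = 1.0f; since s != 0 the
 // final select yields -1.0f, the correct signed conversion of -1.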
6887 Register L = Src;
6888 auto SignBit = MIRBuilder.buildConstant(S64, 63);
6889 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
6890
6891 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
6892 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
6893 auto R = MIRBuilder.buildUITOFP(S32, Xor);
6894
6895 auto RNeg = MIRBuilder.buildFNeg(S32, R);
6896 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
6897 MIRBuilder.buildConstant(S64, 0));
6898 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
6899 MI.eraseFromParent();
6900 return Legalized;
6901 }
6902
6903 return UnableToLegalize;
6904}
6905
6906LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
6907 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6908 const LLT S64 = LLT::scalar(64);
6909 const LLT S32 = LLT::scalar(32);
6910
6911 if (SrcTy != S64 && SrcTy != S32)
6912 return UnableToLegalize;
6913 if (DstTy != S32 && DstTy != S64)
6914 return UnableToLegalize;
6915
6916 // FPTOSI gives same result as FPTOUI for positive signed integers.
6917 // FPTOUI needs to deal with fp values that convert to unsigned integers
6918 // greater than or equal to 2^31 for float or 2^63 for double. For brevity, 2^Exp.
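 // Illustrative example (values chosen for exposition): converting the f32
 // value 3000000000.0 to u32 fails the compare against the 2^31 threshold, so
 // the expansion computes FPTOSI(3000000000.0 - 2^31) = 852516352 and XORs in
 // 0x80000000, giving 3000000000 as required.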
6919
6920 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
6921 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
6922 : APFloat::IEEEdouble(),
6923 APInt::getZero(SrcTy.getSizeInBits()));
6924 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
6925
6926 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
6927
6928 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
6929 // For fp values greater than or equal to Threshold (2^Exp), we use FPTOSI on
6930 // (Value - 2^Exp) and add 2^Exp back by setting the highest bit of the result.
6931 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
6932 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
6933 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
6934 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
6935
6936 const LLT S1 = LLT::scalar(1);
6937
6938 MachineInstrBuilder FCMP =
6939 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
6940 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
6941
6942 MI.eraseFromParent();
6943 return Legalized;
6944}
6945
6946LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
6947 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6948 const LLT S64 = LLT::scalar(64);
6949 const LLT S32 = LLT::scalar(32);
6950
6951 // FIXME: Only f32 to i64 conversions are supported.
6952 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
6953 return UnableToLegalize;
6954
6955 // Expand f32 -> i64 conversion
6956 // This algorithm comes from compiler-rt's implementation of fixsfdi:
6957 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
6958
6959 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
6960
6961 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
6962 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
6963
6964 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
6965 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
6966
6967 auto SignMask = MIRBuilder.buildConstant(SrcTy,
6968 APInt::getSignMask(SrcEltBits));
6969 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
6970 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
6971 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
6972 Sign = MIRBuilder.buildSExt(DstTy, Sign);
6973
6974 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
6975 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
6976 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
6977
6978 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
6979 R = MIRBuilder.buildZExt(DstTy, R);
6980
6981 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
6982 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
6983 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
6984 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
6985
6986 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
6987 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
6988
6989 const LLT S1 = LLT::scalar(1);
6990 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
6991 S1, Exponent, ExponentLoBit);
6992
6993 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
6994
6995 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
6996 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
6997
6998 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
6999
7000 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
7001 S1, Exponent, ZeroSrcTy);
7002
7003 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
7004 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
7005
7006 MI.eraseFromParent();
7007 return Legalized;
7008}
7009
7010// f64 -> f16 conversion using round-to-nearest-even rounding mode.
7011LegalizerHelper::LegalizeResult
7012LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
7013 const LLT S1 = LLT::scalar(1);
7014 const LLT S32 = LLT::scalar(32);
7015
7016 auto [Dst, Src] = MI.getFirst2Regs();
7017 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
7018 MRI.getType(Src).getScalarType() == LLT::scalar(64));
7019
7020 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
7021 return UnableToLegalize;
7022
7023 if (MIRBuilder.getMF().getTarget().Options.UnsafeFPMath) {
7024 unsigned Flags = MI.getFlags();
7025 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
7026 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
7027 MI.eraseFromParent();
7028 return Legalized;
7029 }
7030
7031 const unsigned ExpMask = 0x7ff;
7032 const unsigned ExpBiasf64 = 1023;
7033 const unsigned ExpBiasf16 = 15;
7034
7035 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
7036 Register U = Unmerge.getReg(0);
7037 Register UH = Unmerge.getReg(1);
7038
7039 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
7040 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
7041
7042 // Subtract the fp64 exponent bias (1023) to get the real exponent and
7043 // add the f16 bias (15) to get the biased exponent for the f16 format.
7044 E = MIRBuilder.buildAdd(
7045 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
7046
7046
7047 auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
7048 M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
7049
7050 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
7051 MIRBuilder.buildConstant(S32, 0x1ff));
7052 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
7053
7054 auto Zero = MIRBuilder.buildConstant(S32, 0);
7055 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
7056 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
7057 M = MIRBuilder.buildOr(S32, M, Lo40Set);
7058
7059 // (M != 0 ? 0x0200 : 0) | 0x7c00;
7060 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
7061 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
7062 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
7063
7064 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
7065 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
7066
7067 // N = M | (E << 12);
7068 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
7069 auto N = MIRBuilder.buildOr(S32, M, EShl12);
7070
7071 // B = clamp(1-E, 0, 13);
7072 auto One = MIRBuilder.buildConstant(S32, 1);
7073 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
7074 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
7075 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
7076
7077 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
7078 MIRBuilder.buildConstant(S32, 0x1000));
7079
7080 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
7081 auto D0 = MIRBuilder.buildShl(S32, D, B);
7082
7083 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
7084 D0, SigSetHigh);
7085 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
7086 D = MIRBuilder.buildOr(S32, D, D1);
7087
7088 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
7089 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
7090
7091 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
7092 V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
7093
7094 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
7095 MIRBuilder.buildConstant(S32, 3));
7096 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
7097
7098 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
7099 MIRBuilder.buildConstant(S32, 5));
7100 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
7101
7102 V1 = MIRBuilder.buildOr(S32, V0, V1);
7103 V = MIRBuilder.buildAdd(S32, V, V1);
7104
7105 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
7106 E, MIRBuilder.buildConstant(S32, 30));
7107 V = MIRBuilder.buildSelect(S32, CmpEGt30,
7108 MIRBuilder.buildConstant(S32, 0x7c00), V);
7109
7110 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
7111 E, MIRBuilder.buildConstant(S32, 1039));
7112 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
7113
7114 // Extract the sign bit.
7115 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
7116 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
7117
7118 // Insert the sign bit
7119 V = MIRBuilder.buildOr(S32, Sign, V);
7120
7121 MIRBuilder.buildTrunc(Dst, V);
7122 MI.eraseFromParent();
7123 return Legalized;
7124}
7125
7126LegalizerHelper::LegalizeResult
7127LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
7128 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
7129 const LLT S64 = LLT::scalar(64);
7130 const LLT S16 = LLT::scalar(16);
7131
7132 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
7133 return lowerFPTRUNC_F64_TO_F16(MI);
7134
7135 return UnableToLegalize;
7136}
7137
7138// TODO: If RHS is a constant SelectionDAGBuilder expands this into a
7139// multiplication tree.
7140LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
7141 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7142 LLT Ty = MRI.getType(Dst);
7143
7144 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
7145 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
7146 MI.eraseFromParent();
7147 return Legalized;
7148}
7149
7150static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
7151 switch (Opc) {
7152 case TargetOpcode::G_SMIN:
7153 return CmpInst::ICMP_SLT;
7154 case TargetOpcode::G_SMAX:
7155 return CmpInst::ICMP_SGT;
7156 case TargetOpcode::G_UMIN:
7157 return CmpInst::ICMP_ULT;
7158 case TargetOpcode::G_UMAX:
7159 return CmpInst::ICMP_UGT;
7160 default:
7161 llvm_unreachable("not in integer min/max");
7162 }
7163}
7164
7165LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
7166 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7167
7168 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
7169 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
7170
7171 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
7172 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
7173
7174 MI.eraseFromParent();
7175 return Legalized;
7176}
7177
7178LegalizerHelper::LegalizeResult
7179LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
7180 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
7181 const int Src0Size = Src0Ty.getScalarSizeInBits();
7182 const int Src1Size = Src1Ty.getScalarSizeInBits();
7183
7184 auto SignBitMask = MIRBuilder.buildConstant(
7185 Src0Ty, APInt::getSignMask(Src0Size));
7186
7187 auto NotSignBitMask = MIRBuilder.buildConstant(
7188 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
7189
7190 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
7191 Register And1;
7192 if (Src0Ty == Src1Ty) {
7193 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
7194 } else if (Src0Size > Src1Size) {
7195 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
7196 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
7197 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
7198 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
7199 } else {
7200 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
7201 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
7202 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
7203 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
7204 }
7205
7206 // Be careful about setting nsz/nnan/ninf on every instruction, since the
7207 // constants are a nan and -0.0, but the final result should preserve
7208 // everything.
7209 unsigned Flags = MI.getFlags();
7210 MIRBuilder.buildOr(Dst, And0, And1, Flags);
7211
7212 MI.eraseFromParent();
7213 return Legalized;
7214}
7215
7216LegalizerHelper::LegalizeResult
7217LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
7218 unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
7219 TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
7220
7221 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7222 LLT Ty = MRI.getType(Dst);
7223
7224 if (!MI.getFlag(MachineInstr::FmNoNans)) {
7225 // Insert canonicalizes if it's possible we need to quiet to get correct
7226 // sNaN behavior.
7227
7228 // Note this must be done here, and not as an optimization combine in the
7229 // absence of a dedicated quiet-sNaN instruction as we're using an
7230 // omni-purpose G_FCANONICALIZE.
7231 if (!isKnownNeverSNaN(Src0, MRI))
7232 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
7233
7234 if (!isKnownNeverSNaN(Src1, MRI))
7235 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
7236 }
7237
7238 // If there are no nans, it's safe to simply replace this with the non-IEEE
7239 // version.
7240 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
7241 MI.eraseFromParent();
7242 return Legalized;
7243}
7244
7245LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
7246 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
7247 Register DstReg = MI.getOperand(0).getReg();
7248 LLT Ty = MRI.getType(DstReg);
7249 unsigned Flags = MI.getFlags();
7250
7251 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
7252 Flags);
7253 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
7254 MI.eraseFromParent();
7255 return Legalized;
7256}
7257
7258LegalizerHelper::LegalizeResult
7259LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
7260 auto [DstReg, X] = MI.getFirst2Regs();
7261 const unsigned Flags = MI.getFlags();
7262 const LLT Ty = MRI.getType(DstReg);
7263 const LLT CondTy = Ty.changeElementSize(1);
7264
7265 // round(x) =>
7266 // t = trunc(x);
7267 // d = fabs(x - t);
7268 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
7269 // return t + o;
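 // Worked example (illustrative only): round(-2.5) gives t = -2.0 and
 // d = |-2.5 - (-2.0)| = 0.5, so the d >= 0.5 compare is true and
 // o = copysign(1.0, -2.5) = -1.0; the result is -2.0 + -1.0 = -3.0,
 // i.e. halfway cases round away from zero.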
7270
7271 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
7272
7273 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
7274 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
7275
7276 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
7277 auto Cmp =
7278 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
7279
7280 // Could emit G_UITOFP instead
7281 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
7282 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
7283 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
7284 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
7285
7286 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
7287
7288 MI.eraseFromParent();
7289 return Legalized;
7290}
7291
7292LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
7293 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7294 unsigned Flags = MI.getFlags();
7295 LLT Ty = MRI.getType(DstReg);
7296 const LLT CondTy = Ty.changeElementSize(1);
7297
7298 // result = trunc(src);
7299 // if (src < 0.0 && src != result)
7300 // result += -1.0.
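 // Worked example (illustrative only): for src = -1.25, trunc gives -1.0;
 // since -1.25 < 0.0 and -1.25 != -1.0 the i1 condition is true, and
 // G_SITOFP of a true (all-ones) i1 is -1.0, so the add below produces
 // -2.0 = floor(-1.25).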
7301
7302 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
7303 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
7304
7305 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
7306 SrcReg, Zero, Flags);
7307 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
7308 SrcReg, Trunc, Flags);
7309 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
7310 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
7311
7312 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
7313 MI.eraseFromParent();
7314 return Legalized;
7315}
7316
7317LegalizerHelper::LegalizeResult
7318LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
7319 const unsigned NumOps = MI.getNumOperands();
7320 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
7321 unsigned PartSize = Src0Ty.getSizeInBits();
7322
7323 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
7324 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
7325
7326 for (unsigned I = 2; I != NumOps; ++I) {
7327 const unsigned Offset = (I - 1) * PartSize;
7328
7329 Register SrcReg = MI.getOperand(I).getReg();
7330 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
7331
7332 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
7333 MRI.createGenericVirtualRegister(WideTy);
7334
7335 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
7336 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
7337 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
7338 ResultReg = NextResult;
7339 }
7340
7341 if (DstTy.isPointer()) {
7342 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
7343 DstTy.getAddressSpace())) {
7344 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
7345 return UnableToLegalize;
7346 }
7347
7348 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
7349 }
7350
7351 MI.eraseFromParent();
7352 return Legalized;
7353}
7354
7355LegalizerHelper::LegalizeResult
7356LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
7357 const unsigned NumDst = MI.getNumOperands() - 1;
7358 Register SrcReg = MI.getOperand(NumDst).getReg();
7359 Register Dst0Reg = MI.getOperand(0).getReg();
7360 LLT DstTy = MRI.getType(Dst0Reg);
7361 if (DstTy.isPointer())
7362 return UnableToLegalize; // TODO
7363
7364 SrcReg = coerceToScalar(SrcReg);
7365 if (!SrcReg)
7366 return UnableToLegalize;
7367
7368 // Expand scalarizing unmerge as bitcast to integer and shift.
7369 LLT IntTy = MRI.getType(SrcReg);
7370
7371 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
7372
7373 const unsigned DstSize = DstTy.getSizeInBits();
7374 unsigned Offset = DstSize;
7375 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
7376 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
7377 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
7378 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
7379 }
7380
7381 MI.eraseFromParent();
7382 return Legalized;
7383}
7384
7385/// Lower a vector extract or insert by writing the vector to a stack temporary
7386/// and reloading the element or vector.
7387///
7388/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
7389/// =>
7390/// %stack_temp = G_FRAME_INDEX
7391/// G_STORE %vec, %stack_temp
7392/// %idx = clamp(%idx, %vec.getNumElements())
7393/// %element_ptr = G_PTR_ADD %stack_temp, %idx
7394/// %dst = G_LOAD %element_ptr
7395LegalizerHelper::LegalizeResult
7396LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
7397 Register DstReg = MI.getOperand(0).getReg();
7398 Register SrcVec = MI.getOperand(1).getReg();
7399 Register InsertVal;
7400 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
7401 InsertVal = MI.getOperand(2).getReg();
7402
7403 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
7404
7405 LLT VecTy = MRI.getType(SrcVec);
7406 LLT EltTy = VecTy.getElementType();
7407 unsigned NumElts = VecTy.getNumElements();
7408
7409 int64_t IdxVal;
7410 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
7411 SmallVector<Register, 8> SrcRegs;
7412 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
7413
7414 if (InsertVal) {
7415 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
7416 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
7417 } else {
7418 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
7419 }
7420
7421 MI.eraseFromParent();
7422 return Legalized;
7423 }
7424
7425 if (!EltTy.isByteSized()) { // Not implemented.
7426 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
7427 return UnableToLegalize;
7428 }
7429
7430 unsigned EltBytes = EltTy.getSizeInBytes();
7431 Align VecAlign = getStackTemporaryAlignment(VecTy);
7432 Align EltAlign;
7433
7434 MachinePointerInfo PtrInfo;
7435 auto StackTemp = createStackTemporary(
7436 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
7437 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
7438
7439 // Get the pointer to the element, and be sure not to hit undefined behavior
7440 // if the index is out of bounds.
7441 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
7442
7443 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
7444 int64_t Offset = IdxVal * EltBytes;
7445 PtrInfo = PtrInfo.getWithOffset(Offset);
7446 EltAlign = commonAlignment(VecAlign, Offset);
7447 } else {
7448 // We lose information with a variable offset.
7449 EltAlign = getStackTemporaryAlignment(EltTy);
7450 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
7451 }
7452
7453 if (InsertVal) {
7454 // Write the inserted element
7455 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
7456
7457 // Reload the whole vector.
7458 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
7459 } else {
7460 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
7461 }
7462
7463 MI.eraseFromParent();
7464 return Legalized;
7465}
7466
7467LegalizerHelper::LegalizeResult
7468LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
7469 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
7470 MI.getFirst3RegLLTs();
7471 LLT IdxTy = LLT::scalar(32);
7472
7473 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
7474 Register Undef;
7475 SmallVector<Register, 32> BuildVec;
7476 LLT EltTy = DstTy.getScalarType();
7477
7478 for (int Idx : Mask) {
7479 if (Idx < 0) {
7480 if (!Undef.isValid())
7481 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
7482 BuildVec.push_back(Undef);
7483 continue;
7484 }
7485
7486 if (Src0Ty.isScalar()) {
7487 BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
7488 } else {
7489 int NumElts = Src0Ty.getNumElements();
7490 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
7491 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
7492 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
7493 auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
7494 BuildVec.push_back(Extract.getReg(0));
7495 }
7496 }
7497
7498 if (DstTy.isScalar())
7499 MIRBuilder.buildCopy(DstReg, BuildVec[0]);
7500 else
7501 MIRBuilder.buildBuildVector(DstReg, BuildVec);
7502 MI.eraseFromParent();
7503 return Legalized;
7504}
7505
7506Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
7507 Register AllocSize,
7508 Align Alignment,
7509 LLT PtrTy) {
7510 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
7511
7512 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
7513 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
7514
7515 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
7516 // have to generate an extra instruction to negate the alloc and then use
7517 // G_PTR_ADD to add the negative offset.
7518 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
7519 if (Alignment > Align(1)) {
7520 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
7521 AlignMask.negate();
7522 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
7523 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
7524 }
7525
7526 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
7527}
7528
7529LegalizerHelper::LegalizeResult
7530LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
7531 const auto &MF = *MI.getMF();
7532 const auto &TFI = *MF.getSubtarget().getFrameLowering();
7533 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
7534 return UnableToLegalize;
7535
7536 Register Dst = MI.getOperand(0).getReg();
7537 Register AllocSize = MI.getOperand(1).getReg();
7538 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
7539
7540 LLT PtrTy = MRI.getType(Dst);
7541 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
7542 Register SPTmp =
7543 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
7544
7545 MIRBuilder.buildCopy(SPReg, SPTmp);
7546 MIRBuilder.buildCopy(Dst, SPTmp);
7547
7548 MI.eraseFromParent();
7549 return Legalized;
7550}
7551
7552LegalizerHelper::LegalizeResult
7553LegalizerHelper::lowerStackSave(MachineInstr &MI) {
7554 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
7555 if (!StackPtr)
7556 return UnableToLegalize;
7557
7558 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
7559 MI.eraseFromParent();
7560 return Legalized;
7561}
7562
7563LegalizerHelper::LegalizeResult
7564LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
7565 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
7566 if (!StackPtr)
7567 return UnableToLegalize;
7568
7569 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
7570 MI.eraseFromParent();
7571 return Legalized;
7572}
7573
7574LegalizerHelper::LegalizeResult
7575LegalizerHelper::lowerExtract(MachineInstr &MI) {
7576 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7577 unsigned Offset = MI.getOperand(2).getImm();
7578
7579 // Extract sub-vector or one element
7580 if (SrcTy.isVector()) {
7581 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
7582 unsigned DstSize = DstTy.getSizeInBits();
7583
7584 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
7585 (Offset + DstSize <= SrcTy.getSizeInBits())) {
7586 // Unmerge and allow access to each Src element for the artifact combiner.
7587 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
7588
7589 // Take element(s) we need to extract and copy it (merge them).
7590 SmallVector<Register, 8> SubVectorElts;
7591 for (unsigned Idx = Offset / SrcEltSize;
7592 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
7593 SubVectorElts.push_back(Unmerge.getReg(Idx));
7594 }
7595 if (SubVectorElts.size() == 1)
7596 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
7597 else
7598 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
7599
7600 MI.eraseFromParent();
7601 return Legalized;
7602 }
7603 }
7604
7605 if (DstTy.isScalar() &&
7606 (SrcTy.isScalar() ||
7607 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
7608 LLT SrcIntTy = SrcTy;
7609 if (!SrcTy.isScalar()) {
7610 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
7611 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
7612 }
7613
7614 if (Offset == 0)
7615 MIRBuilder.buildTrunc(DstReg, SrcReg);
7616 else {
7617 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
7618 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
7619 MIRBuilder.buildTrunc(DstReg, Shr);
7620 }
7621
7622 MI.eraseFromParent();
7623 return Legalized;
7624 }
7625
7626 return UnableToLegalize;
7627}
7628
7629LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
7630 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
7631 uint64_t Offset = MI.getOperand(3).getImm();
7632
7633 LLT DstTy = MRI.getType(Src);
7634 LLT InsertTy = MRI.getType(InsertSrc);
7635
7636 // Insert sub-vector or one element
7637 if (DstTy.isVector() && !InsertTy.isPointer()) {
7638 LLT EltTy = DstTy.getElementType();
7639 unsigned EltSize = EltTy.getSizeInBits();
7640 unsigned InsertSize = InsertTy.getSizeInBits();
7641
7642 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
7643 (Offset + InsertSize <= DstTy.getSizeInBits())) {
7644 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
7645 SmallVector<Register, 8> DstElts;
7646 unsigned Idx = 0;
7647 // Elements from Src before insert start Offset
7648 for (; Idx < Offset / EltSize; ++Idx) {
7649 DstElts.push_back(UnmergeSrc.getReg(Idx));
7650 }
7651
7652 // Replace elements in Src with elements from InsertSrc
7653 if (InsertTy.getSizeInBits() > EltSize) {
7654 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
7655 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
7656 ++Idx, ++i) {
7657 DstElts.push_back(UnmergeInsertSrc.getReg(i));
7658 }
7659 } else {
7660 DstElts.push_back(InsertSrc);
7661 ++Idx;
7662 }
7663
7664 // Remaining elements from Src after insert
7665 for (; Idx < DstTy.getNumElements(); ++Idx) {
7666 DstElts.push_back(UnmergeSrc.getReg(Idx));
7667 }
7668
7669 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
7670 MI.eraseFromParent();
7671 return Legalized;
7672 }
7673 }
7674
7675 if (InsertTy.isVector() ||
7676 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
7677 return UnableToLegalize;
7678
7679 const DataLayout &DL = MIRBuilder.getDataLayout();
7680 if ((DstTy.isPointer() &&
7681 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
7682 (InsertTy.isPointer() &&
7683 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
7684 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
7685 return UnableToLegalize;
7686 }
7687
7688 LLT IntDstTy = DstTy;
7689
7690 if (!DstTy.isScalar()) {
7691 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
7692 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
7693 }
7694
7695 if (!InsertTy.isScalar()) {
7696 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
7697 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
7698 }
7699
7700 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
7701 if (Offset != 0) {
7702 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
7703 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
7704 }
7705
7706 APInt MaskVal = APInt::getBitsSetWithWrap(
7707 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
7708
7709 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
7710 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
7711 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
7712
7713 MIRBuilder.buildCast(Dst, Or);
7714 MI.eraseFromParent();
7715 return Legalized;
7716}
7717
7718LegalizerHelper::LegalizeResult
7719LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
7720 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
7721 MI.getFirst4RegLLTs();
7722 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
7723
7724 LLT Ty = Dst0Ty;
7725 LLT BoolTy = Dst1Ty;
7726
7727 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
7728
7729 if (IsAdd)
7730 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
7731 else
7732 MIRBuilder.buildSub(NewDst0, LHS, RHS);
7733
7734 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
7735
7736 auto Zero = MIRBuilder.buildConstant(Ty, 0);
7737
7738 // For an addition, the result should be less than one of the operands (LHS)
7739 // if and only if the other operand (RHS) is negative, otherwise there will
7740 // be overflow.
7741 // For a subtraction, the result should be less than one of the operands
7742 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
7743 // otherwise there will be overflow.
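 // Illustrative 8-bit example (values chosen for exposition): 100 + 100 wraps
 // to -56, so "result < LHS" is true while "RHS < 0" is false, and the XOR
 // below reports overflow. For 100 + (-50) = 50 both compares are true and no
 // overflow is reported.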
7744 auto ResultLowerThanLHS =
7745 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
7746 auto ConditionRHS = MIRBuilder.buildICmp(
7747 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
7748
7749 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
7750
7751 MIRBuilder.buildCopy(Dst0, NewDst0);
7752 MI.eraseFromParent();
7753
7754 return Legalized;
7755}
7756
7757LegalizerHelper::LegalizeResult
7758LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
7759 auto [Res, LHS, RHS] = MI.getFirst3Regs();
7760 LLT Ty = MRI.getType(Res);
7761 bool IsSigned;
7762 bool IsAdd;
7763 unsigned BaseOp;
7764 switch (MI.getOpcode()) {
7765 default:
7766 llvm_unreachable("unexpected addsat/subsat opcode");
7767 case TargetOpcode::G_UADDSAT:
7768 IsSigned = false;
7769 IsAdd = true;
7770 BaseOp = TargetOpcode::G_ADD;
7771 break;
7772 case TargetOpcode::G_SADDSAT:
7773 IsSigned = true;
7774 IsAdd = true;
7775 BaseOp = TargetOpcode::G_ADD;
7776 break;
7777 case TargetOpcode::G_USUBSAT:
7778 IsSigned = false;
7779 IsAdd = false;
7780 BaseOp = TargetOpcode::G_SUB;
7781 break;
7782 case TargetOpcode::G_SSUBSAT:
7783 IsSigned = true;
7784 IsAdd = false;
7785 BaseOp = TargetOpcode::G_SUB;
7786 break;
7787 }
7788
7789 if (IsSigned) {
7790 // sadd.sat(a, b) ->
7791 // hi = 0x7fffffff - smax(a, 0)
7792 // lo = 0x80000000 - smin(a, 0)
7793 // a + smin(smax(lo, b), hi)
7794 // ssub.sat(a, b) ->
7795 // lo = smax(a, -1) - 0x7fffffff
7796 // hi = smin(a, -1) - 0x80000000
7797 // a - smin(smax(lo, b), hi)
7798 // TODO: AMDGPU can use a "median of 3" instruction here:
7799 // a +/- med3(lo, b, hi)
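 // Illustrative 8-bit example (values chosen for exposition): sadd.sat(100, 100)
 // gives hi = 127 - smax(100, 0) = 27 and lo = -128 - smin(100, 0) = -128;
 // clamping b = 100 into [lo, hi] yields 27, so the result is 100 + 27 = 127.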
7800 uint64_t NumBits = Ty.getScalarSizeInBits();
7801 auto MaxVal =
7802 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
7803 auto MinVal =
7804 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
7805 MachineInstrBuilder Hi, Lo;
7806 if (IsAdd) {
7807 auto Zero = MIRBuilder.buildConstant(Ty, 0);
7808 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
7809 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
7810 } else {
7811 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
7812 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
7813 MaxVal);
7814 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
7815 MinVal);
7816 }
7817 auto RHSClamped =
7818 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
7819 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
7820 } else {
7821 // uadd.sat(a, b) -> a + umin(~a, b)
7822 // usub.sat(a, b) -> a - umin(a, b)
7823 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
7824 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
7825 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
7826 }
7827
7828 MI.eraseFromParent();
7829 return Legalized;
7830}
7831
7832LegalizerHelper::LegalizeResult
7833LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
7834 auto [Res, LHS, RHS] = MI.getFirst3Regs();
7835 LLT Ty = MRI.getType(Res);
7836 LLT BoolTy = Ty.changeElementSize(1);
7837 bool IsSigned;
7838 bool IsAdd;
7839 unsigned OverflowOp;
7840 switch (MI.getOpcode()) {
7841 default:
7842 llvm_unreachable("unexpected addsat/subsat opcode");
7843 case TargetOpcode::G_UADDSAT:
7844 IsSigned = false;
7845 IsAdd = true;
7846 OverflowOp = TargetOpcode::G_UADDO;
7847 break;
7848 case TargetOpcode::G_SADDSAT:
7849 IsSigned = true;
7850 IsAdd = true;
7851 OverflowOp = TargetOpcode::G_SADDO;
7852 break;
7853 case TargetOpcode::G_USUBSAT:
7854 IsSigned = false;
7855 IsAdd = false;
7856 OverflowOp = TargetOpcode::G_USUBO;
7857 break;
7858 case TargetOpcode::G_SSUBSAT:
7859 IsSigned = true;
7860 IsAdd = false;
7861 OverflowOp = TargetOpcode::G_SSUBO;
7862 break;
7863 }
7864
7865 auto OverflowRes =
7866 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
7867 Register Tmp = OverflowRes.getReg(0);
7868 Register Ov = OverflowRes.getReg(1);
7869 MachineInstrBuilder Clamp;
7870 if (IsSigned) {
7871 // sadd.sat(a, b) ->
7872 // {tmp, ov} = saddo(a, b)
7873 // ov ? (tmp >>s 31) + 0x80000000 : r
7874 // ssub.sat(a, b) ->
7875 // {tmp, ov} = ssubo(a, b)
7876 // ov ? (tmp >>s 31) + 0x80000000 : r
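 // Illustrative 8-bit example (values chosen for exposition): sadd.sat(100, 100)
 // gives {tmp, ov} = {-56, 1}; tmp >>s 7 is -1, and -1 + (-128) wraps to 127,
 // which is the saturated value selected below when ov is set.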
7877 uint64_t NumBits = Ty.getScalarSizeInBits();
7878 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
7879 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
7880 auto MinVal =
7881 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
7882 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
7883 } else {
7884 // uadd.sat(a, b) ->
7885 // {tmp, ov} = uaddo(a, b)
7886 // ov ? 0xffffffff : tmp
7887 // usub.sat(a, b) ->
7888 // {tmp, ov} = usubo(a, b)
7889 // ov ? 0 : tmp
7890 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
7891 }
7892 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
7893
7894 MI.eraseFromParent();
7895 return Legalized;
7896}
7897
7898LegalizerHelper::LegalizeResult
7899LegalizerHelper::lowerShlSat(MachineInstr &MI) {
7900 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
7901 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
7902 "Expected shlsat opcode!");
7903 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
7904 auto [Res, LHS, RHS] = MI.getFirst3Regs();
7905 LLT Ty = MRI.getType(Res);
7906 LLT BoolTy = Ty.changeElementSize(1);
7907
7908 unsigned BW = Ty.getScalarSizeInBits();
7909 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
7910 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
7911 : MIRBuilder.buildLShr(Ty, Result, RHS);
7912
7913 MachineInstrBuilder SatVal;
7914 if (IsSigned) {
7915 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
7916 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
7917 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
7918 MIRBuilder.buildConstant(Ty, 0));
7919 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
7920 } else {
7921 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
7922 }
7923 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
7924 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
7925
7926 MI.eraseFromParent();
7927 return Legalized;
7928}
7929
7930LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
7931 auto [Dst, Src] = MI.getFirst2Regs();
7932 const LLT Ty = MRI.getType(Src);
7933 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
7934 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
7935
7936 // Swap most and least significant byte, set remaining bytes in Res to zero.
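 // Worked example (illustrative only): for a 32-bit source 0x11223344, the
 // first step produces 0x44000011, and the single loop iteration below ORs in
 // 0x00330000 and 0x00002200, giving the expected 0x44332211.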
7937 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
7938 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
7939 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
7940 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
7941
7942 // Set i-th high/low byte in Res to i-th low/high byte from Src.
7943 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
7944 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
7945 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
7946 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
7947 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
7948 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
7949 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
7950 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
7951 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
7952 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
7953 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
7954 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
7955 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
7956 }
7957 Res.getInstr()->getOperand(0).setReg(Dst);
7958
7959 MI.eraseFromParent();
7960 return Legalized;
7961}
7962
7963//{ (Src & Mask) >> N } | { (Src << N) & Mask }
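// Illustrative example of this helper (values chosen for exposition): with
// N = 4 and an 8-bit mask of 0xF0 applied to 0xAB,
// ((0xAB & 0xF0) >> 4) | ((0xAB << 4) & 0xF0) = 0x0A | 0xB0 = 0xBA,
// i.e. the two nibbles are exchanged.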
7964static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
7965 MachineInstrBuilder Src, const APInt &Mask) {
7966 const LLT Ty = Dst.getLLTTy(*B.getMRI());
7967 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
7968 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
7969 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
7970 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
7971 return B.buildOr(Dst, LHS, RHS);
7972}
7973
7974LegalizerHelper::LegalizeResult
7975LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
7976 auto [Dst, Src] = MI.getFirst2Regs();
7977 const LLT Ty = MRI.getType(Src);
7978 unsigned Size = Ty.getSizeInBits();
7979
7980 MachineInstrBuilder BSWAP =
7981 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
7982
7983 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
7984 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
7985 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
7986 MachineInstrBuilder Swap4 =
7987 SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
7988
7989 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
7990 // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
7991 // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
7992 MachineInstrBuilder Swap2 =
7993 SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
7994
7995 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
7996 // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
7997 // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
7998 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
7999
8000 MI.eraseFromParent();
8001 return Legalized;
8002}
8003
8004LegalizerHelper::LegalizeResult
8005LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
8006 MachineFunction &MF = MIRBuilder.getMF();
8007
8008 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
8009 int NameOpIdx = IsRead ? 1 : 0;
8010 int ValRegIndex = IsRead ? 0 : 1;
8011
8012 Register ValReg = MI.getOperand(ValRegIndex).getReg();
8013 const LLT Ty = MRI.getType(ValReg);
8014 const MDString *RegStr = cast<MDString>(
8015 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
8016
8017 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
8018 if (!PhysReg.isValid())
8019 return UnableToLegalize;
8020
8021 if (IsRead)
8022 MIRBuilder.buildCopy(ValReg, PhysReg);
8023 else
8024 MIRBuilder.buildCopy(PhysReg, ValReg);
8025
8026 MI.eraseFromParent();
8027 return Legalized;
8028}
8029
8030LegalizerHelper::LegalizeResult
8031LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
8032 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
8033 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
8034 Register Result = MI.getOperand(0).getReg();
8035 LLT OrigTy = MRI.getType(Result);
8036 auto SizeInBits = OrigTy.getScalarSizeInBits();
8037 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
8038
8039 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
8040 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
8041 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
8042 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
8043
8044 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
8045 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
8046 MIRBuilder.buildTrunc(Result, Shifted);
8047
8048 MI.eraseFromParent();
8049 return Legalized;
8050}
8051
8052LegalizerHelper::LegalizeResult
8053LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
8054 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
8055 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
8056
8057 if (Mask == fcNone) {
8058 MIRBuilder.buildConstant(DstReg, 0);
8059 MI.eraseFromParent();
8060 return Legalized;
8061 }
8062 if (Mask == fcAllFlags) {
8063 MIRBuilder.buildConstant(DstReg, 1);
8064 MI.eraseFromParent();
8065 return Legalized;
8066 }
8067
8068 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
8069 // version
8070
8071 unsigned BitSize = SrcTy.getScalarSizeInBits();
8072 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8073
8074 LLT IntTy = LLT::scalar(BitSize);
8075 if (SrcTy.isVector())
8076 IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
8077 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
8078
8079 // Various masks.
8080 APInt SignBit = APInt::getSignMask(BitSize);
8081 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
8082 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
8083 APInt ExpMask = Inf;
8084 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
8085 APInt QNaNBitMask =
8086 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
8087 APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
8088
8089 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
8090 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
8091 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
8092 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
8093 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
8094
8095 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
8096 auto Sign =
8097 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
8098
8099 auto Res = MIRBuilder.buildConstant(DstTy, 0);
8100 // Clang doesn't support capture of structured bindings:
8101 LLT DstTyCopy = DstTy;
8102 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
8103 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
8104 };
8105
8106 // Tests that involve more than one class should be processed first.
8107 if ((Mask & fcFinite) == fcFinite) {
8108 // finite(V) ==> abs(V) u< exp_mask
8109 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
8110 ExpMaskC));
8111 Mask &= ~fcFinite;
8112 } else if ((Mask & fcFinite) == fcPosFinite) {
8113 // finite(V) && V > 0 ==> V u< exp_mask
8114 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
8115 ExpMaskC));
8116 Mask &= ~fcPosFinite;
8117 } else if ((Mask & fcFinite) == fcNegFinite) {
8118 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
8119 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
8120 ExpMaskC);
8121 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
8122 appendToRes(And);
8123 Mask &= ~fcNegFinite;
8124 }
8125
8126 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
8127 // fcZero | fcSubnormal => test all exponent bits are 0
8128 // TODO: Handle sign bit specific cases
8129 // TODO: Handle inverted case
8130 if (PartialCheck == (fcZero | fcSubnormal)) {
8131 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
8132 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8133 ExpBits, ZeroC));
8134 Mask &= ~PartialCheck;
8135 }
8136 }
8137
8138 // Check for individual classes.
8139 if (FPClassTest PartialCheck = Mask & fcZero) {
8140 if (PartialCheck == fcPosZero)
8141 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8142 AsInt, ZeroC));
8143 else if (PartialCheck == fcZero)
8144 appendToRes(
8145 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
8146 else // fcNegZero
8147 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8148 AsInt, SignBitC));
8149 }
8150
8151 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
8152 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
8153 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
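 // Illustrative f32 example (values chosen for exposition): the unsigned test
 // abs(V) - 1 u< 0x007fffff holds exactly when 1 <= abs(V) <= 0x007fffff;
 // abs(V) == 0 wraps to 0xffffffff and fails, so zeros are excluded.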
8154 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
8155 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
8156 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
8157 auto SubnormalRes =
8158 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
8159 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
8160 if (PartialCheck == fcNegSubnormal)
8161 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
8162 appendToRes(SubnormalRes);
8163 }
8164
8165 if (FPClassTest PartialCheck = Mask & fcInf) {
8166 if (PartialCheck == fcPosInf)
8167 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8168 AsInt, InfC));
8169 else if (PartialCheck == fcInf)
8170 appendToRes(
8171 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
8172 else { // fcNegInf
8173 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
8174 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
8175 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8176 AsInt, NegInfC));
8177 }
8178 }
8179
8180 if (FPClassTest PartialCheck = Mask & fcNan) {
8181 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
8182 if (PartialCheck == fcNan) {
8183 // isnan(V) ==> abs(V) u> int(inf)
8184 appendToRes(
8185 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
8186 } else if (PartialCheck == fcQNan) {
8187 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
8188 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
8189 InfWithQnanBitC));
8190 } else { // fcSNan
8191 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
8192 // abs(V) u< (unsigned(Inf) | quiet_bit)
8193 auto IsNan =
8194 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
8195 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
8196 Abs, InfWithQnanBitC);
8197 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
8198 }
8199 }
8200
8201 if (FPClassTest PartialCheck = Mask & fcNormal) {
8202 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
8203 // (max_exp-1))
8204 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
8205 auto ExpMinusOne = MIRBuilder.buildSub(
8206 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
8207 APInt MaxExpMinusOne = ExpMask - ExpLSB;
8208 auto NormalRes =
8209 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
8210 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
8211 if (PartialCheck == fcNegNormal)
8212 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
8213 else if (PartialCheck == fcPosNormal) {
8214 auto PosSign = MIRBuilder.buildXor(
8215 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InvertionMask));
8216 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
8217 }
8218 appendToRes(NormalRes);
8219 }
8220
8221 MIRBuilder.buildCopy(DstReg, Res);
8222 MI.eraseFromParent();
8223 return Legalized;
8224}
8225
8226LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
8227 // Implement G_SELECT in terms of XOR, AND, OR.
8228 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
8229 MI.getFirst4RegLLTs();
8230
8231 bool IsEltPtr = DstTy.isPointerOrPointerVector();
8232 if (IsEltPtr) {
8233 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
8234 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
8235 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
8236 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
8237 DstTy = NewTy;
8238 }
8239
8240 if (MaskTy.isScalar()) {
8241 // Turn the scalar condition into a vector condition mask if needed.
8242
8243 Register MaskElt = MaskReg;
8244
8245 // The condition was potentially zero extended before, but we want a sign
8246 // extended boolean.
8247 if (MaskTy != LLT::scalar(1))
8248 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
8249
8250 // Continue the sign extension (or truncate) to match the data type.
8251 MaskElt =
8252 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
8253
8254 if (DstTy.isVector()) {
8255 // Generate a vector splat idiom.
8256 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
8257 MaskReg = ShufSplat.getReg(0);
8258 } else {
8259 MaskReg = MaskElt;
8260 }
8261 MaskTy = DstTy;
8262 } else if (!DstTy.isVector()) {
8263 // Cannot handle the case that mask is a vector and dst is a scalar.
8264 return UnableToLegalize;
8265 }
8266
8267 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
8268 return UnableToLegalize;
8269 }
8270
8271 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
8272 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
8273 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
8274 if (IsEltPtr) {
8275 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
8276 MIRBuilder.buildIntToPtr(DstReg, Or);
8277 } else {
8278 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
8279 }
8280 MI.eraseFromParent();
8281 return Legalized;
8282}
8283
8284LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
8285 // Split DIVREM into individual instructions.
8286 unsigned Opcode = MI.getOpcode();
8287
8288 MIRBuilder.buildInstr(
8289 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
8290 : TargetOpcode::G_UDIV,
8291 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
8292 MIRBuilder.buildInstr(
8293 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
8294 : TargetOpcode::G_UREM,
8295 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
8296 MI.eraseFromParent();
8297 return Legalized;
8298}
8299
8300LegalizerHelper::LegalizeResult
8301LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
8302 // Expand %res = G_ABS %a into:
8303 // %v1 = G_ASHR %a, scalar_size-1
8304 // %v2 = G_ADD %a, %v1
8305 // %res = G_XOR %v2, %v1
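 // Illustrative 8-bit example (values chosen for exposition): for %a = -5,
 // %v1 = -5 >>s 7 = -1, %v2 = -5 + -1 = -6, and -6 ^ -1 = 5, so the expansion
 // returns |-5| = 5.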
8306 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
8307 Register OpReg = MI.getOperand(1).getReg();
8308 auto ShiftAmt =
8309 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
8310 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
8311 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
8312 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
8313 MI.eraseFromParent();
8314 return Legalized;
8315}
8316
8317LegalizerHelper::LegalizeResult
8318LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
8319 // Expand %res = G_ABS %a into:
8320 // %v1 = G_CONSTANT 0
8321 // %v2 = G_SUB %v1, %a
8322 // %res = G_SMAX %a, %v2
8323 Register SrcReg = MI.getOperand(1).getReg();
8324 LLT Ty = MRI.getType(SrcReg);
8325 auto Zero = MIRBuilder.buildConstant(Ty, 0);
8326 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
8327 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
8328 MI.eraseFromParent();
8329 return Legalized;
8330}
8331
8332LegalizerHelper::LegalizeResult
8333LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
8334 Register SrcReg = MI.getOperand(1).getReg();
8335 Register DestReg = MI.getOperand(0).getReg();
8336 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
8337 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
8338 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
8339 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
8340 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
8341 MI.eraseFromParent();
8342 return Legalized;
8343}
8344
8345LegalizerHelper::LegalizeResult
8346LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
8347 Register SrcReg = MI.getOperand(1).getReg();
8348 LLT SrcTy = MRI.getType(SrcReg);
8349 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
8350
8351 // The source could be a scalar if the IR type was <1 x sN>.
8352 if (SrcTy.isScalar()) {
8353 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
8354 return UnableToLegalize; // FIXME: handle extension.
8355 // This can be just a plain copy.
8356 Observer.changingInstr(MI);
8357 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
8358 Observer.changedInstr(MI);
8359 return Legalized;
8360 }
8361 return UnableToLegalize;
8362}
8363
8364static Type *getTypeForLLT(LLT Ty, LLVMContext &C);
8365
8366LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
8367 MachineFunction &MF = *MI.getMF();
8368 const DataLayout &DL = MIRBuilder.getDataLayout();
8369 LLVMContext &Ctx = MF.getFunction().getContext();
8370 Register ListPtr = MI.getOperand(1).getReg();
8371 LLT PtrTy = MRI.getType(ListPtr);
8372
8373 // LstPtr is a pointer to the head of the list. Get the address
8374 // of the head of the list.
8375 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
8376 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
8377 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
8378 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
8379
8380 const Align A(MI.getOperand(2).getImm());
8381 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
8382 if (A > TLI.getMinStackArgumentAlignment()) {
8383 Register AlignAmt =
8384 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
8385 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
8386 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
8387 VAList = AndDst.getReg(0);
8388 }
8389
8390 // Increment the pointer, VAList, to the next vaarg
8391 // The list should be bumped by the size of the element at the current head
8392 // of the list.
8393 Register Dst = MI.getOperand(0).getReg();
8394 LLT LLTTy = MRI.getType(Dst);
8395 Type *Ty = getTypeForLLT(LLTTy, Ctx);
8396 auto IncAmt =
8397 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
8398 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
8399
8400 // Store the incremented VAList to the legalized pointer.
8401 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
8402 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
8403 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
8404 // Load the actual argument out of the pointer VAList
8405 Align EltAlignment = DL.getABITypeAlign(Ty);
8406 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
8407 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
8408 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
8409
8410 MI.eraseFromParent();
8411 return Legalized;
8412}
8413
8414static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
8415 // On Darwin, -Os means optimize for size without hurting performance, so
8416 // only really optimize for size when -Oz (MinSize) is used.
8417 if (MF.getTarget().getTargetTriple().isOSDarwin())
8418 return MF.getFunction().hasMinSize();
8419 return MF.getFunction().hasOptSize();
8420}
8421
8422// Returns a list of types to use for memory op lowering in MemOps. A partial
8423// port of findOptimalMemOpLowering in TargetLowering.
8424static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
8425 unsigned Limit, const MemOp &Op,
8426 unsigned DstAS, unsigned SrcAS,
8427 const AttributeList &FuncAttributes,
8428 const TargetLowering &TLI) {
8429 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
8430 return false;
8431
8432 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
8433
8434 if (Ty == LLT()) {
8435 // Use the largest scalar type whose alignment constraints are satisfied.
8436 // We only need to check DstAlign here as SrcAlign is always greater or
8437 // equal to DstAlign (or zero).
8438 Ty = LLT::scalar(64);
8439 if (Op.isFixedDstAlign())
8440 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
8441 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
8442 Ty = LLT::scalar(Ty.getSizeInBytes());
8443 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
8444 // FIXME: check for the largest legal type we can load/store to.
8445 }
8446
8447 unsigned NumMemOps = 0;
8448 uint64_t Size = Op.size();
8449 while (Size) {
8450 unsigned TySize = Ty.getSizeInBytes();
8451 while (TySize > Size) {
8452 // For now, only use non-vector loads / stores for the left-over pieces.
8453 LLT NewTy = Ty;
8454 // FIXME: check for mem op safety and legality of the types. Not all of
8455 // SDAGisms map cleanly to GISel concepts.
8456 if (NewTy.isVector())
8457 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
8458 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
8459 unsigned NewTySize = NewTy.getSizeInBytes();
8460 assert(NewTySize > 0 && "Could not find appropriate type");
8461
8462 // If the new LLT cannot cover all of the remaining bits, then consider
8463 // issuing a (or a pair of) unaligned and overlapping load / store.
8464 unsigned Fast;
8465 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
8466 MVT VT = getMVTForLLT(Ty);
8467 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
8468 TLI.allowsMisalignedMemoryAccesses(
8469 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
8470 MachineMemOperand::MONone, &Fast) &&
8471 Fast)
8472 TySize = Size;
8473 else {
8474 Ty = NewTy;
8475 TySize = NewTySize;
8476 }
8477 }
8478
8479 if (++NumMemOps > Limit)
8480 return false;
8481
8482 MemOps.push_back(Ty);
8483 Size -= TySize;
8484 }
8485
8486 return true;
8487}
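A simplified model of the greedy loop above, covering only the common non-vector, non-overlapping path and ignoring the alignment and allowsMisalignedMemoryAccesses checks (illustrative only; simpleMemOpBreakdown is a made-up name, and llvm::bit_floor comes from headers this file already uses). For Size = 13 and a preferred 64-bit type it produces {64, 32, 8}, i.e. MemOps = {s64, s32, s8}:

  // Greedily cover Size bytes, shrinking to the next smaller power-of-two
  // scalar whenever the current type no longer fits the remaining bytes.
  static void simpleMemOpBreakdown(uint64_t Size, unsigned PreferredBits,
                                   std::vector<unsigned> &OutBits) {
    unsigned Bits = PreferredBits;
    while (Size) {
      while (Bits / 8 > Size)
        Bits = llvm::bit_floor(Bits - 1);
      OutBits.push_back(Bits);
      Size -= Bits / 8;
    }
  }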
8488
8489static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
8490 if (Ty.isVector())
8491 return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
8492 Ty.getNumElements());
8493 return IntegerType::get(C, Ty.getSizeInBits());
8494}
8495
8496// Get a vectorized representation of the memset value operand, GISel edition.
8497static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
8498 MachineRegisterInfo &MRI = *MIB.getMRI();
8499 unsigned NumBits = Ty.getScalarSizeInBits();
8500 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
8501 if (!Ty.isVector() && ValVRegAndVal) {
8502 APInt Scalar = ValVRegAndVal->Value.trunc(8);
8503 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
8504 return MIB.buildConstant(Ty, SplatVal).getReg(0);
8505 }
8506
8507 // Extend the byte value to the larger type, and then multiply by a magic
8508 // value 0x010101... in order to replicate it across every byte.
8509 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
8510 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
8511 return MIB.buildConstant(Ty, 0).getReg(0);
8512 }
8513
8514 LLT ExtType = Ty.getScalarType();
8515 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
8516 if (NumBits > 8) {
8517 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
8518 auto MagicMI = MIB.buildConstant(ExtType, Magic);
8519 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
8520 }
8521
8522 // For vector types create a G_BUILD_VECTOR.
8523 if (Ty.isVector())
8524 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
8525
8526 return Val;
8527}
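A compile-time spot check of the byte-replication trick used above for the scalar case (illustrative only, not part of the source; plain unsigned arithmetic stands in for the G_ZEXT and G_MUL the builder emits):

  static_assert(0xABu * 0x01010101u == 0xABABABABu,
                "zext(byte) * 0x01010101 replicates the byte across 32 bits");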
8528
8529LegalizerHelper::LegalizeResult
8530LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
8531 uint64_t KnownLen, Align Alignment,
8532 bool IsVolatile) {
8533 auto &MF = *MI.getParent()->getParent();
8534 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8535 auto &DL = MF.getDataLayout();
8536 LLVMContext &C = MF.getFunction().getContext();
8537
8538 assert(KnownLen != 0 && "Have a zero length memset length!");
8539
8540 bool DstAlignCanChange = false;
8541 MachineFrameInfo &MFI = MF.getFrameInfo();
8542 bool OptSize = shouldLowerMemFuncForSize(MF);
8543
8544 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8545 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8546 DstAlignCanChange = true;
8547
8548 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
8549 std::vector<LLT> MemOps;
8550
8551 const auto &DstMMO = **MI.memoperands_begin();
8552 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8553
8554 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
8555 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
8556
8557 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
8558 MemOp::Set(KnownLen, DstAlignCanChange,
8559 Alignment,
8560 /*IsZeroMemset=*/IsZeroVal,
8561 /*IsVolatile=*/IsVolatile),
8562 DstPtrInfo.getAddrSpace(), ~0u,
8563 MF.getFunction().getAttributes(), TLI))
8564 return UnableToLegalize;
8565
8566 if (DstAlignCanChange) {
8567 // Get an estimate of the type from the LLT.
8568 Type *IRTy = getTypeForLLT(MemOps[0], C);
8569 Align NewAlign = DL.getABITypeAlign(IRTy);
8570 if (NewAlign > Alignment) {
8571 Alignment = NewAlign;
8572 unsigned FI = FIDef->getOperand(1).getIndex();
8573 // Give the stack frame object a larger alignment if needed.
8574 if (MFI.getObjectAlign(FI) < Alignment)
8575 MFI.setObjectAlignment(FI, Alignment);
8576 }
8577 }
8578
8579 MachineIRBuilder MIB(MI);
8580 // Find the largest store and generate the bit pattern for it.
8581 LLT LargestTy = MemOps[0];
8582 for (unsigned i = 1; i < MemOps.size(); i++)
8583 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
8584 LargestTy = MemOps[i];
8585
8586 // The memset stored value is always defined as an s8, so in order to make it
8587 // work with larger store types we need to repeat the bit pattern across the
8588 // wider type.
8589 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
8590
8591 if (!MemSetValue)
8592 return UnableToLegalize;
8593
8594 // Generate the stores. For each store type in the list, we generate the
8595 // matching store of that type to the destination address.
8596 LLT PtrTy = MRI.getType(Dst);
8597 unsigned DstOff = 0;
8598 unsigned Size = KnownLen;
8599 for (unsigned I = 0; I < MemOps.size(); I++) {
8600 LLT Ty = MemOps[I];
8601 unsigned TySize = Ty.getSizeInBytes();
8602 if (TySize > Size) {
8603 // Issuing an unaligned load / store pair that overlaps with the previous
8604 // pair. Adjust the offset accordingly.
8605 assert(I == MemOps.size() - 1 && I != 0);
8606 DstOff -= TySize - Size;
8607 }
8608
8609 // If this store is smaller than the largest store, see whether we can get
8610 // the smaller value for free with a truncate.
8611 Register Value = MemSetValue;
8612 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
8613 MVT VT = getMVTForLLT(Ty);
8614 MVT LargestVT = getMVTForLLT(LargestTy);
8615 if (!LargestTy.isVector() && !Ty.isVector() &&
8616 TLI.isTruncateFree(LargestVT, VT))
8617 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
8618 else
8619 Value = getMemsetValue(Val, Ty, MIB);
8620 if (!Value)
8621 return UnableToLegalize;
8622 }
8623
8624 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
8625
8626 Register Ptr = Dst;
8627 if (DstOff != 0) {
8628 auto Offset =
8629 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
8630 Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
8631 }
8632
8633 MIB.buildStore(Value, Ptr, *StoreMMO);
8634 DstOff += Ty.getSizeInBytes();
8635 Size -= TySize;
8636 }
8637
8638 MI.eraseFromParent();
8639 return Legalized;
8640}
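To make the store loop above concrete: a G_MEMSET of 10 bytes with constant value 0xAB and MemOps = {s64, s16} comes out as roughly the following sequence (hand-written pseudo-MIR, illustrative only; register names are invented and 64-bit pointers are assumed):

  %splat:_(s64) = G_CONSTANT i64 0xABABABABABABABAB   ; getMemsetValue for s64
  G_STORE %splat(s64), %dst(p0)                       ; bytes 0..7
  %lo:_(s16) = G_TRUNC %splat(s64)                    ; reused via a free trunc
  %eight:_(s64) = G_CONSTANT i64 8
  %p1:_(p0) = G_PTR_ADD %dst, %eight(s64)
  G_STORE %lo(s16), %p1(p0)                           ; bytes 8..9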
8641
8642LegalizerHelper::LegalizeResult
8643LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
8644 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8645
8646 auto [Dst, Src, Len] = MI.getFirst3Regs();
8647
8648 const auto *MMOIt = MI.memoperands_begin();
8649 const MachineMemOperand *MemOp = *MMOIt;
8650 bool IsVolatile = MemOp->isVolatile();
8651
8652 // See if this is a constant length copy
8653 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
8654 // FIXME: support dynamically sized G_MEMCPY_INLINE
8655 assert(LenVRegAndVal &&
8656 "inline memcpy with dynamic size is not yet supported");
8657 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
8658 if (KnownLen == 0) {
8659 MI.eraseFromParent();
8660 return Legalized;
8661 }
8662
8663 const auto &DstMMO = **MI.memoperands_begin();
8664 const auto &SrcMMO = **std::next(MI.memoperands_begin());
8665 Align DstAlign = DstMMO.getBaseAlign();
8666 Align SrcAlign = SrcMMO.getBaseAlign();
8667
8668 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
8669 IsVolatile);
8670}
8671
8672LegalizerHelper::LegalizeResult
8673LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
8674 uint64_t KnownLen, Align DstAlign,
8675 Align SrcAlign, bool IsVolatile) {
8676 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8677 return lowerMemcpy(MI, Dst, Src, KnownLen,
8678 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
8679 IsVolatile);
8680}
8681
8682LegalizerHelper::LegalizeResult
8683LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
8684 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
8685 Align SrcAlign, bool IsVolatile) {
8686 auto &MF = *MI.getParent()->getParent();
8687 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8688 auto &DL = MF.getDataLayout();
8689 LLVMContext &C = MF.getFunction().getContext();
8690
8691 assert(KnownLen != 0 && "Have a zero length memcpy length!");
8692
8693 bool DstAlignCanChange = false;
8694 MachineFrameInfo &MFI = MF.getFrameInfo();
8695 Align Alignment = std::min(DstAlign, SrcAlign);
8696
8697 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8698 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8699 DstAlignCanChange = true;
8700
8701 // FIXME: infer better src pointer alignment like SelectionDAG does here.
8702 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
8703 // if the memcpy is in a tail call position.
8704
8705 std::vector<LLT> MemOps;
8706
8707 const auto &DstMMO = **MI.memoperands_begin();
8708 const auto &SrcMMO = **std::next(MI.memoperands_begin());
8709 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8710 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
8711
8712 if (!findGISelOptimalMemOpLowering(
8713 MemOps, Limit,
8714 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
8715 IsVolatile),
8716 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
8717 MF.getFunction().getAttributes(), TLI))
8718 return UnableToLegalize;
8719
8720 if (DstAlignCanChange) {
8721 // Get an estimate of the type from the LLT.
8722 Type *IRTy = getTypeForLLT(MemOps[0], C);
8723 Align NewAlign = DL.getABITypeAlign(IRTy);
8724
8725 // Don't promote to an alignment that would require dynamic stack
8726 // realignment.
8727 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
8728 if (!TRI->hasStackRealignment(MF))
8729 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
8730 NewAlign = NewAlign.previous();
8731
8732 if (NewAlign > Alignment) {
8733 Alignment = NewAlign;
8734 unsigned FI = FIDef->getOperand(1).getIndex();
8735 // Give the stack frame object a larger alignment if needed.
8736 if (MFI.getObjectAlign(FI) < Alignment)
8737 MFI.setObjectAlignment(FI, Alignment);
8738 }
8739 }
8740
8741 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
8742
8743 MachineIRBuilder MIB(MI);
8744 // Now we need to emit a pair of load and stores for each of the types we've
8745 // collected. I.e. for each type, generate a load from the source pointer of
8746 // that type width, and then generate a corresponding store to the dest buffer
8747 // of that value loaded. This can result in a sequence of loads and stores
8748 // of mixed types, depending on what the target specifies as good types to use.
8749 unsigned CurrOffset = 0;
8750 unsigned Size = KnownLen;
8751 for (auto CopyTy : MemOps) {
8752 // Issuing an unaligned load / store pair that overlaps with the previous
8753 // pair. Adjust the offset accordingly.
8754 if (CopyTy.getSizeInBytes() > Size)
8755 CurrOffset -= CopyTy.getSizeInBytes() - Size;
8756
8757 // Construct MMOs for the accesses.
8758 auto *LoadMMO =
8759 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
8760 auto *StoreMMO =
8761 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
8762
8763 // Create the load.
8764 Register LoadPtr = Src;
8765 Register Offset;
8766 if (CurrOffset != 0) {
8767 LLT SrcTy = MRI.getType(Src);
8768 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
8769 .getReg(0);
8770 LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
8771 }
8772 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
8773
8774 // Create the store.
8775 Register StorePtr = Dst;
8776 if (CurrOffset != 0) {
8777 LLT DstTy = MRI.getType(Dst);
8778 StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
8779 }
8780 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
8781 CurrOffset += CopyTy.getSizeInBytes();
8782 Size -= CopyTy.getSizeInBytes();
8783 }
8784
8785 MI.eraseFromParent();
8786 return Legalized;
8787}
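As an example of the offset bookkeeping in the copy loop above: a 12-byte copy whose MemOps came back as {s64, s64} (overlap allowed) emits the second load/store pair at offset 4, re-copying bytes 4..7 instead of running past the end (illustrative trace, not from the source):

  CopyTy = s64, CurrOffset = 0, Size = 12  ->  copy bytes 0..7, 4 bytes remain
  CopyTy = s64, 8 > 4 remaining            ->  CurrOffset = 8 - (8 - 4) = 4
                                           ->  copy bytes 4..11 (last piece)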
8788
8789LegalizerHelper::LegalizeResult
8790LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
8791 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
8792 bool IsVolatile) {
8793 auto &MF = *MI.getParent()->getParent();
8794 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8795 auto &DL = MF.getDataLayout();
8796 LLVMContext &C = MF.getFunction().getContext();
8797
8798 assert(KnownLen != 0 && "Have a zero length memmove length!");
8799
8800 bool DstAlignCanChange = false;
8801 MachineFrameInfo &MFI = MF.getFrameInfo();
8802 bool OptSize = shouldLowerMemFuncForSize(MF);
8803 Align Alignment = std::min(DstAlign, SrcAlign);
8804
8805 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8806 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8807 DstAlignCanChange = true;
8808
8809 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
8810 std::vector<LLT> MemOps;
8811
8812 const auto &DstMMO = **MI.memoperands_begin();
8813 const auto &SrcMMO = **std::next(MI.memoperands_begin());
8814 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8815 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
8816
8817 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
8818 // to a bug in its findOptimalMemOpLowering implementation. For now do the
8819 // same thing here.
8820 if (!findGISelOptimalMemOpLowering(
8821 MemOps, Limit,
8822 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
8823 /*IsVolatile*/ true),
8824 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
8825 MF.getFunction().getAttributes(), TLI))
8826 return UnableToLegalize;
8827
8828 if (DstAlignCanChange) {
8829 // Get an estimate of the type from the LLT.
8830 Type *IRTy = getTypeForLLT(MemOps[0], C);
8831 Align NewAlign = DL.getABITypeAlign(IRTy);
8832
8833 // Don't promote to an alignment that would require dynamic stack
8834 // realignment.
8835 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
8836 if (!TRI->hasStackRealignment(MF))
8837 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
8838 NewAlign = NewAlign.previous();
8839
8840 if (NewAlign > Alignment) {
8841 Alignment = NewAlign;
8842 unsigned FI = FIDef->getOperand(1).getIndex();
8843 // Give the stack frame object a larger alignment if needed.
8844 if (MFI.getObjectAlign(FI) < Alignment)
8845 MFI.setObjectAlignment(FI, Alignment);
8846 }
8847 }
8848
8849 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
8850
8851 MachineIRBuilder MIB(MI);
8852 // Memmove requires that we perform the loads first before issuing the stores.
8853 // Apart from that, this loop is pretty much doing the same thing as the
8854 // memcpy codegen function.
8855 unsigned CurrOffset = 0;
8856 SmallVector<Register, 16> LoadVals;
8857 for (auto CopyTy : MemOps) {
8858 // Construct MMO for the load.
8859 auto *LoadMMO =
8860 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
8861
8862 // Create the load.
8863 Register LoadPtr = Src;
8864 if (CurrOffset != 0) {
8865 LLT SrcTy = MRI.getType(Src);
8866 auto Offset =
8867 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
8868 LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
8869 }
8870 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
8871 CurrOffset += CopyTy.getSizeInBytes();
8872 }
8873
8874 CurrOffset = 0;
8875 for (unsigned I = 0; I < MemOps.size(); ++I) {
8876 LLT CopyTy = MemOps[I];
8877 // Now store the values loaded.
8878 auto *StoreMMO =
8879 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
8880
8881 Register StorePtr = Dst;
8882 if (CurrOffset != 0) {
8883 LLT DstTy = MRI.getType(Dst);
8884 auto Offset =
8885 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
8886 StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
8887 }
8888 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
8889 CurrOffset += CopyTy.getSizeInBytes();
8890 }
8891 MI.eraseFromParent();
8892 return Legalized;
8893}
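The loads-before-stores split above is what makes overlapping moves safe. For example (illustrative, not from the source), with Dst = Src + 4 and a 16-byte move broken into two s64 pieces, an interleaved copy would store to Dst[0..7], which aliases Src[4..11], before the second load has read Src[8..15]; issuing both loads first keeps the original bytes in registers.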
8894
8895LegalizerHelper::LegalizeResult
8896LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
8897 const unsigned Opc = MI.getOpcode();
8898 // This combine is fairly complex so it's not written with a separate
8899 // matcher function.
8900 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
8901 Opc == TargetOpcode::G_MEMSET) &&
8902 "Expected memcpy like instruction");
8903
8904 auto MMOIt = MI.memoperands_begin();
8905 const MachineMemOperand *MemOp = *MMOIt;
8906
8907 Align DstAlign = MemOp->getBaseAlign();
8908 Align SrcAlign;
8909 auto [Dst, Src, Len] = MI.getFirst3Regs();
8910
8911 if (Opc != TargetOpcode::G_MEMSET) {
8912 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
8913 MemOp = *(++MMOIt);
8914 SrcAlign = MemOp->getBaseAlign();
8915 }
8916
8917 // See if this is a constant length copy
8918 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
8919 if (!LenVRegAndVal)
8920 return UnableToLegalize;
8921 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
8922
8923 if (KnownLen == 0) {
8924 MI.eraseFromParent();
8925 return Legalized;
8926 }
8927
8928 bool IsVolatile = MemOp->isVolatile();
8929 if (Opc == TargetOpcode::G_MEMCPY_INLINE)
8930 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
8931 IsVolatile);
8932
8933 // Don't try to optimize volatile.
8934 if (IsVolatile)
8935 return UnableToLegalize;
8936
8937 if (MaxLen && KnownLen > MaxLen)
8938 return UnableToLegalize;
8939
8940 if (Opc == TargetOpcode::G_MEMCPY) {
8941 auto &MF = *MI.getParent()->getParent();
8942 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8943 bool OptSize = shouldLowerMemFuncForSize(MF);
8944 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
8945 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
8946 IsVolatile);
8947 }
8948 if (Opc == TargetOpcode::G_MEMMOVE)
8949 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
8950 if (Opc == TargetOpcode::G_MEMSET)
8951 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
8952 return UnableToLegalize;
8953}
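A sketch of how a target might route the G_MEMCPY family to lowerMemCpyFamily from its own LegalizerInfo (illustrative only; MyTargetLegalizerInfo and the MaxLen value are placeholders, and a real target would pair this with a libcall path for long or unknown lengths):

  bool MyTargetLegalizerInfo::legalizeCustom(
      LegalizerHelper &Helper, MachineInstr &MI,
      LostDebugLocObserver &LocObserver) const {
    switch (MI.getOpcode()) {
    case TargetOpcode::G_MEMCPY:
    case TargetOpcode::G_MEMMOVE:
    case TargetOpcode::G_MEMSET:
      // Only inline short, constant-length operations here.
      return Helper.lowerMemCpyFamily(MI, /*MaxLen=*/32) ==
             LegalizerHelper::Legalized;
    default:
      return false;
    }
  }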
unsigned const MachineRegisterInfo * MRI
#define Success
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const LLT S1
static const LLT S64
static const LLT S32
static const LLT S16
amdgpu AMDGPU Register Bank Select
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition: Utils.h:73
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static Type * getTypeForLLT(LLT Ty, LLVMContext &C)
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
std::string Name
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver)
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
#define R2(n)
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t High
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
R600 Clause Merge
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1193
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition: APFloat.h:1006
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition: APFloat.h:966
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:207
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1470
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:184
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:187
void negate()
Negate this APInt in place.
Definition: APInt.h:1428
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:197
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:851
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition: APInt.h:248
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
bool hasAttributes() const
Return true if the builder has IR-level attributes.
Definition: Attributes.h:1075
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
bool hasRetAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the return value.
Definition: Attributes.h:803
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:1022
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:999
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:998
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:1017
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:1016
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:1020
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:1007
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:1001
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:1018
@ ICMP_EQ
equal
Definition: InstrTypes.h:1014
@ ICMP_NE
not equal
Definition: InstrTypes.h:1015
bool isSigned() const
Definition: InstrTypes.h:1265
const APFloat & getValueAPF() const
Definition: Constants.h:311
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:145
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
Definition: DataLayout.h:393
bool isBigEndian() const
Definition: DataLayout.h:239
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:308
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition: TypeSize.h:314
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:685
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:682
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:356
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:207
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isTailCall(const MachineInstr &MI) const override
bool isEquality() const
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
Definition: LowLevelType.h:214
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:64
constexpr bool isPointerVector() const
Definition: LowLevelType.h:152
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelType.h:57
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:170
constexpr bool isByteSized() const
Definition: LowLevelType.h:263
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:184
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelType.h:221
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:100
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
Definition: LowLevelType.h:230
constexpr LLT getScalarType() const
Definition: LowLevelType.h:208
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:203
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
Definition: LowLevelType.h:124
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
LegalizeResult lowerShlSat(MachineInstr &MI)
LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LegalizeResult lowerSITOFP(MachineInstr &MI)
LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LegalizeResult lowerBitCount(MachineInstr &MI)
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LegalizeResult lowerLoad(GAnyLoad &MI)
LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizeResult lowerFConstant(MachineInstr &MI)
LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerBitreverse(MachineInstr &MI)
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult lowerEXT(MachineInstr &MI)
LegalizeResult lowerStore(GStore &MI)
LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LegalizeResult lowerFPTOUI(MachineInstr &MI)
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LegalizeResult lowerBitcast(MachineInstr &MI)
LegalizeResult lowerMinMax(MachineInstr &MI)
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LegalizeResult lowerInsert(MachineInstr &MI)
LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LegalizeResult lowerExtract(MachineInstr &MI)
LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LegalizeResult lowerFPOWI(MachineInstr &MI)
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVectorReduction(MachineInstr &MI)
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LegalizeResult lowerFCopySign(MachineInstr &MI)
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LegalizeResult lowerFunnelShift(MachineInstr &MI)
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LegalizeResult lowerFMad(MachineInstr &MI)
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFFloor(MachineInstr &MI)
LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LegalizeResult lowerFPTOSI(MachineInstr &MI)
LegalizeResult lowerUITOFP(MachineInstr &MI)
LegalizeResult lowerShuffleVector(MachineInstr &MI)
LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerMergeValues(MachineInstr &MI)
LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LegalizeResult lowerRotate(MachineInstr &MI)
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LegalizeResult lowerDIVREM(MachineInstr &MI)
LegalizeResult lowerSelect(MachineInstr &MI)
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emiting a runtime library call instead.
LegalizeResult lowerStackRestore(MachineInstr &MI)
LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerStackSave(MachineInstr &MI)
LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeResult lowerTRUNC(MachineInstr &MI)
LegalizeResult lowerBswap(MachineInstr &MI)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LegalizeResult lowerConstant(MachineInstr &MI)
void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
virtual unsigned getExtOpcodeForWideningConstant(LLT SmallTy) const
Return the opcode (SEXT/ZEXT/ANYEXT) that should be performed while widening a constant of type Small...
bool isLegalOrCustom(const LegalityQuery &Query) const
virtual bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Called for instructions with the Custom LegalizationAction.
virtual bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition: MCInstrInfo.h:70
A single uniqued string.
Definition: Metadata.h:720
StringRef getString() const
Definition: Metadata.cpp:610
Machine Value Type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:585
iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Helper class to build MachineInstr.
MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FSUB Op0, Op1.
MachineInstrBuilder buildFPTOSI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOSI Src0.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFreeze(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_FREEZE Src.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
std::optional< MachineInstrBuilder > materializePtrAdd(Register &Res, Register Op0, const LLT ValueTy, uint64_t Value)
Materialize and insert Res = G_PTR_ADD Op0, (G_CONSTANT Value)
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_OR Op0, NegOne.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FABS Op0.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildZExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and inserts Res = G_AND Op, LowBitsSet(ImmOp) Since there is no G_ZEXT_INREG like G_SEXT_INREG,...
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FPOW Src0, Src1.
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
MachineInstrBuilder buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_INTRINSIC_TRUNC Src0.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildSExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src)
Build and insert a vector splat of a scalar Src using a G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idio...
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ_ZERO_UNDEF Op0, Src0.
MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts)
Build and insert Res = G_VSCALE MinElts.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
unsigned getBoolExtOp(bool IsVec, bool IsFP) const
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildSMax(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMAX Op0, Op1.
MachineInstrBuilder buildAssertZExt(const DstOp &Res, const SrcOp &Op, unsigned Size)
Build and insert Res = G_ASSERT_ZEXT Op, Size.
MachineInstrBuilder buildStrictFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_STRICT_FADD Op0, Op1.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildCTTZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ_ZERO_UNDEF Op0, Src0.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineInstrBuilder buildPadVectorWithUndefElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a, b, .....
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildSMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMIN Op0, Op1.
MachineInstrBuilder buildInsert(const DstOp &Res, const SrcOp &Src, const SrcOp &Op, unsigned Index)
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_FCOPYSIGN Op0, Op1.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Elt, const SrcOp &Idx)
Build and insert Res = G_INSERT_VECTOR_ELT Val, Elt, Idx.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineInstrBuilder buildDeleteTrailingVectorElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x, y, z = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a,...
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildAtomicCmpXchg(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &CmpVal, const SrcOp &NewVal, MachineMemOperand &MMO)
Build and insert OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, MMO.
MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, ArrayRef< int > Mask)
Build and insert Res = G_SHUFFLE_VECTOR Src1, Src2, Mask.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildUMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_UMIN Op0, Op1.
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_FCMP Pred, Op0, Op1.
MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FADD Op0, Op1.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:558
bool isReturn(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:929
bool isCopy() const
bool isDebugInstr() const
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:561
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:789
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:568
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
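A small sketch of the typical query-then-create pattern with these MachineRegisterInfo calls, assuming MRI, MIRBuilder, and a register Src are in scope:

  // Widen a narrow scalar register to 32 bits via a fresh generic vreg.
  LLT SrcTy = MRI.getType(Src);
  if (SrcTy.isValid() && SrcTy.isScalar() && SrcTy.getSizeInBits() < 32) {
    Register Wide = MRI.createGenericVirtualRegister(LLT::scalar(32));
    MIRBuilder.buildAnyExt(Wide, Src);
  }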
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:373
TargetInstrInfo - Interface to description of machine instruction set.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
unsigned getMaxStoresPerMemcpy(bool OptSize) const
Get maximum # of store operations permitted for llvm.memcpy.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getMaxStoresPerMemmove(bool OptSize) const
Get maximum # of store operations permitted for llvm.memmove.
Align getMinStackArgumentAlignment() const
Return the minimum stack alignment of an argument.
unsigned getMaxStoresPerMemset(bool OptSize) const
Get maximum # of store operations permitted for llvm.memset.
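These TargetLowering hooks are typically queried together before expanding a mem intrinsic inline. A sketch of such a query sequence, assuming TLI and an LLVMContext Ctx are in scope (the concrete EVT is only an example):

  unsigned Limit = TLI.getMaxStoresPerMemset(/*OptSize=*/false);
  unsigned Fast = 0;
  bool OK = TLI.allowsMisalignedMemoryAccesses(EVT::getIntegerVT(Ctx, 64),
                                               /*AddrSpace=*/0, Align(1),
                                               MachineMemOperand::MONone, &Fast);
  // Fall back to a libcall if OK is false or the store count would exceed Limit.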
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual Register getRegisterByName(const char *RegName, LLT Ty, const MachineFunction &MF) const
Return the register ID of the name passed in.
const Triple & getTargetTriple() const
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:558
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:342
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
static Type * getX86_FP80Ty(LLVMContext &C)
static Type * getVoidTy(LLVMContext &C)
static Type * getFP128Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:251
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
Definition: LegalizerInfo.h:65
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
Definition: LegalizerInfo.h:83
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
Definition: LegalizerInfo.h:57
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegalizerInfo.h:74
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
Definition: LegalizerInfo.h:52
@ Custom
The target wants to do something special with this combination of operand and type.
Definition: LegalizerInfo.h:87
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
Definition: LegalizerInfo.h:71
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
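A sketch of these matcher combinators in use, assuming a register Reg and MRI are in scope:

  // Check whether Reg is the constant -1 (all ones) using the constant matcher above.
  APInt Cst;
  if (mi_match(Reg, MRI, m_ICst(Cst)) && Cst.isAllOnes()) {
    // e.g. fold x & -1 -> x
  }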
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
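A hedged sketch of how a pair of scalar LLTs might be mapped onto one of these conversion libcalls; the helper name is hypothetical and both types are assumed to map cleanly onto MVTs:

  RTLIB::Libcall getFPExtLibcall(LLT FromTy, LLT ToTy) {
    EVT FromVT = EVT(getMVTForLLT(FromTy));
    EVT ToVT = EVT(getMVTForLLT(ToTy));
    RTLIB::Libcall LC = RTLIB::getFPEXT(FromVT, ToVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "unsupported type pair");
    return LC;
  }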
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:456
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:853
int64_t maxIntN(int64_t N)
Gets the maximum value for an N-bit signed integer.
Definition: MathExtras.h:228
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:639
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition: Utils.cpp:1554
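For example, a caller might check that every constant lane is a power of two; this fragment is illustrative only and assumes Reg and MRI are in scope:

  bool AllPow2 = matchUnaryPredicate(MRI, Reg, [](const Constant *C) {
    const auto *CI = dyn_cast_or_null<ConstantInt>(C);
    return CI && CI->getValue().isPowerOf2();
  });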
LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:372
LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition: Utils.cpp:1158
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition: Utils.cpp:493
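A sketch of splitting a wide scalar with this helper, assuming Src is a 128-bit generic virtual register and MIRBuilder/MRI are in scope:

  SmallVector<Register, 4> Parts;
  extractParts(Src, LLT::scalar(32), /*NumParts=*/4, Parts, MIRBuilder, MRI);
  // Parts[0]..Parts[3] now hold the 32-bit chunks of Src, lowest bits first.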
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:349
int64_t minIntN(int64_t N)
Gets the minimum value for an N-bit signed integer.
Definition: MathExtras.h:219
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:426
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition: Utils.h:334
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition: Alignment.h:111
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
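Worked examples for the alignment helpers above (the values follow directly from the definitions):

  // alignTo(13, Align(8))         -> 16        (round up to a multiple of 8)
  // MinAlign(24, 16)              -> 8         (largest power of two dividing both)
  // commonAlignment(Align(16), 8) -> Align(8)  (alignment still valid at offset 8)
  // Log2(Align(16))               -> 4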
LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition: Utils.cpp:1246
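For scalar types, getGCDType (and its counterpart getLCMType above) reduces to the GCD/LCM of the bit widths; the vector cases are more involved (see the definitions in Utils.cpp). Illustrative scalar examples:

  // getGCDType(LLT::scalar(64), LLT::scalar(32)) == LLT::scalar(32)
  // getLCMType(LLT::scalar(32), LLT::scalar(64)) == LLT::scalar(64)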
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition: Utils.cpp:597
#define N
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:249
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:250
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Align previous() const
Definition: Alignment.h:88
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
LegalizeAction Action
The action to take or the final answer.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)