1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
35#include "llvm/Support/Debug.h"
39#include <numeric>
40#include <optional>
41
42#define DEBUG_TYPE "legalizer"
43
44using namespace llvm;
45using namespace LegalizeActions;
46using namespace MIPatternMatch;
47
48/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
49///
50/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
51/// with any leftover piece as type \p LeftoverTy
52///
53/// Returns -1 in the first element of the pair if the breakdown is not
54/// satisfiable.
55static std::pair<int, int>
56getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
57 assert(!LeftoverTy.isValid() && "this is an out argument");
58
59 unsigned Size = OrigTy.getSizeInBits();
60 unsigned NarrowSize = NarrowTy.getSizeInBits();
61 unsigned NumParts = Size / NarrowSize;
62 unsigned LeftoverSize = Size - NumParts * NarrowSize;
63 assert(Size > NarrowSize);
64
65 if (LeftoverSize == 0)
66 return {NumParts, 0};
67
68 if (NarrowTy.isVector()) {
69 unsigned EltSize = OrigTy.getScalarSizeInBits();
70 if (LeftoverSize % EltSize != 0)
71 return {-1, -1};
72 LeftoverTy = LLT::scalarOrVector(
73 ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
74 } else {
75 LeftoverTy = LLT::scalar(LeftoverSize);
76 }
77
78 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
79 return std::make_pair(NumParts, NumLeftover);
80}
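// Editorial note (not in the upstream file): as a worked example, breaking
// down OrigTy = s88 with NarrowTy = s32 yields NumParts = 2 and a 24-bit
// leftover, so LeftoverTy is set to s24 and the function returns {2, 1}. If
// NarrowTy were a vector whose element size did not divide the leftover bits,
// the function would instead return {-1, -1}.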
81
82static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
83
84 if (!Ty.isScalar())
85 return nullptr;
86
87 switch (Ty.getSizeInBits()) {
88 case 16:
89 return Type::getHalfTy(Ctx);
90 case 32:
91 return Type::getFloatTy(Ctx);
92 case 64:
93 return Type::getDoubleTy(Ctx);
94 case 80:
95 return Type::getX86_FP80Ty(Ctx);
96 case 128:
97 return Type::getFP128Ty(Ctx);
98 default:
99 return nullptr;
100 }
101}
102
103LegalizerHelper::LegalizerHelper(MachineFunction &MF,
104 GISelChangeObserver &Observer,
105 MachineIRBuilder &Builder)
106 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
107 LI(*MF.getSubtarget().getLegalizerInfo()),
108 TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
109
110LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
111 GISelChangeObserver &Observer,
112 MachineIRBuilder &B, GISelKnownBits *KB)
113 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
114 TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
115
116LegalizerHelper::LegalizeResult
117LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
118 LostDebugLocObserver &LocObserver) {
119 LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
120
121 MIRBuilder.setInstrAndDebugLoc(MI);
122
123 if (isa<GIntrinsic>(MI))
124 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
125 auto Step = LI.getAction(MI, MRI);
126 switch (Step.Action) {
127 case Legal:
128 LLVM_DEBUG(dbgs() << ".. Already legal\n");
129 return AlreadyLegal;
130 case Libcall:
131 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
132 return libcall(MI, LocObserver);
133 case NarrowScalar:
134 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
135 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
136 case WidenScalar:
137 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
138 return widenScalar(MI, Step.TypeIdx, Step.NewType);
139 case Bitcast:
140 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
141 return bitcast(MI, Step.TypeIdx, Step.NewType);
142 case Lower:
143 LLVM_DEBUG(dbgs() << ".. Lower\n");
144 return lower(MI, Step.TypeIdx, Step.NewType);
145 case FewerElements:
146 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
147 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
148 case MoreElements:
149 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
150 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
151 case Custom:
152 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
153 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
154 : UnableToLegalize;
155 default:
156 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
157 return UnableToLegalize;
158 }
159}
160
161void LegalizerHelper::insertParts(Register DstReg,
162 LLT ResultTy, LLT PartTy,
163 ArrayRef<Register> PartRegs,
164 LLT LeftoverTy,
165 ArrayRef<Register> LeftoverRegs) {
166 if (!LeftoverTy.isValid()) {
167 assert(LeftoverRegs.empty());
168
169 if (!ResultTy.isVector()) {
170 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
171 return;
172 }
173
174 if (PartTy.isVector())
175 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
176 else
177 MIRBuilder.buildBuildVector(DstReg, PartRegs);
178 return;
179 }
180
181 // Merge sub-vectors with different number of elements and insert into DstReg.
182 if (ResultTy.isVector()) {
183 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
184 SmallVector<Register, 8> AllRegs;
185 for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
186 AllRegs.push_back(Reg);
187 return mergeMixedSubvectors(DstReg, AllRegs);
188 }
189
190 SmallVector<Register> GCDRegs;
191 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
192 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
193 extractGCDType(GCDRegs, GCDTy, PartReg);
194 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
195 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
196}
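// Editorial example (assumed values, not from the upstream file): with
// ResultTy = s88, PartTy = s32 (two part registers) and LeftoverTy = s24 (one
// leftover register), the scalar path above computes GCDTy = s8, re-splits
// every piece into s8 chunks with extractGCDType, and then rebuilds the s88
// result through buildLCMMergePieces/buildWidenedRemergeToDst.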
197
198void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
199 Register Reg) {
200 LLT Ty = MRI.getType(Reg);
201 SmallVector<Register, 8> RegElts;
202 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
203 MIRBuilder, MRI);
204 Elts.append(RegElts);
205}
206
207/// Merge \p PartRegs with different types into \p DstReg.
208void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
209 ArrayRef<Register> PartRegs) {
210 SmallVector<Register, 32> AllElts;
211 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
212 appendVectorElts(AllElts, PartRegs[i]);
213
214 Register Leftover = PartRegs[PartRegs.size() - 1];
215 if (MRI.getType(Leftover).isScalar())
216 AllElts.push_back(Leftover);
217 else
218 appendVectorElts(AllElts, Leftover);
219
220 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
221}
222
223/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
224static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
225 const MachineInstr &MI) {
226 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
227
228 const int StartIdx = Regs.size();
229 const int NumResults = MI.getNumOperands() - 1;
230 Regs.resize(Regs.size() + NumResults);
231 for (int I = 0; I != NumResults; ++I)
232 Regs[StartIdx + I] = MI.getOperand(I).getReg();
233}
234
235void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
236 LLT GCDTy, Register SrcReg) {
237 LLT SrcTy = MRI.getType(SrcReg);
238 if (SrcTy == GCDTy) {
239 // If the source already evenly divides the result type, we don't need to do
240 // anything.
241 Parts.push_back(SrcReg);
242 } else {
243 // Need to split into common type sized pieces.
244 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
245 getUnmergeResults(Parts, *Unmerge);
246 }
247}
248
249LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
250 LLT NarrowTy, Register SrcReg) {
251 LLT SrcTy = MRI.getType(SrcReg);
252 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
253 extractGCDType(Parts, GCDTy, SrcReg);
254 return GCDTy;
255}
256
257LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
258 SmallVectorImpl<Register> &VRegs,
259 unsigned PadStrategy) {
260 LLT LCMTy = getLCMType(DstTy, NarrowTy);
261
262 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
263 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
264 int NumOrigSrc = VRegs.size();
265
266 Register PadReg;
267
268 // Get a value we can use to pad the source value if the sources won't evenly
269 // cover the result type.
270 if (NumOrigSrc < NumParts * NumSubParts) {
271 if (PadStrategy == TargetOpcode::G_ZEXT)
272 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
273 else if (PadStrategy == TargetOpcode::G_ANYEXT)
274 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
275 else {
276 assert(PadStrategy == TargetOpcode::G_SEXT);
277
278 // Shift the sign bit of the low register through the high register.
279 auto ShiftAmt =
280 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
281 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
282 }
283 }
284
285 // Registers for the final merge to be produced.
286 SmallVector<Register, 4> Remerge(NumParts);
287
288 // Registers needed for intermediate merges, which will be merged into a
289 // source for Remerge.
290 SmallVector<Register, 4> SubMerge(NumSubParts);
291
292 // Once we've fully read off the end of the original source bits, we can reuse
293 // the same high bits for remaining padding elements.
294 Register AllPadReg;
295
296 // Build merges to the LCM type to cover the original result type.
297 for (int I = 0; I != NumParts; ++I) {
298 bool AllMergePartsArePadding = true;
299
300 // Build the requested merges to the requested type.
301 for (int J = 0; J != NumSubParts; ++J) {
302 int Idx = I * NumSubParts + J;
303 if (Idx >= NumOrigSrc) {
304 SubMerge[J] = PadReg;
305 continue;
306 }
307
308 SubMerge[J] = VRegs[Idx];
309
310 // There are meaningful bits here we can't reuse later.
311 AllMergePartsArePadding = false;
312 }
313
314 // If we've filled up a complete piece with padding bits, we can directly
315 // emit the natural sized constant if applicable, rather than a merge of
316 // smaller constants.
317 if (AllMergePartsArePadding && !AllPadReg) {
318 if (PadStrategy == TargetOpcode::G_ANYEXT)
319 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
320 else if (PadStrategy == TargetOpcode::G_ZEXT)
321 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
322
323 // If this is a sign extension, we can't materialize a trivial constant
324 // with the right type and have to produce a merge.
325 }
326
327 if (AllPadReg) {
328 // Avoid creating additional instructions if we're just adding additional
329 // copies of padding bits.
330 Remerge[I] = AllPadReg;
331 continue;
332 }
333
334 if (NumSubParts == 1)
335 Remerge[I] = SubMerge[0];
336 else
337 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
338
339 // In the sign extend padding case, re-use the first all-signbit merge.
340 if (AllMergePartsArePadding && !AllPadReg)
341 AllPadReg = Remerge[I];
342 }
343
344 VRegs = std::move(Remerge);
345 return LCMTy;
346}
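// Editorial sketch (assumed values): remerging an s64 value from s48 pieces
// with GCDTy = s16 gives LCMTy = s192, NumParts = 4 and NumSubParts = 3, i.e.
// twelve s16 slots. The four slots that carry original bits come from VRegs;
// the remaining eight are filled with the pad value selected by PadStrategy
// (zero for G_ZEXT, undef for G_ANYEXT, or a sign-bit G_ASHR for G_SEXT).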
347
348void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
349 ArrayRef<Register> RemergeRegs) {
350 LLT DstTy = MRI.getType(DstReg);
351
352 // Create the merge to the widened source, and extract the relevant bits into
353 // the result.
354
355 if (DstTy == LCMTy) {
356 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
357 return;
358 }
359
360 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
361 if (DstTy.isScalar() && LCMTy.isScalar()) {
362 MIRBuilder.buildTrunc(DstReg, Remerge);
363 return;
364 }
365
366 if (LCMTy.isVector()) {
367 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
368 SmallVector<Register, 8> UnmergeDefs(NumDefs);
369 UnmergeDefs[0] = DstReg;
370 for (unsigned I = 1; I != NumDefs; ++I)
371 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
372
373 MIRBuilder.buildUnmerge(UnmergeDefs,
374 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
375 return;
376 }
377
378 llvm_unreachable("unhandled case");
379}
380
381static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
382#define RTLIBCASE_INT(LibcallPrefix) \
383 do { \
384 switch (Size) { \
385 case 32: \
386 return RTLIB::LibcallPrefix##32; \
387 case 64: \
388 return RTLIB::LibcallPrefix##64; \
389 case 128: \
390 return RTLIB::LibcallPrefix##128; \
391 default: \
392 llvm_unreachable("unexpected size"); \
393 } \
394 } while (0)
395
396#define RTLIBCASE(LibcallPrefix) \
397 do { \
398 switch (Size) { \
399 case 32: \
400 return RTLIB::LibcallPrefix##32; \
401 case 64: \
402 return RTLIB::LibcallPrefix##64; \
403 case 80: \
404 return RTLIB::LibcallPrefix##80; \
405 case 128: \
406 return RTLIB::LibcallPrefix##128; \
407 default: \
408 llvm_unreachable("unexpected size"); \
409 } \
410 } while (0)
411
412 switch (Opcode) {
413 case TargetOpcode::G_MUL:
414 RTLIBCASE_INT(MUL_I);
415 case TargetOpcode::G_SDIV:
416 RTLIBCASE_INT(SDIV_I);
417 case TargetOpcode::G_UDIV:
418 RTLIBCASE_INT(UDIV_I);
419 case TargetOpcode::G_SREM:
420 RTLIBCASE_INT(SREM_I);
421 case TargetOpcode::G_UREM:
422 RTLIBCASE_INT(UREM_I);
423 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
424 RTLIBCASE_INT(CTLZ_I);
425 case TargetOpcode::G_FADD:
426 RTLIBCASE(ADD_F);
427 case TargetOpcode::G_FSUB:
428 RTLIBCASE(SUB_F);
429 case TargetOpcode::G_FMUL:
430 RTLIBCASE(MUL_F);
431 case TargetOpcode::G_FDIV:
432 RTLIBCASE(DIV_F);
433 case TargetOpcode::G_FEXP:
434 RTLIBCASE(EXP_F);
435 case TargetOpcode::G_FEXP2:
436 RTLIBCASE(EXP2_F);
437 case TargetOpcode::G_FEXP10:
438 RTLIBCASE(EXP10_F);
439 case TargetOpcode::G_FREM:
440 RTLIBCASE(REM_F);
441 case TargetOpcode::G_FPOW:
442 RTLIBCASE(POW_F);
443 case TargetOpcode::G_FPOWI:
444 RTLIBCASE(POWI_F);
445 case TargetOpcode::G_FMA:
446 RTLIBCASE(FMA_F);
447 case TargetOpcode::G_FSIN:
448 RTLIBCASE(SIN_F);
449 case TargetOpcode::G_FCOS:
450 RTLIBCASE(COS_F);
451 case TargetOpcode::G_FLOG10:
452 RTLIBCASE(LOG10_F);
453 case TargetOpcode::G_FLOG:
454 RTLIBCASE(LOG_F);
455 case TargetOpcode::G_FLOG2:
456 RTLIBCASE(LOG2_F);
457 case TargetOpcode::G_FLDEXP:
458 RTLIBCASE(LDEXP_F);
459 case TargetOpcode::G_FCEIL:
460 RTLIBCASE(CEIL_F);
461 case TargetOpcode::G_FFLOOR:
462 RTLIBCASE(FLOOR_F);
463 case TargetOpcode::G_FMINNUM:
464 RTLIBCASE(FMIN_F);
465 case TargetOpcode::G_FMAXNUM:
466 RTLIBCASE(FMAX_F);
467 case TargetOpcode::G_FSQRT:
468 RTLIBCASE(SQRT_F);
469 case TargetOpcode::G_FRINT:
470 RTLIBCASE(RINT_F);
471 case TargetOpcode::G_FNEARBYINT:
472 RTLIBCASE(NEARBYINT_F);
473 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
474 RTLIBCASE(ROUNDEVEN_F);
475 case TargetOpcode::G_INTRINSIC_LRINT:
476 RTLIBCASE(LRINT_F);
477 case TargetOpcode::G_INTRINSIC_LLRINT:
478 RTLIBCASE(LLRINT_F);
479 }
480 llvm_unreachable("Unknown libcall function");
481}
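// Editorial example: for a 32-bit G_FADD the RTLIBCASE(ADD_F) expansion above
// returns RTLIB::ADD_F32, which most targets map to the compiler-rt/libgcc
// routine __addsf3; a 64-bit G_FREM similarly selects RTLIB::REM_F64 (fmod).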
482
483/// True if an instruction is in tail position in its caller. Intended for
484/// legalizing libcalls as tail calls when possible.
485static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
486 MachineInstr &MI,
487 const TargetInstrInfo &TII,
488 MachineRegisterInfo &MRI) {
489 MachineBasicBlock &MBB = *MI.getParent();
490 const Function &F = MBB.getParent()->getFunction();
491
492 // Conservatively require the attributes of the call to match those of
493 // the return. Ignore NoAlias and NonNull because they don't affect the
494 // call sequence.
495 AttributeList CallerAttrs = F.getAttributes();
496 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
497 .removeAttribute(Attribute::NoAlias)
498 .removeAttribute(Attribute::NonNull)
499 .hasAttributes())
500 return false;
501
502 // It's not safe to eliminate the sign / zero extension of the return value.
503 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
504 CallerAttrs.hasRetAttr(Attribute::SExt))
505 return false;
506
507 // Only tail call if the following instruction is a standard return or if we
508 // have a `thisreturn` callee, and a sequence like:
509 //
510 // G_MEMCPY %0, %1, %2
511 // $x0 = COPY %0
512 // RET_ReallyLR implicit $x0
513 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
514 if (Next != MBB.instr_end() && Next->isCopy()) {
515 if (MI.getOpcode() == TargetOpcode::G_BZERO)
516 return false;
517
518 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
519 // memcpy/etc routines return the same parameter. For others it will be the
520 // returned value.
521 Register VReg = MI.getOperand(0).getReg();
522 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
523 return false;
524
525 Register PReg = Next->getOperand(0).getReg();
526 if (!PReg.isPhysical())
527 return false;
528
529 auto Ret = next_nodbg(Next, MBB.instr_end());
530 if (Ret == MBB.instr_end() || !Ret->isReturn())
531 return false;
532
533 if (Ret->getNumImplicitOperands() != 1)
534 return false;
535
536 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
537 return false;
538
539 // Skip over the COPY that we just validated.
540 Next = Ret;
541 }
542
543 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
544 return false;
545
546 return true;
547}
548
549LegalizerHelper::LegalizeResult
550llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
551 const CallLowering::ArgInfo &Result,
552 ArrayRef<CallLowering::ArgInfo> Args,
553 const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
554 MachineInstr *MI) {
555 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
556
557 CallLowering::CallLoweringInfo Info;
558 Info.CallConv = CC;
559 Info.Callee = MachineOperand::CreateES(Name);
560 Info.OrigRet = Result;
561 if (MI)
562 Info.IsTailCall =
563 (Result.Ty->isVoidTy() ||
564 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
565 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
566 *MIRBuilder.getMRI());
567
568 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
569 if (!CLI.lowerCall(MIRBuilder, Info))
570 return LegalizerHelper::UnableToLegalize;
571
572 if (MI && Info.LoweredTailCall) {
573 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
574
575 // Check debug locations before removing the return.
576 LocObserver.checkpoint(true);
577
578 // We must have a return following the call (or debug insts) to get past
579 // isLibCallInTailPosition.
580 do {
581 MachineInstr *Next = MI->getNextNode();
582 assert(Next &&
583 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
584 "Expected instr following MI to be return or debug inst?");
585 // We lowered a tail call, so the call is now the return from the block.
586 // Delete the old return.
587 Next->eraseFromParent();
588 } while (MI->getNextNode());
589
590 // We expect to lose the debug location from the return.
591 LocObserver.checkpoint(false);
592 }
593 return LegalizerHelper::Legalized;
594}
595
596LegalizerHelper::LegalizeResult
597llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
598 const CallLowering::ArgInfo &Result,
599 ArrayRef<CallLowering::ArgInfo> Args,
600 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
601 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
602 const char *Name = TLI.getLibcallName(Libcall);
603 if (!Name)
604 return LegalizerHelper::UnableToLegalize;
605 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
606 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
607}
608
609// Useful for libcalls where all operands have the same type.
610static LegalizerHelper::LegalizeResult
611simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
612 Type *OpType, LostDebugLocObserver &LocObserver) {
613 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
614
615 // FIXME: What does the original arg index mean here?
616 SmallVector<CallLowering::ArgInfo, 3> Args;
617 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
618 Args.push_back({MO.getReg(), OpType, 0});
619 return createLibcall(MIRBuilder, Libcall,
620 {MI.getOperand(0).getReg(), OpType, 0}, Args,
621 LocObserver, &MI);
622}
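// Editorial example: for `%d:_(s64) = G_FREM %a, %b`, simpleLibcall selects
// RTLIB::REM_F64 (fmod on most targets) and emits a call that passes %a and %b
// as double-typed arguments, with %d as the double-typed return value.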
623
624LegalizerHelper::LegalizeResult
625llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
626 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
627 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
628
629 SmallVector<CallLowering::ArgInfo, 3> Args;
630 // Add all the args, except for the last which is an imm denoting 'tail'.
631 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
632 Register Reg = MI.getOperand(i).getReg();
633
634 // Need to derive an IR type for call lowering.
635 LLT OpLLT = MRI.getType(Reg);
636 Type *OpTy = nullptr;
637 if (OpLLT.isPointer())
638 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
639 else
640 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
641 Args.push_back({Reg, OpTy, 0});
642 }
643
644 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
645 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
646 RTLIB::Libcall RTLibcall;
647 unsigned Opc = MI.getOpcode();
648 switch (Opc) {
649 case TargetOpcode::G_BZERO:
650 RTLibcall = RTLIB::BZERO;
651 break;
652 case TargetOpcode::G_MEMCPY:
653 RTLibcall = RTLIB::MEMCPY;
654 Args[0].Flags[0].setReturned();
655 break;
656 case TargetOpcode::G_MEMMOVE:
657 RTLibcall = RTLIB::MEMMOVE;
658 Args[0].Flags[0].setReturned();
659 break;
660 case TargetOpcode::G_MEMSET:
661 RTLibcall = RTLIB::MEMSET;
662 Args[0].Flags[0].setReturned();
663 break;
664 default:
665 llvm_unreachable("unsupported opcode");
666 }
667 const char *Name = TLI.getLibcallName(RTLibcall);
668
669 // Unsupported libcall on the target.
670 if (!Name) {
671 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
672 << MIRBuilder.getTII().getName(Opc) << "\n");
673 return LegalizerHelper::UnableToLegalize;
674 }
675
676 CallLowering::CallLoweringInfo Info;
677 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
678 Info.Callee = MachineOperand::CreateES(Name);
679 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
680 Info.IsTailCall =
681 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
682 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
683
684 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
685 if (!CLI.lowerCall(MIRBuilder, Info))
686 return LegalizerHelper::UnableToLegalize;
687
688 if (Info.LoweredTailCall) {
689 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
690
691 // Check debug locations before removing the return.
692 LocObserver.checkpoint(true);
693
694 // We must have a return following the call (or debug insts) to get past
695 // isLibCallInTailPosition.
696 do {
697 MachineInstr *Next = MI.getNextNode();
698 assert(Next &&
699 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
700 "Expected instr following MI to be return or debug inst?");
701 // We lowered a tail call, so the call is now the return from the block.
702 // Delete the old return.
703 Next->eraseFromParent();
704 } while (MI.getNextNode());
705
706 // We expect to lose the debug location from the return.
707 LocObserver.checkpoint(false);
708 }
709
710 return LegalizerHelper::Legalized;
711}
712
713static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
714 unsigned Opc = MI.getOpcode();
715 auto &AtomicMI = cast<GMemOperation>(MI);
716 auto &MMO = AtomicMI.getMMO();
717 auto Ordering = MMO.getMergedOrdering();
718 LLT MemType = MMO.getMemoryType();
719 uint64_t MemSize = MemType.getSizeInBytes();
720 if (MemType.isVector())
721 return RTLIB::UNKNOWN_LIBCALL;
722
723#define LCALLS(A, B) \
724 { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
725#define LCALL5(A) \
726 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
727 switch (Opc) {
728 case TargetOpcode::G_ATOMIC_CMPXCHG:
729 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
730 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
731 return getOutlineAtomicHelper(LC, Ordering, MemSize);
732 }
733 case TargetOpcode::G_ATOMICRMW_XCHG: {
734 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
735 return getOutlineAtomicHelper(LC, Ordering, MemSize);
736 }
737 case TargetOpcode::G_ATOMICRMW_ADD:
738 case TargetOpcode::G_ATOMICRMW_SUB: {
739 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
740 return getOutlineAtomicHelper(LC, Ordering, MemSize);
741 }
742 case TargetOpcode::G_ATOMICRMW_AND: {
743 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
744 return getOutlineAtomicHelper(LC, Ordering, MemSize);
745 }
746 case TargetOpcode::G_ATOMICRMW_OR: {
747 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
748 return getOutlineAtomicHelper(LC, Ordering, MemSize);
749 }
750 case TargetOpcode::G_ATOMICRMW_XOR: {
751 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
752 return getOutlineAtomicHelper(LC, Ordering, MemSize);
753 }
754 default:
755 return RTLIB::UNKNOWN_LIBCALL;
756 }
757#undef LCALLS
758#undef LCALL5
759}
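// Editorial example: a 4-byte G_ATOMICRMW_ADD with acquire ordering resolves
// to RTLIB::OUTLINE_ATOMIC_LDADD4_ACQ (the outline-atomics helper
// __aarch64_ldadd4_acq on AArch64); vector memory types fall back to
// RTLIB::UNKNOWN_LIBCALL and are not handled here.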
760
761static LegalizerHelper::LegalizeResult
762createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
763 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
764
765 Type *RetTy;
766 SmallVector<Register> RetRegs;
767 SmallVector<CallLowering::ArgInfo, 3> Args;
768 unsigned Opc = MI.getOpcode();
769 switch (Opc) {
770 case TargetOpcode::G_ATOMIC_CMPXCHG:
771 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
772 Register Success;
773 LLT SuccessLLT;
774 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
775 MI.getFirst4RegLLTs();
776 RetRegs.push_back(Ret);
777 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
778 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
779 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
780 NewLLT) = MI.getFirst5RegLLTs();
781 RetRegs.push_back(Success);
782 RetTy = StructType::get(
783 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
784 }
785 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
786 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
787 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
788 break;
789 }
790 case TargetOpcode::G_ATOMICRMW_XCHG:
791 case TargetOpcode::G_ATOMICRMW_ADD:
792 case TargetOpcode::G_ATOMICRMW_SUB:
793 case TargetOpcode::G_ATOMICRMW_AND:
794 case TargetOpcode::G_ATOMICRMW_OR:
795 case TargetOpcode::G_ATOMICRMW_XOR: {
796 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
797 RetRegs.push_back(Ret);
798 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
799 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
800 Val =
801 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
802 .getReg(0);
803 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
804 Val =
805 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
806 .getReg(0);
807 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
808 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
809 break;
810 }
811 default:
812 llvm_unreachable("unsupported opcode");
813 }
814
815 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
816 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
817 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
818 const char *Name = TLI.getLibcallName(RTLibcall);
819
820 // Unsupported libcall on the target.
821 if (!Name) {
822 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
823 << MIRBuilder.getTII().getName(Opc) << "\n");
824 return LegalizerHelper::UnableToLegalize;
825 }
826
827 CallLowering::CallLoweringInfo Info;
828 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
829 Info.Callee = MachineOperand::CreateES(Name);
830 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
831
832 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
833 if (!CLI.lowerCall(MIRBuilder, Info))
834 return LegalizerHelper::UnableToLegalize;
835
836 return LegalizerHelper::Legalized;
837}
838
839static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
840 Type *FromType) {
841 auto ToMVT = MVT::getVT(ToType);
842 auto FromMVT = MVT::getVT(FromType);
843
844 switch (Opcode) {
845 case TargetOpcode::G_FPEXT:
846 return RTLIB::getFPEXT(FromMVT, ToMVT);
847 case TargetOpcode::G_FPTRUNC:
848 return RTLIB::getFPROUND(FromMVT, ToMVT);
849 case TargetOpcode::G_FPTOSI:
850 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
851 case TargetOpcode::G_FPTOUI:
852 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
853 case TargetOpcode::G_SITOFP:
854 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
855 case TargetOpcode::G_UITOFP:
856 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
857 }
858 llvm_unreachable("Unsupported libcall function");
859}
860
861static LegalizerHelper::LegalizeResult
862conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
863 Type *FromType, LostDebugLocObserver &LocObserver) {
864 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
865 return createLibcall(
866 MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType, 0},
867 {{MI.getOperand(1).getReg(), FromType, 0}}, LocObserver, &MI);
868}
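// Editorial example: legalizing `%d:_(s64) = G_FPEXT %s:_(s32)` this way asks
// RTLIB::getFPEXT(MVT::f32, MVT::f64) for the libcall (__extendsfdf2 on most
// targets) and emits a call taking %s as a float argument and defining %d as
// the double result.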
869
870static RTLIB::Libcall
871getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
872 RTLIB::Libcall RTLibcall;
873 switch (MI.getOpcode()) {
874 case TargetOpcode::G_GET_FPENV:
875 RTLibcall = RTLIB::FEGETENV;
876 break;
877 case TargetOpcode::G_SET_FPENV:
878 case TargetOpcode::G_RESET_FPENV:
879 RTLibcall = RTLIB::FESETENV;
880 break;
881 case TargetOpcode::G_GET_FPMODE:
882 RTLibcall = RTLIB::FEGETMODE;
883 break;
884 case TargetOpcode::G_SET_FPMODE:
885 case TargetOpcode::G_RESET_FPMODE:
886 RTLibcall = RTLIB::FESETMODE;
887 break;
888 default:
889 llvm_unreachable("Unexpected opcode");
890 }
891 return RTLibcall;
892}
893
894// Some library functions that read FP state (fegetmode, fegetenv) write the
895// state into a region in memory. IR intrinsics that do the same operations
896// (get_fpmode, get_fpenv) return the state as an integer value. To implement
897// these intrinsics via the library functions, we need to use a temporary variable,
898// for example:
899//
900// %0:_(s32) = G_GET_FPMODE
901//
902// is transformed to:
903//
904// %1:_(p0) = G_FRAME_INDEX %stack.0
905// BL &fegetmode
906// %0:_(s32) = G_LOAD %1
907//
908LegalizerHelper::LegalizeResult
909LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
910 MachineInstr &MI,
911 LostDebugLocObserver &LocObserver) {
912 const DataLayout &DL = MIRBuilder.getDataLayout();
913 auto &MF = MIRBuilder.getMF();
914 auto &MRI = *MIRBuilder.getMRI();
915 auto &Ctx = MF.getFunction().getContext();
916
917 // Create temporary, where library function will put the read state.
918 Register Dst = MI.getOperand(0).getReg();
919 LLT StateTy = MRI.getType(Dst);
920 TypeSize StateSize = StateTy.getSizeInBytes();
921 Align TempAlign = getStackTemporaryAlignment(StateTy);
922 MachinePointerInfo TempPtrInfo;
923 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
924
925 // Create a call to library function, with the temporary as an argument.
926 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
927 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
928 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
929 auto Res =
930 createLibcall(MIRBuilder, RTLibcall,
931 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
932 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
933 LocObserver, nullptr);
934 if (Res != LegalizerHelper::Legalized)
935 return Res;
936
937 // Create a load from the temporary.
938 MachineMemOperand *MMO = MF.getMachineMemOperand(
939 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
940 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
941
942 return LegalizerHelper::Legalized;
943}
944
945// Similar to `createGetStateLibcall`, this function calls a library function
946// using transient space on the stack. In this case the library function reads
947// the content of the memory region.
948LegalizerHelper::LegalizeResult
949LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
950 MachineInstr &MI,
951 LostDebugLocObserver &LocObserver) {
952 const DataLayout &DL = MIRBuilder.getDataLayout();
953 auto &MF = MIRBuilder.getMF();
954 auto &MRI = *MIRBuilder.getMRI();
955 auto &Ctx = MF.getFunction().getContext();
956
957 // Create temporary, where library function will get the new state.
958 Register Src = MI.getOperand(0).getReg();
959 LLT StateTy = MRI.getType(Src);
960 TypeSize StateSize = StateTy.getSizeInBytes();
961 Align TempAlign = getStackTemporaryAlignment(StateTy);
962 MachinePointerInfo TempPtrInfo;
963 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
964
965 // Put the new state into the temporary.
966 MachineMemOperand *MMO = MF.getMachineMemOperand(
967 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
968 MIRBuilder.buildStore(Src, Temp, *MMO);
969
970 // Create a call to library function, with the temporary as an argument.
971 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
972 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
973 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
974 return createLibcall(MIRBuilder, RTLibcall,
975 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
976 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
977 LocObserver, nullptr);
978}
979
980// The function is used to legalize operations that set the default environment
981// state. In the C library a call like `fesetmode(FE_DFL_MODE)` is used for that.
982// On most targets supported in glibc FE_DFL_MODE is defined as
983// `((const femode_t *) -1)`. That assumption is used here. If for some target
984// it is not true, the target must provide custom lowering.
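// For example (editorial sketch, register types assumed), G_RESET_FPMODE is
// lowered roughly as:
//
//   %c:_(s64) = G_CONSTANT i64 -1
//   %p:_(p0) = G_INTTOPTR %c
//   BL &fesetmode
//
// with the pointer width and the exact routine (fesetmode or fesetenv)
// depending on the target.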
985LegalizerHelper::LegalizeResult
986LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
987 MachineInstr &MI,
988 LostDebugLocObserver &LocObserver) {
989 const DataLayout &DL = MIRBuilder.getDataLayout();
990 auto &MF = MIRBuilder.getMF();
991 auto &Ctx = MF.getFunction().getContext();
992
993 // Create an argument for the library function.
994 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
995 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
996 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
997 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
998 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
999 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1000 MIRBuilder.buildIntToPtr(Dest, DefValue);
1001
1002 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1003 return createLibcall(MIRBuilder, RTLibcall,
1004 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1005 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1006 LocObserver, &MI);
1007}
1008
1009LegalizerHelper::LegalizeResult
1010LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1011 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1012
1013 switch (MI.getOpcode()) {
1014 default:
1015 return UnableToLegalize;
1016 case TargetOpcode::G_MUL:
1017 case TargetOpcode::G_SDIV:
1018 case TargetOpcode::G_UDIV:
1019 case TargetOpcode::G_SREM:
1020 case TargetOpcode::G_UREM:
1021 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1022 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1023 unsigned Size = LLTy.getSizeInBits();
1024 Type *HLTy = IntegerType::get(Ctx, Size);
1025 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1026 if (Status != Legalized)
1027 return Status;
1028 break;
1029 }
1030 case TargetOpcode::G_FADD:
1031 case TargetOpcode::G_FSUB:
1032 case TargetOpcode::G_FMUL:
1033 case TargetOpcode::G_FDIV:
1034 case TargetOpcode::G_FMA:
1035 case TargetOpcode::G_FPOW:
1036 case TargetOpcode::G_FREM:
1037 case TargetOpcode::G_FCOS:
1038 case TargetOpcode::G_FSIN:
1039 case TargetOpcode::G_FLOG10:
1040 case TargetOpcode::G_FLOG:
1041 case TargetOpcode::G_FLOG2:
1042 case TargetOpcode::G_FLDEXP:
1043 case TargetOpcode::G_FEXP:
1044 case TargetOpcode::G_FEXP2:
1045 case TargetOpcode::G_FEXP10:
1046 case TargetOpcode::G_FCEIL:
1047 case TargetOpcode::G_FFLOOR:
1048 case TargetOpcode::G_FMINNUM:
1049 case TargetOpcode::G_FMAXNUM:
1050 case TargetOpcode::G_FSQRT:
1051 case TargetOpcode::G_FRINT:
1052 case TargetOpcode::G_FNEARBYINT:
1053 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1054 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1055 unsigned Size = LLTy.getSizeInBits();
1056 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1057 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1058 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1059 return UnableToLegalize;
1060 }
1061 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1062 if (Status != Legalized)
1063 return Status;
1064 break;
1065 }
1066 case TargetOpcode::G_INTRINSIC_LRINT:
1067 case TargetOpcode::G_INTRINSIC_LLRINT: {
1068 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1069 unsigned Size = LLTy.getSizeInBits();
1070 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1071 Type *ITy = IntegerType::get(
1072 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1073 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1074 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1075 return UnableToLegalize;
1076 }
1077 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1078 LegalizeResult Status =
1079 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1080 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1081 if (Status != Legalized)
1082 return Status;
1083 MI.eraseFromParent();
1084 return Legalized;
1085 }
1086 case TargetOpcode::G_FPOWI: {
1087 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1088 unsigned Size = LLTy.getSizeInBits();
1089 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1090 Type *ITy = IntegerType::get(
1091 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1092 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1093 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1094 return UnableToLegalize;
1095 }
1096 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1097 std::initializer_list<CallLowering::ArgInfo> Args = {
1098 {MI.getOperand(1).getReg(), HLTy, 0},
1099 {MI.getOperand(2).getReg(), ITy, 1}};
1100 LegalizeResult Status =
1101 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1102 Args, LocObserver, &MI);
1103 if (Status != Legalized)
1104 return Status;
1105 break;
1106 }
1107 case TargetOpcode::G_FPEXT:
1108 case TargetOpcode::G_FPTRUNC: {
1109 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1110 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1111 if (!FromTy || !ToTy)
1112 return UnableToLegalize;
1113 LegalizeResult Status =
1114 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver);
1115 if (Status != Legalized)
1116 return Status;
1117 break;
1118 }
1119 case TargetOpcode::G_FPTOSI:
1120 case TargetOpcode::G_FPTOUI: {
1121 // FIXME: Support other types
1122 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1123 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1124 if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
1125 return UnableToLegalize;
1126 LegalizeResult Status = conversionLibcall(
1127 MI, MIRBuilder,
1128 ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
1129 FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
1130 LocObserver);
1131 if (Status != Legalized)
1132 return Status;
1133 break;
1134 }
1135 case TargetOpcode::G_SITOFP:
1136 case TargetOpcode::G_UITOFP: {
1137 // FIXME: Support other types
1138 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1139 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1140 if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
1141 return UnableToLegalize;
1142 LegalizeResult Status = conversionLibcall(
1143 MI, MIRBuilder,
1144 ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
1145 FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
1146 LocObserver);
1147 if (Status != Legalized)
1148 return Status;
1149 break;
1150 }
1151 case TargetOpcode::G_ATOMICRMW_XCHG:
1152 case TargetOpcode::G_ATOMICRMW_ADD:
1153 case TargetOpcode::G_ATOMICRMW_SUB:
1154 case TargetOpcode::G_ATOMICRMW_AND:
1155 case TargetOpcode::G_ATOMICRMW_OR:
1156 case TargetOpcode::G_ATOMICRMW_XOR:
1157 case TargetOpcode::G_ATOMIC_CMPXCHG:
1158 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1159 auto Status = createAtomicLibcall(MIRBuilder, MI);
1160 if (Status != Legalized)
1161 return Status;
1162 break;
1163 }
1164 case TargetOpcode::G_BZERO:
1165 case TargetOpcode::G_MEMCPY:
1166 case TargetOpcode::G_MEMMOVE:
1167 case TargetOpcode::G_MEMSET: {
1168 LegalizeResult Result =
1169 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1170 if (Result != Legalized)
1171 return Result;
1172 MI.eraseFromParent();
1173 return Result;
1174 }
1175 case TargetOpcode::G_GET_FPENV:
1176 case TargetOpcode::G_GET_FPMODE: {
1177 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1178 if (Result != Legalized)
1179 return Result;
1180 break;
1181 }
1182 case TargetOpcode::G_SET_FPENV:
1183 case TargetOpcode::G_SET_FPMODE: {
1184 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1185 if (Result != Legalized)
1186 return Result;
1187 break;
1188 }
1189 case TargetOpcode::G_RESET_FPENV:
1190 case TargetOpcode::G_RESET_FPMODE: {
1191 LegalizeResult Result =
1192 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1193 if (Result != Legalized)
1194 return Result;
1195 break;
1196 }
1197 }
1198
1199 MI.eraseFromParent();
1200 return Legalized;
1201}
1202
1203LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1204 unsigned TypeIdx,
1205 LLT NarrowTy) {
1206 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1207 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1208
1209 switch (MI.getOpcode()) {
1210 default:
1211 return UnableToLegalize;
1212 case TargetOpcode::G_IMPLICIT_DEF: {
1213 Register DstReg = MI.getOperand(0).getReg();
1214 LLT DstTy = MRI.getType(DstReg);
1215
1216 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1217 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1218 // FIXME: Although this would also be legal for the general case, it causes
1219 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1220 // combines not being hit). This seems to be a problem related to the
1221 // artifact combiner.
1222 if (SizeOp0 % NarrowSize != 0) {
1223 LLT ImplicitTy = NarrowTy;
1224 if (DstTy.isVector())
1225 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
1226
1227 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1228 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1229
1230 MI.eraseFromParent();
1231 return Legalized;
1232 }
1233
1234 int NumParts = SizeOp0 / NarrowSize;
1235
1236 SmallVector<Register, 2> DstRegs;
1237 for (int i = 0; i < NumParts; ++i)
1238 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1239
1240 if (DstTy.isVector())
1241 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1242 else
1243 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1244 MI.eraseFromParent();
1245 return Legalized;
1246 }
1247 case TargetOpcode::G_CONSTANT: {
1248 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1249 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1250 unsigned TotalSize = Ty.getSizeInBits();
1251 unsigned NarrowSize = NarrowTy.getSizeInBits();
1252 int NumParts = TotalSize / NarrowSize;
1253
1254 SmallVector<Register, 4> PartRegs;
1255 for (int I = 0; I != NumParts; ++I) {
1256 unsigned Offset = I * NarrowSize;
1257 auto K = MIRBuilder.buildConstant(NarrowTy,
1258 Val.lshr(Offset).trunc(NarrowSize));
1259 PartRegs.push_back(K.getReg(0));
1260 }
1261
1262 LLT LeftoverTy;
1263 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1264 SmallVector<Register, 1> LeftoverRegs;
1265 if (LeftoverBits != 0) {
1266 LeftoverTy = LLT::scalar(LeftoverBits);
1267 auto K = MIRBuilder.buildConstant(
1268 LeftoverTy,
1269 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1270 LeftoverRegs.push_back(K.getReg(0));
1271 }
1272
1273 insertParts(MI.getOperand(0).getReg(),
1274 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1275
1276 MI.eraseFromParent();
1277 return Legalized;
1278 }
1279 case TargetOpcode::G_SEXT:
1280 case TargetOpcode::G_ZEXT:
1281 case TargetOpcode::G_ANYEXT:
1282 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1283 case TargetOpcode::G_TRUNC: {
1284 if (TypeIdx != 1)
1285 return UnableToLegalize;
1286
1287 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1288 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1289 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1290 return UnableToLegalize;
1291 }
1292
1293 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1294 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1295 MI.eraseFromParent();
1296 return Legalized;
1297 }
1298
1299 case TargetOpcode::G_FREEZE: {
1300 if (TypeIdx != 0)
1301 return UnableToLegalize;
1302
1303 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1304 // Should widen scalar first
1305 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1306 return UnableToLegalize;
1307
1308 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1309 SmallVector<Register, 8> Parts;
1310 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1311 Parts.push_back(
1312 MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
1313 }
1314
1315 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1316 MI.eraseFromParent();
1317 return Legalized;
1318 }
1319 case TargetOpcode::G_ADD:
1320 case TargetOpcode::G_SUB:
1321 case TargetOpcode::G_SADDO:
1322 case TargetOpcode::G_SSUBO:
1323 case TargetOpcode::G_SADDE:
1324 case TargetOpcode::G_SSUBE:
1325 case TargetOpcode::G_UADDO:
1326 case TargetOpcode::G_USUBO:
1327 case TargetOpcode::G_UADDE:
1328 case TargetOpcode::G_USUBE:
1329 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1330 case TargetOpcode::G_MUL:
1331 case TargetOpcode::G_UMULH:
1332 return narrowScalarMul(MI, NarrowTy);
1333 case TargetOpcode::G_EXTRACT:
1334 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1335 case TargetOpcode::G_INSERT:
1336 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1337 case TargetOpcode::G_LOAD: {
1338 auto &LoadMI = cast<GLoad>(MI);
1339 Register DstReg = LoadMI.getDstReg();
1340 LLT DstTy = MRI.getType(DstReg);
1341 if (DstTy.isVector())
1342 return UnableToLegalize;
1343
1344 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1345 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1346 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1347 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1348 LoadMI.eraseFromParent();
1349 return Legalized;
1350 }
1351
1352 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1353 }
1354 case TargetOpcode::G_ZEXTLOAD:
1355 case TargetOpcode::G_SEXTLOAD: {
1356 auto &LoadMI = cast<GExtLoad>(MI);
1357 Register DstReg = LoadMI.getDstReg();
1358 Register PtrReg = LoadMI.getPointerReg();
1359
1360 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1361 auto &MMO = LoadMI.getMMO();
1362 unsigned MemSize = MMO.getSizeInBits().getValue();
1363
1364 if (MemSize == NarrowSize) {
1365 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1366 } else if (MemSize < NarrowSize) {
1367 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1368 } else if (MemSize > NarrowSize) {
1369 // FIXME: Need to split the load.
1370 return UnableToLegalize;
1371 }
1372
1373 if (isa<GZExtLoad>(LoadMI))
1374 MIRBuilder.buildZExt(DstReg, TmpReg);
1375 else
1376 MIRBuilder.buildSExt(DstReg, TmpReg);
1377
1378 LoadMI.eraseFromParent();
1379 return Legalized;
1380 }
1381 case TargetOpcode::G_STORE: {
1382 auto &StoreMI = cast<GStore>(MI);
1383
1384 Register SrcReg = StoreMI.getValueReg();
1385 LLT SrcTy = MRI.getType(SrcReg);
1386 if (SrcTy.isVector())
1387 return UnableToLegalize;
1388
1389 int NumParts = SizeOp0 / NarrowSize;
1390 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1391 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1392 if (SrcTy.isVector() && LeftoverBits != 0)
1393 return UnableToLegalize;
1394
1395 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1396 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1397 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1398 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1399 StoreMI.eraseFromParent();
1400 return Legalized;
1401 }
1402
1403 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1404 }
1405 case TargetOpcode::G_SELECT:
1406 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1407 case TargetOpcode::G_AND:
1408 case TargetOpcode::G_OR:
1409 case TargetOpcode::G_XOR: {
1410 // Legalize bitwise operation:
1411 // A = BinOp<Ty> B, C
1412 // into:
1413 // B1, ..., BN = G_UNMERGE_VALUES B
1414 // C1, ..., CN = G_UNMERGE_VALUES C
1415 // A1 = BinOp<Ty/N> B1, C2
1416 // ...
1417 // AN = BinOp<Ty/N> BN, CN
1418 // A = G_MERGE_VALUES A1, ..., AN
1419 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1420 }
1421 case TargetOpcode::G_SHL:
1422 case TargetOpcode::G_LSHR:
1423 case TargetOpcode::G_ASHR:
1424 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1425 case TargetOpcode::G_CTLZ:
1426 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1427 case TargetOpcode::G_CTTZ:
1428 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1429 case TargetOpcode::G_CTPOP:
1430 if (TypeIdx == 1)
1431 switch (MI.getOpcode()) {
1432 case TargetOpcode::G_CTLZ:
1433 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1434 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1435 case TargetOpcode::G_CTTZ:
1436 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1437 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1438 case TargetOpcode::G_CTPOP:
1439 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1440 default:
1441 return UnableToLegalize;
1442 }
1443
1444 Observer.changingInstr(MI);
1445 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1446 Observer.changedInstr(MI);
1447 return Legalized;
1448 case TargetOpcode::G_INTTOPTR:
1449 if (TypeIdx != 1)
1450 return UnableToLegalize;
1451
1452 Observer.changingInstr(MI);
1453 narrowScalarSrc(MI, NarrowTy, 1);
1454 Observer.changedInstr(MI);
1455 return Legalized;
1456 case TargetOpcode::G_PTRTOINT:
1457 if (TypeIdx != 0)
1458 return UnableToLegalize;
1459
1460 Observer.changingInstr(MI);
1461 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1462 Observer.changedInstr(MI);
1463 return Legalized;
1464 case TargetOpcode::G_PHI: {
1465 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1466 // NarrowSize.
1467 if (SizeOp0 % NarrowSize != 0)
1468 return UnableToLegalize;
1469
1470 unsigned NumParts = SizeOp0 / NarrowSize;
1471 SmallVector<Register, 2> DstRegs(NumParts);
1472 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1473 Observer.changingInstr(MI);
1474 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1475 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1476 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1477 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1478 SrcRegs[i / 2], MIRBuilder, MRI);
1479 }
1480 MachineBasicBlock &MBB = *MI.getParent();
1481 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1482 for (unsigned i = 0; i < NumParts; ++i) {
1483 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1484 MachineInstrBuilder MIB =
1485 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1486 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1487 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1488 }
1489 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1490 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1491 Observer.changedInstr(MI);
1492 MI.eraseFromParent();
1493 return Legalized;
1494 }
1495 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1496 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1497 if (TypeIdx != 2)
1498 return UnableToLegalize;
1499
1500 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1501 Observer.changingInstr(MI);
1502 narrowScalarSrc(MI, NarrowTy, OpIdx);
1503 Observer.changedInstr(MI);
1504 return Legalized;
1505 }
1506 case TargetOpcode::G_ICMP: {
1507 Register LHS = MI.getOperand(2).getReg();
1508 LLT SrcTy = MRI.getType(LHS);
1509 uint64_t SrcSize = SrcTy.getSizeInBits();
1510 CmpInst::Predicate Pred =
1511 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1512
1513 // TODO: Handle the non-equality case for weird sizes.
1514 if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
1515 return UnableToLegalize;
1516
1517 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1518 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1519 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1520 LHSLeftoverRegs, MIRBuilder, MRI))
1521 return UnableToLegalize;
1522
1523 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1524 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1525 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1526 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1527 return UnableToLegalize;
1528
1529 // We now have the LHS and RHS of the compare split into narrow-type
1530 // registers, plus potentially some leftover type.
1531 Register Dst = MI.getOperand(0).getReg();
1532 LLT ResTy = MRI.getType(Dst);
1533 if (ICmpInst::isEquality(Pred)) {
1534 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1535 // them together. For each equal part, the result should be all 0s. For
1536 // each non-equal part, we'll get at least one 1.
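// Editorial example: an s64 equality compare narrowed to s32 becomes
//   %xl:_(s32) = G_XOR %lhs_lo, %rhs_lo
//   %xh:_(s32) = G_XOR %lhs_hi, %rhs_hi
//   %or:_(s32) = G_OR %xl, %xh
//   %res:_(s1) = G_ICMP eq %or, 0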
1537 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1538 SmallVector<Register, 4> Xors;
1539 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1540 auto LHS = std::get<0>(LHSAndRHS);
1541 auto RHS = std::get<1>(LHSAndRHS);
1542 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1543 Xors.push_back(Xor);
1544 }
1545
1546 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1547 // to the desired narrow type so that we can OR them together later.
1548 SmallVector<Register, 4> WidenedXors;
1549 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1550 auto LHS = std::get<0>(LHSAndRHS);
1551 auto RHS = std::get<1>(LHSAndRHS);
1552 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1553 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1554 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1555 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1556 Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
1557 }
1558
1559 // Now, for each part we broke up, we know if they are equal/not equal
1560 // based off the G_XOR. We can OR these all together and compare against
1561 // 0 to get the result.
1562 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1563 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1564 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1565 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1566 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1567 } else {
1568 // TODO: Handle non-power-of-two types.
1569 assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
1570 assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
1571 Register LHSL = LHSPartRegs[0];
1572 Register LHSH = LHSPartRegs[1];
1573 Register RHSL = RHSPartRegs[0];
1574 Register RHSH = RHSPartRegs[1];
1575 MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1576 MachineInstrBuilder CmpHEQ =
1577 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
1578 MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
1579 ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1580 MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
1581 }
1582 MI.eraseFromParent();
1583 return Legalized;
1584 }
1585 case TargetOpcode::G_FCMP:
1586 if (TypeIdx != 0)
1587 return UnableToLegalize;
1588
1589 Observer.changingInstr(MI);
1590 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1591 Observer.changedInstr(MI);
1592 return Legalized;
1593
1594 case TargetOpcode::G_SEXT_INREG: {
1595 if (TypeIdx != 0)
1596 return UnableToLegalize;
1597
1598 int64_t SizeInBits = MI.getOperand(2).getImm();
1599
1600 // So long as the new type has more bits than the bits we're extending we
1601 // don't need to break it apart.
1602 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1603 Observer.changingInstr(MI);
1604 // We don't lose any non-extension bits by truncating the src and
1605 // sign-extending the dst.
1606 MachineOperand &MO1 = MI.getOperand(1);
1607 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1608 MO1.setReg(TruncMIB.getReg(0));
1609
1610 MachineOperand &MO2 = MI.getOperand(0);
1611 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1612 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1613 MIRBuilder.buildSExt(MO2, DstExt);
1614 MO2.setReg(DstExt);
1615 Observer.changedInstr(MI);
1616 return Legalized;
1617 }
1618
1619 // Break it apart. Components below the extension point are unmodified. The
1620 // component containing the extension point becomes a narrower SEXT_INREG.
1621 // Components above it are ashr'd from the component containing the
1622 // extension point.
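// Editorial example: G_SEXT_INREG %x:_(s128), 40 with NarrowTy = s32 keeps
// part 0 unchanged, turns part 1 into G_SEXT_INREG ..., 8 (40 % 32), and
// produces parts 2 and 3 as a G_ASHR of part 1 by 31.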
1623 if (SizeOp0 % NarrowSize != 0)
1624 return UnableToLegalize;
1625 int NumParts = SizeOp0 / NarrowSize;
1626
1627 // List the registers where the destination will be scattered.
1628 SmallVector<Register, 2> DstRegs;
1629 // List the registers where the source will be split.
1630 SmallVector<Register, 2> SrcRegs;
1631
1632 // Create all the temporary registers.
1633 for (int i = 0; i < NumParts; ++i) {
1634 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1635
1636 SrcRegs.push_back(SrcReg);
1637 }
1638
1639 // Explode the big arguments into smaller chunks.
1640 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1641
1642 Register AshrCstReg =
1643 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1644 .getReg(0);
1645 Register FullExtensionReg;
1646 Register PartialExtensionReg;
1647
1648 // Do the operation on each small part.
1649 for (int i = 0; i < NumParts; ++i) {
1650 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
1651 DstRegs.push_back(SrcRegs[i]);
1652 PartialExtensionReg = DstRegs.back();
1653 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1654 assert(PartialExtensionReg &&
1655 "Expected to visit partial extension before full");
1656 if (FullExtensionReg) {
1657 DstRegs.push_back(FullExtensionReg);
1658 continue;
1659 }
1660 DstRegs.push_back(
1661 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1662 .getReg(0));
1663 FullExtensionReg = DstRegs.back();
1664 } else {
1665 DstRegs.push_back(
1666 MIRBuilder
1667 .buildInstr(
1668 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1669 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1670 .getReg(0));
1671 PartialExtensionReg = DstRegs.back();
1672 }
1673 }
1674
1675 // Gather the destination registers into the final destination.
1676 Register DstReg = MI.getOperand(0).getReg();
1677 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1678 MI.eraseFromParent();
1679 return Legalized;
1680 }
1681 case TargetOpcode::G_BSWAP:
1682 case TargetOpcode::G_BITREVERSE: {
1683 if (SizeOp0 % NarrowSize != 0)
1684 return UnableToLegalize;
1685
1686 Observer.changingInstr(MI);
1687 SmallVector<Register, 2> SrcRegs, DstRegs;
1688 unsigned NumParts = SizeOp0 / NarrowSize;
1689 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
1690 MIRBuilder, MRI);
1691
1692 for (unsigned i = 0; i < NumParts; ++i) {
1693 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1694 {SrcRegs[NumParts - 1 - i]});
1695 DstRegs.push_back(DstPart.getReg(0));
1696 }
1697
1698 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1699
1700 Observer.changedInstr(MI);
1701 MI.eraseFromParent();
1702 return Legalized;
1703 }
1704 case TargetOpcode::G_PTR_ADD:
1705 case TargetOpcode::G_PTRMASK: {
1706 if (TypeIdx != 1)
1707 return UnableToLegalize;
1708 Observer.changingInstr(MI);
1709 narrowScalarSrc(MI, NarrowTy, 2);
1710 Observer.changedInstr(MI);
1711 return Legalized;
1712 }
1713 case TargetOpcode::G_FPTOUI:
1714 case TargetOpcode::G_FPTOSI:
1715 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1716 case TargetOpcode::G_FPEXT:
1717 if (TypeIdx != 0)
1718 return UnableToLegalize;
1719 Observer.changingInstr(MI);
1720 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1721 Observer.changedInstr(MI);
1722 return Legalized;
1723 case TargetOpcode::G_FLDEXP:
1724 case TargetOpcode::G_STRICT_FLDEXP:
1725 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
1726 case TargetOpcode::G_VSCALE: {
1727 Register Dst = MI.getOperand(0).getReg();
1728 LLT Ty = MRI.getType(Dst);
1729
1730 // Assume VSCALE(1) fits into a legal integer
1731 const APInt One(NarrowTy.getSizeInBits(), 1);
1732 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
1733 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
1734 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
1735 MIRBuilder.buildMul(Dst, ZExt, C);
1736
1737 MI.eraseFromParent();
1738 return Legalized;
1739 }
1740 }
1741}
1742
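/// Coerce \p Val to a scalar of the same total size, using G_PTRTOINT and/or
/// G_BITCAST as needed. Returns an invalid register for a pointer in a
/// non-integral address space.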
1743 Register LegalizerHelper::coerceToScalar(Register Val) {
1744 LLT Ty = MRI.getType(Val);
1745 if (Ty.isScalar())
1746 return Val;
1747
1748 const DataLayout &DL = MIRBuilder.getDataLayout();
1749 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1750 if (Ty.isPointer()) {
1751 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1752 return Register();
1753 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1754 }
1755
1756 Register NewVal = Val;
1757
1758 assert(Ty.isVector());
1759 if (Ty.isPointerVector())
1760 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1761 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1762}
1763
1764 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1765 unsigned OpIdx, unsigned ExtOpcode) {
1766 MachineOperand &MO = MI.getOperand(OpIdx);
1767 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1768 MO.setReg(ExtB.getReg(0));
1769}
1770
1771 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1772 unsigned OpIdx) {
1773 MachineOperand &MO = MI.getOperand(OpIdx);
1774 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1775 MO.setReg(ExtB.getReg(0));
1776}
1777
1778 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1779 unsigned OpIdx, unsigned TruncOpcode) {
1780 MachineOperand &MO = MI.getOperand(OpIdx);
1781 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
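  // The conversion back to the original register has to be emitted after MI
  // itself, so it consumes the widened def that MI will now produce.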
1782 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1783 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1784 MO.setReg(DstExt);
1785}
1786
1787 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1788 unsigned OpIdx, unsigned ExtOpcode) {
1789 MachineOperand &MO = MI.getOperand(OpIdx);
1790 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1791 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1792 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1793 MO.setReg(DstTrunc);
1794}
1795
1796 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1797 unsigned OpIdx) {
1798 MachineOperand &MO = MI.getOperand(OpIdx);
1799 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1800 Register Dst = MO.getReg();
1801 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1802 MO.setReg(DstExt);
1803 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
1804}
1805
1806 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1807 unsigned OpIdx) {
1808 MachineOperand &MO = MI.getOperand(OpIdx);
1811}
1812
1813void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1814 MachineOperand &Op = MI.getOperand(OpIdx);
1815 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1816}
1817
1818void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1819 MachineOperand &MO = MI.getOperand(OpIdx);
1820 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1822 MIRBuilder.buildBitcast(MO, CastDst);
1823 MO.setReg(CastDst);
1824}
1825
1827LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1828 LLT WideTy) {
1829 if (TypeIdx != 1)
1830 return UnableToLegalize;
1831
1832 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
1833 if (DstTy.isVector())
1834 return UnableToLegalize;
1835
1836 LLT SrcTy = MRI.getType(Src1Reg);
1837 const int DstSize = DstTy.getSizeInBits();
1838 const int SrcSize = SrcTy.getSizeInBits();
1839 const int WideSize = WideTy.getSizeInBits();
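  // Number of WideTy-sized pieces needed to cover the destination, rounded up.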
1840 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1841
1842 unsigned NumOps = MI.getNumOperands();
1843 unsigned NumSrc = MI.getNumOperands() - 1;
1844 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1845
1846 if (WideSize >= DstSize) {
1847 // Directly pack the bits in the target type.
1848 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
1849
1850 for (unsigned I = 2; I != NumOps; ++I) {
1851 const unsigned Offset = (I - 1) * PartSize;
1852
1853 Register SrcReg = MI.getOperand(I).getReg();
1854 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1855
1856 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1857
1858 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1859 MRI.createGenericVirtualRegister(WideTy);
1860
1861 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1862 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1863 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1864 ResultReg = NextResult;
1865 }
1866
1867 if (WideSize > DstSize)
1868 MIRBuilder.buildTrunc(DstReg, ResultReg);
1869 else if (DstTy.isPointer())
1870 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1871
1872 MI.eraseFromParent();
1873 return Legalized;
1874 }
1875
1876 // Unmerge the original values to the GCD type, and recombine to the next
1877 // multiple greater than the original type.
1878 //
1879 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1880 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1881 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1882 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1883 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1884 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1885 // %12:_(s12) = G_MERGE_VALUES %10, %11
1886 //
1887 // Padding with undef if necessary:
1888 //
1889 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1890 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1891 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1892 // %7:_(s2) = G_IMPLICIT_DEF
1893 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1894 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1895 // %10:_(s12) = G_MERGE_VALUES %8, %9
1896
1897 const int GCD = std::gcd(SrcSize, WideSize);
1898 LLT GCDTy = LLT::scalar(GCD);
1899
1901 SmallVector<Register, 8> NewMergeRegs;
1902 SmallVector<Register, 8> Unmerges;
1903 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1904
1905 // Decompose the original operands if they don't evenly divide.
1906 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
1907 Register SrcReg = MO.getReg();
1908 if (GCD == SrcSize) {
1909 Unmerges.push_back(SrcReg);
1910 } else {
1911 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1912 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1913 Unmerges.push_back(Unmerge.getReg(J));
1914 }
1915 }
1916
1917 // Pad with undef to the next size that is a multiple of the requested size.
1918 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1919 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1920 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1921 Unmerges.push_back(UndefReg);
1922 }
1923
1924 const int PartsPerGCD = WideSize / GCD;
1925
1926 // Build merges of each piece.
1927 ArrayRef<Register> Slicer(Unmerges);
1928 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1929 auto Merge =
1930 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
1931 NewMergeRegs.push_back(Merge.getReg(0));
1932 }
1933
1934 // A truncate may be necessary if the requested type doesn't evenly divide the
1935 // original result type.
1936 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1937 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
1938 } else {
1939 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
1940 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1941 }
1942
1943 MI.eraseFromParent();
1944 return Legalized;
1945}
1946
1948LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1949 LLT WideTy) {
1950 if (TypeIdx != 0)
1951 return UnableToLegalize;
1952
1953 int NumDst = MI.getNumOperands() - 1;
1954 Register SrcReg = MI.getOperand(NumDst).getReg();
1955 LLT SrcTy = MRI.getType(SrcReg);
1956 if (SrcTy.isVector())
1957 return UnableToLegalize;
1958
1959 Register Dst0Reg = MI.getOperand(0).getReg();
1960 LLT DstTy = MRI.getType(Dst0Reg);
1961 if (!DstTy.isScalar())
1962 return UnableToLegalize;
1963
1964 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
1965 if (SrcTy.isPointer()) {
1966 const DataLayout &DL = MIRBuilder.getDataLayout();
1967 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
1968 LLVM_DEBUG(
1969 dbgs() << "Not casting non-integral address space integer\n");
1970 return UnableToLegalize;
1971 }
1972
1973 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
1974 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
1975 }
1976
1977 // Widen SrcTy to WideTy. This does not affect the result, but since the
1978 // user requested this size, it is probably better handled than SrcTy and
1979 // should reduce the total number of legalization artifacts.
1980 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1981 SrcTy = WideTy;
1982 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
1983 }
1984
1985 // There's no unmerge type to target. Directly extract the bits from the
1986 // source type.
1987 unsigned DstSize = DstTy.getSizeInBits();
1988
1989 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
1990 for (int I = 1; I != NumDst; ++I) {
1991 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
1992 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
1993 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
1994 }
1995
1996 MI.eraseFromParent();
1997 return Legalized;
1998 }
1999
2000 // Extend the source to a wider type.
2001 LLT LCMTy = getLCMType(SrcTy, WideTy);
2002
2003 Register WideSrc = SrcReg;
2004 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2005 // TODO: If this is an integral address space, cast to integer and anyext.
2006 if (SrcTy.isPointer()) {
2007 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2008 return UnableToLegalize;
2009 }
2010
2011 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2012 }
2013
2014 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2015
2016 // Create a sequence of unmerges and merges to the original results. Since we
2017 // may have widened the source, we will need to pad the results with dead defs
2018 // to cover the source register.
2019 // e.g. widen s48 to s64:
2020 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2021 //
2022 // =>
2023 // %4:_(s192) = G_ANYEXT %0:_(s96)
2024 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2025 // ; unpack to GCD type, with extra dead defs
2026 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2027 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2028 // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
2029 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2030 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2031 const LLT GCDTy = getGCDType(WideTy, DstTy);
2032 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2033 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2034
2035 // Directly unmerge to the destination without going through a GCD type
2036 // if possible
2037 if (PartsPerRemerge == 1) {
2038 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2039
2040 for (int I = 0; I != NumUnmerge; ++I) {
2041 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2042
2043 for (int J = 0; J != PartsPerUnmerge; ++J) {
2044 int Idx = I * PartsPerUnmerge + J;
2045 if (Idx < NumDst)
2046 MIB.addDef(MI.getOperand(Idx).getReg());
2047 else {
2048 // Create dead def for excess components.
2049 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2050 }
2051 }
2052
2053 MIB.addUse(Unmerge.getReg(I));
2054 }
2055 } else {
2057 for (int J = 0; J != NumUnmerge; ++J)
2058 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2059
2060 SmallVector<Register, 8> RemergeParts;
2061 for (int I = 0; I != NumDst; ++I) {
2062 for (int J = 0; J < PartsPerRemerge; ++J) {
2063 const int Idx = I * PartsPerRemerge + J;
2064 RemergeParts.emplace_back(Parts[Idx]);
2065 }
2066
2067 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2068 RemergeParts.clear();
2069 }
2070 }
2071
2072 MI.eraseFromParent();
2073 return Legalized;
2074}
2075
2077LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2078 LLT WideTy) {
2079 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2080 unsigned Offset = MI.getOperand(2).getImm();
2081
2082 if (TypeIdx == 0) {
2083 if (SrcTy.isVector() || DstTy.isVector())
2084 return UnableToLegalize;
2085
2086 SrcOp Src(SrcReg);
2087 if (SrcTy.isPointer()) {
2088 // Extracts from pointers can be handled only if they are really just
2089 // simple integers.
2090 const DataLayout &DL = MIRBuilder.getDataLayout();
2091 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2092 return UnableToLegalize;
2093
2094 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2095 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2096 SrcTy = SrcAsIntTy;
2097 }
2098
2099 if (DstTy.isPointer())
2100 return UnableToLegalize;
2101
2102 if (Offset == 0) {
2103 // Avoid a shift in the degenerate case.
2104 MIRBuilder.buildTrunc(DstReg,
2105 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2106 MI.eraseFromParent();
2107 return Legalized;
2108 }
2109
2110 // Do a shift in the source type.
2111 LLT ShiftTy = SrcTy;
2112 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2113 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2114 ShiftTy = WideTy;
2115 }
2116
2117 auto LShr = MIRBuilder.buildLShr(
2118 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2119 MIRBuilder.buildTrunc(DstReg, LShr);
2120 MI.eraseFromParent();
2121 return Legalized;
2122 }
2123
2124 if (SrcTy.isScalar()) {
2126 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2128 return Legalized;
2129 }
2130
2131 if (!SrcTy.isVector())
2132 return UnableToLegalize;
2133
2134 if (DstTy != SrcTy.getElementType())
2135 return UnableToLegalize;
2136
2137 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2138 return UnableToLegalize;
2139
2141 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2142
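  // The extract offset is a bit offset into the source; scale it by the ratio
  // of the widened vector size to the original so it still addresses the same
  // element.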
2143 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2144 Offset);
2145 widenScalarDst(MI, WideTy.getScalarType(), 0);
2147 return Legalized;
2148}
2149
2151LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2152 LLT WideTy) {
2153 if (TypeIdx != 0 || WideTy.isVector())
2154 return UnableToLegalize;
2156 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2157 widenScalarDst(MI, WideTy);
2159 return Legalized;
2160}
2161
2163LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2164 LLT WideTy) {
2165 unsigned Opcode;
2166 unsigned ExtOpcode;
2167 std::optional<Register> CarryIn;
2168 switch (MI.getOpcode()) {
2169 default:
2170 llvm_unreachable("Unexpected opcode!");
2171 case TargetOpcode::G_SADDO:
2172 Opcode = TargetOpcode::G_ADD;
2173 ExtOpcode = TargetOpcode::G_SEXT;
2174 break;
2175 case TargetOpcode::G_SSUBO:
2176 Opcode = TargetOpcode::G_SUB;
2177 ExtOpcode = TargetOpcode::G_SEXT;
2178 break;
2179 case TargetOpcode::G_UADDO:
2180 Opcode = TargetOpcode::G_ADD;
2181 ExtOpcode = TargetOpcode::G_ZEXT;
2182 break;
2183 case TargetOpcode::G_USUBO:
2184 Opcode = TargetOpcode::G_SUB;
2185 ExtOpcode = TargetOpcode::G_ZEXT;
2186 break;
2187 case TargetOpcode::G_SADDE:
2188 Opcode = TargetOpcode::G_UADDE;
2189 ExtOpcode = TargetOpcode::G_SEXT;
2190 CarryIn = MI.getOperand(4).getReg();
2191 break;
2192 case TargetOpcode::G_SSUBE:
2193 Opcode = TargetOpcode::G_USUBE;
2194 ExtOpcode = TargetOpcode::G_SEXT;
2195 CarryIn = MI.getOperand(4).getReg();
2196 break;
2197 case TargetOpcode::G_UADDE:
2198 Opcode = TargetOpcode::G_UADDE;
2199 ExtOpcode = TargetOpcode::G_ZEXT;
2200 CarryIn = MI.getOperand(4).getReg();
2201 break;
2202 case TargetOpcode::G_USUBE:
2203 Opcode = TargetOpcode::G_USUBE;
2204 ExtOpcode = TargetOpcode::G_ZEXT;
2205 CarryIn = MI.getOperand(4).getReg();
2206 break;
2207 }
2208
2209 if (TypeIdx == 1) {
2210 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2211
2213 if (CarryIn)
2214 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2215 widenScalarDst(MI, WideTy, 1);
2216
2218 return Legalized;
2219 }
2220
2221 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2222 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2223 // Do the arithmetic in the larger type.
2224 Register NewOp;
2225 if (CarryIn) {
2226 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2227 NewOp = MIRBuilder
2228 .buildInstr(Opcode, {WideTy, CarryOutTy},
2229 {LHSExt, RHSExt, *CarryIn})
2230 .getReg(0);
2231 } else {
2232 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2233 }
2234 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2235 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2236 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2237 // There is no overflow if the ExtOp is the same as NewOp.
2238 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2239 // Now trunc the NewOp to the original result.
2240 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2241 MI.eraseFromParent();
2242 return Legalized;
2243}
2244
2246LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2247 LLT WideTy) {
2248 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2249 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2250 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2251 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2252 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2253 // We can convert this to:
2254 // 1. Any extend iN to iM
2255 // 2. SHL by M-N
2256 // 3. [US][ADD|SUB|SHL]SAT
2257 // 4. L/ASHR by M-N
2258 //
2259 // It may be more efficient to lower this to a min and a max operation in
2260 // the higher precision arithmetic if the promoted operation isn't legal,
2261 // but this decision is up to the target's lowering request.
2262 Register DstReg = MI.getOperand(0).getReg();
2263
2264 unsigned NewBits = WideTy.getScalarSizeInBits();
2265 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2266
2267 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2268 // must not left shift the RHS to preserve the shift amount.
2269 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2270 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2271 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2272 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2273 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2274 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2275
2276 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2277 {ShiftL, ShiftR}, MI.getFlags());
2278
2279 // Use a shift that will preserve the number of sign bits when the trunc is
2280 // folded away.
2281 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2282 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2283
2284 MIRBuilder.buildTrunc(DstReg, Result);
2285 MI.eraseFromParent();
2286 return Legalized;
2287}
2288
2290LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2291 LLT WideTy) {
2292 if (TypeIdx == 1) {
2294 widenScalarDst(MI, WideTy, 1);
2296 return Legalized;
2297 }
2298
2299 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2300 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2301 LLT SrcTy = MRI.getType(LHS);
2302 LLT OverflowTy = MRI.getType(OriginalOverflow);
2303 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2304
2305 // To determine if the result overflowed in the larger type, we extend the
2306 // input to the larger type, do the multiply (checking if it overflows),
2307 // then also check the high bits of the result to see if overflow happened
2308 // there.
2309 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2310 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2311 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2312
2313 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2314 // so we don't need to check the overflow result of larger type Mulo.
2315 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2316
2317 unsigned MulOpc =
2318 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2319
2320 MachineInstrBuilder Mulo;
2321 if (WideMulCanOverflow)
2322 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2323 {LeftOperand, RightOperand});
2324 else
2325 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2326
2327 auto Mul = Mulo->getOperand(0);
2328 MIRBuilder.buildTrunc(Result, Mul);
2329
2330 MachineInstrBuilder ExtResult;
2331 // Overflow occurred if it occurred in the larger type, or if the high part
2332 // of the result does not zero/sign-extend the low part. Check this second
2333 // possibility first.
2334 if (IsSigned) {
2335 // For signed, overflow occurred when the high part does not sign-extend
2336 // the low part.
2337 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2338 } else {
2339 // Unsigned overflow occurred when the high part does not zero-extend the
2340 // low part.
2341 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2342 }
2343
2344 if (WideMulCanOverflow) {
2345 auto Overflow =
2346 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2347 // Finally check if the multiplication in the larger type itself overflowed.
2348 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2349 } else {
2350 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2351 }
2352 MI.eraseFromParent();
2353 return Legalized;
2354}
2355
2358 switch (MI.getOpcode()) {
2359 default:
2360 return UnableToLegalize;
2361 case TargetOpcode::G_ATOMICRMW_XCHG:
2362 case TargetOpcode::G_ATOMICRMW_ADD:
2363 case TargetOpcode::G_ATOMICRMW_SUB:
2364 case TargetOpcode::G_ATOMICRMW_AND:
2365 case TargetOpcode::G_ATOMICRMW_OR:
2366 case TargetOpcode::G_ATOMICRMW_XOR:
2367 case TargetOpcode::G_ATOMICRMW_MIN:
2368 case TargetOpcode::G_ATOMICRMW_MAX:
2369 case TargetOpcode::G_ATOMICRMW_UMIN:
2370 case TargetOpcode::G_ATOMICRMW_UMAX:
2371 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2373 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2374 widenScalarDst(MI, WideTy, 0);
2376 return Legalized;
2377 case TargetOpcode::G_ATOMIC_CMPXCHG:
2378 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2380 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2381 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2382 widenScalarDst(MI, WideTy, 0);
2384 return Legalized;
2385 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2386 if (TypeIdx == 0) {
2388 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2389 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2390 widenScalarDst(MI, WideTy, 0);
2392 return Legalized;
2393 }
2394 assert(TypeIdx == 1 &&
2395 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2397 widenScalarDst(MI, WideTy, 1);
2399 return Legalized;
2400 case TargetOpcode::G_EXTRACT:
2401 return widenScalarExtract(MI, TypeIdx, WideTy);
2402 case TargetOpcode::G_INSERT:
2403 return widenScalarInsert(MI, TypeIdx, WideTy);
2404 case TargetOpcode::G_MERGE_VALUES:
2405 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2406 case TargetOpcode::G_UNMERGE_VALUES:
2407 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2408 case TargetOpcode::G_SADDO:
2409 case TargetOpcode::G_SSUBO:
2410 case TargetOpcode::G_UADDO:
2411 case TargetOpcode::G_USUBO:
2412 case TargetOpcode::G_SADDE:
2413 case TargetOpcode::G_SSUBE:
2414 case TargetOpcode::G_UADDE:
2415 case TargetOpcode::G_USUBE:
2416 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2417 case TargetOpcode::G_UMULO:
2418 case TargetOpcode::G_SMULO:
2419 return widenScalarMulo(MI, TypeIdx, WideTy);
2420 case TargetOpcode::G_SADDSAT:
2421 case TargetOpcode::G_SSUBSAT:
2422 case TargetOpcode::G_SSHLSAT:
2423 case TargetOpcode::G_UADDSAT:
2424 case TargetOpcode::G_USUBSAT:
2425 case TargetOpcode::G_USHLSAT:
2426 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2427 case TargetOpcode::G_CTTZ:
2428 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2429 case TargetOpcode::G_CTLZ:
2430 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2431 case TargetOpcode::G_CTPOP: {
2432 if (TypeIdx == 0) {
2434 widenScalarDst(MI, WideTy, 0);
2436 return Legalized;
2437 }
2438
2439 Register SrcReg = MI.getOperand(1).getReg();
2440
2441 // First extend the input.
2442 unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
2443 MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
2444 ? TargetOpcode::G_ANYEXT
2445 : TargetOpcode::G_ZEXT;
2446 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2447 LLT CurTy = MRI.getType(SrcReg);
2448 unsigned NewOpc = MI.getOpcode();
2449 if (NewOpc == TargetOpcode::G_CTTZ) {
2450 // The count is the same in the larger type except if the original
2451 // value was zero. This can be handled by setting the bit just off
2452 // the top of the original type.
2453 auto TopBit =
2454 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2455 MIBSrc = MIRBuilder.buildOr(
2456 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2457 // Now we know the operand is non-zero, use the more relaxed opcode.
2458 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2459 }
2460
2461 // Perform the operation at the larger size.
2462 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2463 // This is already the correct result for CTPOP and CTTZs
2464 if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
2465 MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2466 // The correct result is NewOp - (Difference in widety and current ty).
2467 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2468 MIBNewOp = MIRBuilder.buildSub(
2469 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2470 }
2471
2472 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2473 MI.eraseFromParent();
2474 return Legalized;
2475 }
2476 case TargetOpcode::G_BSWAP: {
2478 Register DstReg = MI.getOperand(0).getReg();
2479
2480 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2481 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2482 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2483 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2484
2485 MI.getOperand(0).setReg(DstExt);
2486
2487 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2488
2489 LLT Ty = MRI.getType(DstReg);
2490 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2491 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2492 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2493
2494 MIRBuilder.buildTrunc(DstReg, ShrReg);
2496 return Legalized;
2497 }
2498 case TargetOpcode::G_BITREVERSE: {
2500
2501 Register DstReg = MI.getOperand(0).getReg();
2502 LLT Ty = MRI.getType(DstReg);
2503 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2504
2505 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2506 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2507 MI.getOperand(0).setReg(DstExt);
2508 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2509
2510 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2511 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2512 MIRBuilder.buildTrunc(DstReg, Shift);
2514 return Legalized;
2515 }
2516 case TargetOpcode::G_FREEZE:
2518 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2519 widenScalarDst(MI, WideTy);
2521 return Legalized;
2522
2523 case TargetOpcode::G_ABS:
2525 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2526 widenScalarDst(MI, WideTy);
2528 return Legalized;
2529
2530 case TargetOpcode::G_ADD:
2531 case TargetOpcode::G_AND:
2532 case TargetOpcode::G_MUL:
2533 case TargetOpcode::G_OR:
2534 case TargetOpcode::G_XOR:
2535 case TargetOpcode::G_SUB:
2536 case TargetOpcode::G_SHUFFLE_VECTOR:
2537 // Perform operation at larger width (any extension is fine here, high bits
2538 // don't affect the result) and then truncate the result back to the
2539 // original type.
2541 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2542 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2543 widenScalarDst(MI, WideTy);
2545 return Legalized;
2546
2547 case TargetOpcode::G_SBFX:
2548 case TargetOpcode::G_UBFX:
2550
2551 if (TypeIdx == 0) {
2552 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2553 widenScalarDst(MI, WideTy);
2554 } else {
2555 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2556 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2557 }
2558
2560 return Legalized;
2561
2562 case TargetOpcode::G_SHL:
2564
2565 if (TypeIdx == 0) {
2566 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2567 widenScalarDst(MI, WideTy);
2568 } else {
2569 assert(TypeIdx == 1);
2570 // The "number of bits to shift" operand must preserve its value as an
2571 // unsigned integer:
2572 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2573 }
2574
2576 return Legalized;
2577
2578 case TargetOpcode::G_ROTR:
2579 case TargetOpcode::G_ROTL:
2580 if (TypeIdx != 1)
2581 return UnableToLegalize;
2582
2584 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2586 return Legalized;
2587
2588 case TargetOpcode::G_SDIV:
2589 case TargetOpcode::G_SREM:
2590 case TargetOpcode::G_SMIN:
2591 case TargetOpcode::G_SMAX:
2593 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2594 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2595 widenScalarDst(MI, WideTy);
2597 return Legalized;
2598
2599 case TargetOpcode::G_SDIVREM:
2601 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2602 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2603 widenScalarDst(MI, WideTy);
2604 widenScalarDst(MI, WideTy, 1);
2606 return Legalized;
2607
2608 case TargetOpcode::G_ASHR:
2609 case TargetOpcode::G_LSHR:
2611
2612 if (TypeIdx == 0) {
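      // Widening an arithmetic shift has to replicate the sign into the new
      // high bits, so sign-extend the source; a logical shift needs zeros.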
2613 unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
2614 TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2615
2616 widenScalarSrc(MI, WideTy, 1, CvtOp);
2617 widenScalarDst(MI, WideTy);
2618 } else {
2619 assert(TypeIdx == 1);
2620 // The "number of bits to shift" operand must preserve its value as an
2621 // unsigned integer:
2622 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2623 }
2624
2626 return Legalized;
2627 case TargetOpcode::G_UDIV:
2628 case TargetOpcode::G_UREM:
2629 case TargetOpcode::G_UMIN:
2630 case TargetOpcode::G_UMAX:
2632 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2633 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2634 widenScalarDst(MI, WideTy);
2636 return Legalized;
2637
2638 case TargetOpcode::G_UDIVREM:
2640 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2641 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2642 widenScalarDst(MI, WideTy);
2643 widenScalarDst(MI, WideTy, 1);
2645 return Legalized;
2646
2647 case TargetOpcode::G_SELECT:
2649 if (TypeIdx == 0) {
2650 // Perform operation at larger width (any extension is fine here, high
2651 // bits don't affect the result) and then truncate the result back to the
2652 // original type.
2653 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2654 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2655 widenScalarDst(MI, WideTy);
2656 } else {
2657 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
2658 // Explicit extension is required here since high bits affect the result.
2659 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
2660 }
2662 return Legalized;
2663
2664 case TargetOpcode::G_FPTOSI:
2665 case TargetOpcode::G_FPTOUI:
2666 case TargetOpcode::G_INTRINSIC_LRINT:
2667 case TargetOpcode::G_INTRINSIC_LLRINT:
2668 case TargetOpcode::G_IS_FPCLASS:
2670
2671 if (TypeIdx == 0)
2672 widenScalarDst(MI, WideTy);
2673 else
2674 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2675
2677 return Legalized;
2678 case TargetOpcode::G_SITOFP:
2680
2681 if (TypeIdx == 0)
2682 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2683 else
2684 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2685
2687 return Legalized;
2688 case TargetOpcode::G_UITOFP:
2690
2691 if (TypeIdx == 0)
2692 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2693 else
2694 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2695
2697 return Legalized;
2698 case TargetOpcode::G_LOAD:
2699 case TargetOpcode::G_SEXTLOAD:
2700 case TargetOpcode::G_ZEXTLOAD:
2702 widenScalarDst(MI, WideTy);
2704 return Legalized;
2705
2706 case TargetOpcode::G_STORE: {
2707 if (TypeIdx != 0)
2708 return UnableToLegalize;
2709
2710 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2711 if (!Ty.isScalar())
2712 return UnableToLegalize;
2713
2715
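    // i1 values are widened with zero-extension so the bits beyond the low one
    // have a known value; wider scalars only store their low bits, so
    // any-extend is enough.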
2716 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
2717 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2718 widenScalarSrc(MI, WideTy, 0, ExtType);
2719
2721 return Legalized;
2722 }
2723 case TargetOpcode::G_CONSTANT: {
2724 MachineOperand &SrcMO = MI.getOperand(1);
2726 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
2727 MRI.getType(MI.getOperand(0).getReg()));
2728 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2729 ExtOpc == TargetOpcode::G_ANYEXT) &&
2730 "Illegal Extend");
2731 const APInt &SrcVal = SrcMO.getCImm()->getValue();
2732 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2733 ? SrcVal.sext(WideTy.getSizeInBits())
2734 : SrcVal.zext(WideTy.getSizeInBits());
2736 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
2737
2738 widenScalarDst(MI, WideTy);
2740 return Legalized;
2741 }
2742 case TargetOpcode::G_FCONSTANT: {
2743 // To avoid changing the bits of the constant due to extension to a larger
2744 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
2745 MachineOperand &SrcMO = MI.getOperand(1);
2746 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
2748 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
2749 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
2750 MI.eraseFromParent();
2751 return Legalized;
2752 }
2753 case TargetOpcode::G_IMPLICIT_DEF: {
2755 widenScalarDst(MI, WideTy);
2757 return Legalized;
2758 }
2759 case TargetOpcode::G_BRCOND:
2761 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
2763 return Legalized;
2764
2765 case TargetOpcode::G_FCMP:
2767 if (TypeIdx == 0)
2768 widenScalarDst(MI, WideTy);
2769 else {
2770 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2771 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
2772 }
2774 return Legalized;
2775
2776 case TargetOpcode::G_ICMP:
2778 if (TypeIdx == 0)
2779 widenScalarDst(MI, WideTy);
2780 else {
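      // Comparisons with signed predicates need sign-extended operands;
      // unsigned and equality predicates use zero-extension.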
2781 unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
2782 MI.getOperand(1).getPredicate()))
2783 ? TargetOpcode::G_SEXT
2784 : TargetOpcode::G_ZEXT;
2785 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
2786 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
2787 }
2789 return Legalized;
2790
2791 case TargetOpcode::G_PTR_ADD:
2792 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
2794 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2796 return Legalized;
2797
2798 case TargetOpcode::G_PHI: {
2799 assert(TypeIdx == 0 && "Expecting only Idx 0");
2800
2802 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
2803 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2805 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
2806 }
2807
2808 MachineBasicBlock &MBB = *MI.getParent();
2810 widenScalarDst(MI, WideTy);
2812 return Legalized;
2813 }
2814 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2815 if (TypeIdx == 0) {
2816 Register VecReg = MI.getOperand(1).getReg();
2817 LLT VecTy = MRI.getType(VecReg);
2818 Observer.changingInstr(MI);
2819
2820 widenScalarSrc(
2821 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
2822 TargetOpcode::G_ANYEXT);
2823
2824 widenScalarDst(MI, WideTy, 0);
2826 return Legalized;
2827 }
2828
2829 if (TypeIdx != 2)
2830 return UnableToLegalize;
2832 // TODO: Probably should be zext
2833 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2835 return Legalized;
2836 }
2837 case TargetOpcode::G_INSERT_VECTOR_ELT: {
2838 if (TypeIdx == 0) {
2840 const LLT WideEltTy = WideTy.getElementType();
2841
2842 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2843 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
2844 widenScalarDst(MI, WideTy, 0);
2846 return Legalized;
2847 }
2848
2849 if (TypeIdx == 1) {
2851
2852 Register VecReg = MI.getOperand(1).getReg();
2853 LLT VecTy = MRI.getType(VecReg);
2854 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
2855
2856 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
2857 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2858 widenScalarDst(MI, WideVecTy, 0);
2860 return Legalized;
2861 }
2862
2863 if (TypeIdx == 2) {
2865 // TODO: Probably should be zext
2866 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2868 return Legalized;
2869 }
2870
2871 return UnableToLegalize;
2872 }
2873 case TargetOpcode::G_FADD:
2874 case TargetOpcode::G_FMUL:
2875 case TargetOpcode::G_FSUB:
2876 case TargetOpcode::G_FMA:
2877 case TargetOpcode::G_FMAD:
2878 case TargetOpcode::G_FNEG:
2879 case TargetOpcode::G_FABS:
2880 case TargetOpcode::G_FCANONICALIZE:
2881 case TargetOpcode::G_FMINNUM:
2882 case TargetOpcode::G_FMAXNUM:
2883 case TargetOpcode::G_FMINNUM_IEEE:
2884 case TargetOpcode::G_FMAXNUM_IEEE:
2885 case TargetOpcode::G_FMINIMUM:
2886 case TargetOpcode::G_FMAXIMUM:
2887 case TargetOpcode::G_FDIV:
2888 case TargetOpcode::G_FREM:
2889 case TargetOpcode::G_FCEIL:
2890 case TargetOpcode::G_FFLOOR:
2891 case TargetOpcode::G_FCOS:
2892 case TargetOpcode::G_FSIN:
2893 case TargetOpcode::G_FLOG10:
2894 case TargetOpcode::G_FLOG:
2895 case TargetOpcode::G_FLOG2:
2896 case TargetOpcode::G_FRINT:
2897 case TargetOpcode::G_FNEARBYINT:
2898 case TargetOpcode::G_FSQRT:
2899 case TargetOpcode::G_FEXP:
2900 case TargetOpcode::G_FEXP2:
2901 case TargetOpcode::G_FEXP10:
2902 case TargetOpcode::G_FPOW:
2903 case TargetOpcode::G_INTRINSIC_TRUNC:
2904 case TargetOpcode::G_INTRINSIC_ROUND:
2905 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2906 assert(TypeIdx == 0);
2908
2909 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
2910 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
2911
2912 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2914 return Legalized;
2915 case TargetOpcode::G_FPOWI:
2916 case TargetOpcode::G_FLDEXP:
2917 case TargetOpcode::G_STRICT_FLDEXP: {
2918 if (TypeIdx == 0) {
2919 if (MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP)
2920 return UnableToLegalize;
2921
2923 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2924 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2926 return Legalized;
2927 }
2928
2929 if (TypeIdx == 1) {
2930 // For some reason SelectionDAG tries to promote to a libcall without
2931 // actually changing the integer type for promotion.
2933 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2935 return Legalized;
2936 }
2937
2938 return UnableToLegalize;
2939 }
2940 case TargetOpcode::G_FFREXP: {
2942
2943 if (TypeIdx == 0) {
2944 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2945 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2946 } else {
2947 widenScalarDst(MI, WideTy, 1);
2948 }
2949
2951 return Legalized;
2952 }
2953 case TargetOpcode::G_INTTOPTR:
2954 if (TypeIdx != 1)
2955 return UnableToLegalize;
2956
2958 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2960 return Legalized;
2961 case TargetOpcode::G_PTRTOINT:
2962 if (TypeIdx != 0)
2963 return UnableToLegalize;
2964
2966 widenScalarDst(MI, WideTy, 0);
2968 return Legalized;
2969 case TargetOpcode::G_BUILD_VECTOR: {
2971
2972 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
2973 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
2974 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
2975
2976 // Avoid changing the result vector type if the source element type was
2977 // requested.
2978 if (TypeIdx == 1) {
2979 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
2980 } else {
2981 widenScalarDst(MI, WideTy, 0);
2982 }
2983
2985 return Legalized;
2986 }
2987 case TargetOpcode::G_SEXT_INREG:
2988 if (TypeIdx != 0)
2989 return UnableToLegalize;
2990
2992 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2993 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
2995 return Legalized;
2996 case TargetOpcode::G_PTRMASK: {
2997 if (TypeIdx != 1)
2998 return UnableToLegalize;
3000 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3002 return Legalized;
3003 }
3004 case TargetOpcode::G_VECREDUCE_FADD:
3005 case TargetOpcode::G_VECREDUCE_FMUL:
3006 case TargetOpcode::G_VECREDUCE_FMIN:
3007 case TargetOpcode::G_VECREDUCE_FMAX:
3008 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3009 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3010 if (TypeIdx != 0)
3011 return UnableToLegalize;
3013 Register VecReg = MI.getOperand(1).getReg();
3014 LLT VecTy = MRI.getType(VecReg);
3015 LLT WideVecTy = VecTy.isVector()
3016 ? LLT::vector(VecTy.getElementCount(), WideTy)
3017 : WideTy;
3018 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3019 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3021 return Legalized;
3022 }
3023 case TargetOpcode::G_VSCALE: {
3024 MachineOperand &SrcMO = MI.getOperand(1);
3026 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3027 // The CImm is always a signed value
3028 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3030 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3031 widenScalarDst(MI, WideTy);
3033 return Legalized;
3034 }
3035 case TargetOpcode::G_SPLAT_VECTOR: {
3036 if (TypeIdx != 1)
3037 return UnableToLegalize;
3038
3040 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3042 return Legalized;
3043 }
3044 }
3045}
3046
3047 static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3048 MachineIRBuilder &B, Register Src, LLT Ty) {
3049 auto Unmerge = B.buildUnmerge(Ty, Src);
3050 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3051 Pieces.push_back(Unmerge.getReg(I));
3052}
3053
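/// Materialize \p ConstVal by placing it in the function's constant pool and
/// emitting a load of it into \p DstReg.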
3054static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3055 MachineIRBuilder &MIRBuilder) {
3056 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3057 MachineFunction &MF = MIRBuilder.getMF();
3058 const DataLayout &DL = MIRBuilder.getDataLayout();
3059 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3060 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3061 LLT DstLLT = MRI.getType(DstReg);
3062
3063 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3064
3065 auto Addr = MIRBuilder.buildConstantPool(
3066 AddrPtrTy,
3067 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3068
3069 MachineMemOperand *MMO =
3070 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3071 MachineMemOperand::MOLoad, DstLLT, Alignment);
3072
3073 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3074}
3075
3076 LegalizerHelper::LegalizeResult
3077 LegalizerHelper::lowerConstant(MachineInstr &MI) {
3078 const MachineOperand &ConstOperand = MI.getOperand(1);
3079 const Constant *ConstantVal = ConstOperand.getCImm();
3080
3081 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3082 MI.eraseFromParent();
3083
3084 return Legalized;
3085}
3086
3087 LegalizerHelper::LegalizeResult
3088 LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3089 const MachineOperand &ConstOperand = MI.getOperand(1);
3090 const Constant *ConstantVal = ConstOperand.getFPImm();
3091
3092 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3093 MI.eraseFromParent();
3094
3095 return Legalized;
3096}
3097
3098 LegalizerHelper::LegalizeResult
3099 LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3100 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3101 if (SrcTy.isVector()) {
3102 LLT SrcEltTy = SrcTy.getElementType();
3103 SmallVector<Register, 8> SrcRegs;
3104
3105 if (DstTy.isVector()) {
3106 int NumDstElt = DstTy.getNumElements();
3107 int NumSrcElt = SrcTy.getNumElements();
3108
3109 LLT DstEltTy = DstTy.getElementType();
3110 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3111 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3112
3113 // If there's an element size mismatch, insert intermediate casts to match
3114 // the result element type.
3115 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3116 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3117 //
3118 // =>
3119 //
3120 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3121 // %4:_(<2 x s8>) = G_BITCAST %2
3122 // %5:_(<2 x s8>) = G_BITCAST %3
3123 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3124 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
3125 SrcPartTy = SrcEltTy;
3126 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3127 //
3128 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3129 //
3130 // =>
3131 //
3132 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3133 // %4:_(s16) = G_BITCAST %2
3134 // %5:_(s16) = G_BITCAST %3
3135 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3136 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
3137 DstCastTy = DstEltTy;
3138 }
3139
3140 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3141 for (Register &SrcReg : SrcRegs)
3142 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3143 } else
3144 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3145
3146 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3147 MI.eraseFromParent();
3148 return Legalized;
3149 }
3150
3151 if (DstTy.isVector()) {
3152 SmallVector<Register, 8> SrcRegs;
3153 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3154 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3155 MI.eraseFromParent();
3156 return Legalized;
3157 }
3158
3159 return UnableToLegalize;
3160}
3161
3162/// Figure out the bit offset into a register when coercing a vector index for
3163/// the wide element type. This is only for the case when promoting vector to
3164/// one with larger elements.
3165//
3166///
3167/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3168/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3169 static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3170 Register Idx,
3171 unsigned NewEltSize,
3172 unsigned OldEltSize) {
3173 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3174 LLT IdxTy = B.getMRI()->getType(Idx);
3175
3176 // Now figure out the amount we need to shift to get the target bits.
3177 auto OffsetMask = B.buildConstant(
3178 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3179 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3180 return B.buildShl(IdxTy, OffsetIdx,
3181 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3182}
3183
3184/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3185/// is casting to a vector with a smaller element size, perform multiple element
3186/// extracts and merge the results. If this is coercing to a vector with larger
3187/// elements, index the bitcasted vector and extract the target element with bit
3188/// operations. This is intended to force the indexing in the native register
3189/// size for architectures that can dynamically index the register file.
3192 LLT CastTy) {
3193 if (TypeIdx != 1)
3194 return UnableToLegalize;
3195
3196 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3197
3198 LLT SrcEltTy = SrcVecTy.getElementType();
3199 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3200 unsigned OldNumElts = SrcVecTy.getNumElements();
3201
3202 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3203 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3204
3205 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3206 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3207 if (NewNumElts > OldNumElts) {
3208 // Decreasing the vector element size
3209 //
3210 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3211 // =>
3212 // v4i32:castx = bitcast x:v2i64
3213 //
3214 // i64 = bitcast
3215 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3216 // (i32 (extract_vector_elt castx, (2 * y + 1)))
3217 //
3218 if (NewNumElts % OldNumElts != 0)
3219 return UnableToLegalize;
3220
3221 // Type of the intermediate result vector.
3222 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3223 LLT MidTy =
3224 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3225
3226 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3227
3228 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3229 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3230
3231 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3232 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3233 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3234 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3235 NewOps[I] = Elt.getReg(0);
3236 }
3237
3238 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3239 MIRBuilder.buildBitcast(Dst, NewVec);
3240 MI.eraseFromParent();
3241 return Legalized;
3242 }
3243
3244 if (NewNumElts < OldNumElts) {
3245 if (NewEltSize % OldEltSize != 0)
3246 return UnableToLegalize;
3247
3248 // This only depends on powers of 2 because we use bit tricks to figure out
3249 // the bit offset we need to shift to get the target element. A general
3250 // expansion could emit division/multiply.
3251 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3252 return UnableToLegalize;
3253
3254 // Increasing the vector element size.
3255 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3256 //
3257 // =>
3258 //
3259 // %cast = G_BITCAST %vec
3260 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3261 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3262 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3263 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3264 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3265 // %elt = G_TRUNC %elt_bits
3266
3267 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3268 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3269
3270 // Divide to get the index in the wider element type.
3271 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3272
3273 Register WideElt = CastVec;
3274 if (CastTy.isVector()) {
3275 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3276 ScaledIdx).getReg(0);
3277 }
3278
3279 // Compute the bit offset into the register of the target element.
3280 Register OffsetBits = getBitcastWiderVectorElementOffset(
3281 MIRBuilder, Idx, NewEltSize, OldEltSize);
3282
3283 // Shift the wide element to get the target element.
3284 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3285 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3286 MI.eraseFromParent();
3287 return Legalized;
3288 }
3289
3290 return UnableToLegalize;
3291}
3292
3293/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits in \p
3294/// TargetReg, while preserving other bits in \p TargetReg.
3295///
3296/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
3297 static Register buildBitFieldInsert(MachineIRBuilder &B,
3298 Register TargetReg, Register InsertReg,
3299 Register OffsetBits) {
3300 LLT TargetTy = B.getMRI()->getType(TargetReg);
3301 LLT InsertTy = B.getMRI()->getType(InsertReg);
3302 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3303 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3304
3305 // Produce a bitmask of the value to insert
3306 auto EltMask = B.buildConstant(
3307 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3308 InsertTy.getSizeInBits()));
3309 // Shift it into position
3310 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3311 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3312
3313 // Clear out the bits in the wide element
3314 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3315
3316 // The value to insert has all zeros already, so stick it into the masked
3317 // wide element.
3318 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3319}
3320
3321/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3322/// is increasing the element size, perform the indexing in the target element
3323/// type, and use bit operations to insert at the element position. This is
3324/// intended for architectures that can dynamically index the register file and
3325/// want to force indexing in the native register size.
3328 LLT CastTy) {
3329 if (TypeIdx != 0)
3330 return UnableToLegalize;
3331
3332 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3333 MI.getFirst4RegLLTs();
3334 LLT VecTy = DstTy;
3335
3336 LLT VecEltTy = VecTy.getElementType();
3337 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3338 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3339 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3340
3341 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3342 unsigned OldNumElts = VecTy.getNumElements();
3343
3344 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3345 if (NewNumElts < OldNumElts) {
3346 if (NewEltSize % OldEltSize != 0)
3347 return UnableToLegalize;
3348
3349 // This only depends on powers of 2 because we use bit tricks to figure out
3350 // the bit offset we need to shift to get the target element. A general
3351 // expansion could emit division/multiply.
3352 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3353 return UnableToLegalize;
3354
3355 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3356 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3357
3358 // Divide to get the index in the wider element type.
3359 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3360
3361 Register ExtractedElt = CastVec;
3362 if (CastTy.isVector()) {
3363 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3364 ScaledIdx).getReg(0);
3365 }
3366
3367 // Compute the bit offset into the register of the target element.
3368 Register OffsetBits = getBitcastWiderVectorElementOffset(
3369 MIRBuilder, Idx, NewEltSize, OldEltSize);
3370
3371 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3372 Val, OffsetBits);
3373 if (CastTy.isVector()) {
3374 InsertedElt = MIRBuilder.buildInsertVectorElement(
3375 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3376 }
3377
3378 MIRBuilder.buildBitcast(Dst, InsertedElt);
3379 MI.eraseFromParent();
3380 return Legalized;
3381 }
3382
3383 return UnableToLegalize;
3384}
3385
3386 LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
3387 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
3388 Register DstReg = LoadMI.getDstReg();
3389 Register PtrReg = LoadMI.getPointerReg();
3390 LLT DstTy = MRI.getType(DstReg);
3391 MachineMemOperand &MMO = LoadMI.getMMO();
3392 LLT MemTy = MMO.getMemoryType();
3393 MachineFunction &MF = MIRBuilder.getMF();
3394
3395 unsigned MemSizeInBits = MemTy.getSizeInBits();
3396 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
3397
3398 if (MemSizeInBits != MemStoreSizeInBits) {
3399 if (MemTy.isVector())
3400 return UnableToLegalize;
3401
3402 // Promote to a byte-sized load if not loading an integral number of
3403 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
3404 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
3405 MachineMemOperand *NewMMO =
3406 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
3407
3408 Register LoadReg = DstReg;
3409 LLT LoadTy = DstTy;
3410
3411 // If this wasn't already an extending load, we need to widen the result
3412 // register to avoid creating a load with a narrower result than the source.
3413 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
3414 LoadTy = WideMemTy;
3415 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
3416 }
3417
3418 if (isa<GSExtLoad>(LoadMI)) {
3419 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3420 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
3421 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
3422 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3423 // The extra bits are guaranteed to be zero, since we stored them that
3424 // way. A zext load from Wide thus automatically gives zext from MemVT.
3425 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
3426 } else {
3427 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
3428 }
3429
3430 if (DstTy != LoadTy)
3431 MIRBuilder.buildTrunc(DstReg, LoadReg);
3432
3433 LoadMI.eraseFromParent();
3434 return Legalized;
3435 }
3436
3437 // Big endian lowering not implemented.
3438 if (MIRBuilder.getDataLayout().isBigEndian())
3439 return UnableToLegalize;
3440
3441 // This load needs splitting into power of 2 sized loads.
3442 //
3443 // Our strategy here is to generate anyextending loads for the smaller
3444 // types up to next power-2 result type, and then combine the two larger
3445 // result values together, before truncating back down to the non-pow-2
3446 // type.
3447 // E.g. v1 = i24 load =>
3448 // v2 = i32 zextload (2 byte)
3449 // v3 = i32 load (1 byte)
3450 // v4 = i32 shl v3, 16
3451 // v5 = i32 or v4, v2
3452 // v1 = i24 trunc v5
3453 // By doing this we generate the correct truncate which should get
3454 // combined away as an artifact with a matching extend.
3455
3456 uint64_t LargeSplitSize, SmallSplitSize;
3457
3458 if (!isPowerOf2_32(MemSizeInBits)) {
3459 // This load needs splitting into power of 2 sized loads.
3460 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
3461 SmallSplitSize = MemSizeInBits - LargeSplitSize;
3462 } else {
3463 // This is already a power of 2, but we still need to split this in half.
3464 //
3465 // Assume we're being asked to decompose an unaligned load.
3466 // TODO: If this requires multiple splits, handle them all at once.
3467 auto &Ctx = MF.getFunction().getContext();
3468 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3469 return UnableToLegalize;
3470
3471 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3472 }
3473
3474 if (MemTy.isVector()) {
3475 // TODO: Handle vector extloads
3476 if (MemTy != DstTy)
3477 return UnableToLegalize;
3478
3479 // TODO: We can do better than scalarizing the vector and at least split it
3480 // in half.
3481 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
3482 }
3483
3484 MachineMemOperand *LargeMMO =
3485 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3486 MachineMemOperand *SmallMMO =
3487 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3488
3489 LLT PtrTy = MRI.getType(PtrReg);
3490 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
3491 LLT AnyExtTy = LLT::scalar(AnyExtSize);
3492 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
3493 PtrReg, *LargeMMO);
3494
3495 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
3496 LargeSplitSize / 8);
3497 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
3498 auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
3499 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
3500 SmallPtr, *SmallMMO);
3501
3502 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
3503 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
3504
3505 if (AnyExtTy == DstTy)
3506 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
3507 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
3508 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3509 MIRBuilder.buildTrunc(DstReg, {Or});
3510 } else {
3511 assert(DstTy.isPointer() && "expected pointer");
3512 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3513
3514 // FIXME: We currently consider this to be illegal for non-integral address
3515 // spaces, but we still need a way to reinterpret the bits.
3516 MIRBuilder.buildIntToPtr(DstReg, Or);
3517 }
3518
3519 LoadMI.eraseFromParent();
3520 return Legalized;
3521}
3522
3523LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
3524 // Lower a non-power of 2 store into multiple pow-2 stores.
3525 // E.g. split an i24 store into an i16 store + i8 store.
3526 // We do this by first extending the stored value to the next largest power
3527 // of 2 type, and then using truncating stores to store the components.
3528 // By doing this, as with G_LOAD, we generate an extend that can be
3529 // artifact-combined away instead of leaving behind extracts.
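 // A rough sketch for an i24 store (illustrative only; register names are
 // made up and the MMO details are simplified):
 //   %ext:_(s32) = G_ANYEXT %val(s24)
 //   %hi:_(s32)  = G_LSHR %ext, 16
 //   G_STORE %ext(s32), %ptr             ; truncating 2-byte store
 //   %ptr2:_(p0) = G_PTR_ADD %ptr, 2
 //   G_STORE %hi(s32), %ptr2             ; truncating 1-byte store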
3530 Register SrcReg = StoreMI.getValueReg();
3531 Register PtrReg = StoreMI.getPointerReg();
3532 LLT SrcTy = MRI.getType(SrcReg);
3533 MachineFunction &MF = MIRBuilder.getMF();
3534 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
3535 LLT MemTy = MMO.getMemoryType();
3536
3537 unsigned StoreWidth = MemTy.getSizeInBits();
3538 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
3539
3540 if (StoreWidth != StoreSizeInBits) {
3541 if (SrcTy.isVector())
3542 return UnableToLegalize;
3543
3544 // Promote to a byte-sized store with upper bits zero if not
3545 // storing an integral number of bytes. For example, promote
3546 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
3547 LLT WideTy = LLT::scalar(StoreSizeInBits);
3548
3549 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
3550 // Avoid creating a store with a narrower source than result.
3551 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
3552 SrcTy = WideTy;
3553 }
3554
3555 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
3556
3557 MachineMemOperand *NewMMO =
3558 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
3559 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
3560 StoreMI.eraseFromParent();
3561 return Legalized;
3562 }
3563
3564 if (MemTy.isVector()) {
3565 // TODO: Handle vector trunc stores
3566 if (MemTy != SrcTy)
3567 return UnableToLegalize;
3568
3569 // TODO: We can do better than scalarizing the vector and at least split it
3570 // in half.
3571 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
3572 }
3573
3574 unsigned MemSizeInBits = MemTy.getSizeInBits();
3575 uint64_t LargeSplitSize, SmallSplitSize;
3576
3577 if (!isPowerOf2_32(MemSizeInBits)) {
3578 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
3579 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
3580 } else {
3581 auto &Ctx = MF.getFunction().getContext();
3582 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3583 return UnableToLegalize; // Don't know what we're being asked to do.
3584
3585 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3586 }
3587
3588 // Extend to the next pow-2. If this store was itself the result of lowering,
3589 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
3590 // that's wider than the stored size.
3591 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
3592 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
3593
3594 if (SrcTy.isPointer()) {
3595 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
3596 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
3597 }
3598
3599 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
3600
3601 // Obtain the smaller value by shifting away the larger value.
3602 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
3603 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
3604
3605 // Generate the PtrAdd and truncating stores.
3606 LLT PtrTy = MRI.getType(PtrReg);
3607 auto OffsetCst = MIRBuilder.buildConstant(
3608 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
3609 auto SmallPtr =
3610 MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
3611
3612 MachineMemOperand *LargeMMO =
3613 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3614 MachineMemOperand *SmallMMO =
3615 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3616 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
3617 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
3618 StoreMI.eraseFromParent();
3619 return Legalized;
3620}
3621
3622LegalizerHelper::LegalizeResult
3623LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
3624 switch (MI.getOpcode()) {
3625 case TargetOpcode::G_LOAD: {
3626 if (TypeIdx != 0)
3627 return UnableToLegalize;
3628 MachineMemOperand &MMO = **MI.memoperands_begin();
3629
3630 // Not sure how to interpret a bitcast of an extending load.
3631 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3632 return UnableToLegalize;
3633
3634 Observer.changingInstr(MI);
3635 bitcastDst(MI, CastTy, 0);
3636 MMO.setType(CastTy);
3637 Observer.changedInstr(MI);
3638 return Legalized;
3639 }
3640 case TargetOpcode::G_STORE: {
3641 if (TypeIdx != 0)
3642 return UnableToLegalize;
3643
3644 MachineMemOperand &MMO = **MI.memoperands_begin();
3645
3646 // Not sure how to interpret a bitcast of a truncating store.
3647 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3648 return UnableToLegalize;
3649
3650 Observer.changingInstr(MI);
3651 bitcastSrc(MI, CastTy, 0);
3652 MMO.setType(CastTy);
3653 Observer.changedInstr(MI);
3654 return Legalized;
3655 }
3656 case TargetOpcode::G_SELECT: {
3657 if (TypeIdx != 0)
3658 return UnableToLegalize;
3659
3660 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
3661 LLVM_DEBUG(
3662 dbgs() << "bitcast action not implemented for vector select\n");
3663 return UnableToLegalize;
3664 }
3665
3666 Observer.changingInstr(MI);
3667 bitcastSrc(MI, CastTy, 2);
3668 bitcastSrc(MI, CastTy, 3);
3669 bitcastDst(MI, CastTy, 0);
3670 Observer.changedInstr(MI);
3671 return Legalized;
3672 }
3673 case TargetOpcode::G_AND:
3674 case TargetOpcode::G_OR:
3675 case TargetOpcode::G_XOR: {
3676 Observer.changingInstr(MI);
3677 bitcastSrc(MI, CastTy, 1);
3678 bitcastSrc(MI, CastTy, 2);
3679 bitcastDst(MI, CastTy, 0);
3680 Observer.changedInstr(MI);
3681 return Legalized;
3682 }
3683 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3684 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
3685 case TargetOpcode::G_INSERT_VECTOR_ELT:
3686 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
3687 default:
3688 return UnableToLegalize;
3689 }
3690}
3691
3692// Legalize an instruction by changing the opcode in place.
3693void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
3694 Observer.changingInstr(MI);
3695 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
3696 Observer.changedInstr(MI);
3697}
3698
3699LegalizerHelper::LegalizeResult
3700LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
3701 using namespace TargetOpcode;
3702
3703 switch(MI.getOpcode()) {
3704 default:
3705 return UnableToLegalize;
3706 case TargetOpcode::G_FCONSTANT:
3707 return lowerFConstant(MI);
3708 case TargetOpcode::G_BITCAST:
3709 return lowerBitcast(MI);
3710 case TargetOpcode::G_SREM:
3711 case TargetOpcode::G_UREM: {
3712 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3713 auto Quot =
3714 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
3715 {MI.getOperand(1), MI.getOperand(2)});
3716
3717 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
3718 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
3719 MI.eraseFromParent();
3720 return Legalized;
3721 }
3722 case TargetOpcode::G_SADDO:
3723 case TargetOpcode::G_SSUBO:
3724 return lowerSADDO_SSUBO(MI);
3725 case TargetOpcode::G_UMULH:
3726 case TargetOpcode::G_SMULH:
3727 return lowerSMULH_UMULH(MI);
3728 case TargetOpcode::G_SMULO:
3729 case TargetOpcode::G_UMULO: {
3730 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
3731 // result.
3732 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
3733 LLT Ty = MRI.getType(Res);
3734
3735 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
3736 ? TargetOpcode::G_SMULH
3737 : TargetOpcode::G_UMULH;
3738
3739 Observer.changingInstr(MI);
3740 const auto &TII = MIRBuilder.getTII();
3741 MI.setDesc(TII.get(TargetOpcode::G_MUL));
3742 MI.removeOperand(1);
3743 Observer.changedInstr(MI);
3744
3745 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
3746 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3747
3748 // Move insert point forward so we can use the Res register if needed.
3749 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3750
3751 // For *signed* multiply, overflow is detected by checking:
3752 // (hi != (lo >> bitwidth-1))
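 // Illustrative sketch for an s32 G_SMULO (register names are made up):
 //   %res:_(s32)  = G_MUL %lhs, %rhs
 //   %hi:_(s32)   = G_SMULH %lhs, %rhs
 //   %sign:_(s32) = G_ASHR %res, 31
 //   %ov:_(s1)    = G_ICMP intpred(ne), %hi, %sign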
3753 if (Opcode == TargetOpcode::G_SMULH) {
3754 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
3755 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
3756 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
3757 } else {
3758 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
3759 }
3760 return Legalized;
3761 }
3762 case TargetOpcode::G_FNEG: {
3763 auto [Res, SubByReg] = MI.getFirst2Regs();
3764 LLT Ty = MRI.getType(Res);
3765
3766 // TODO: Handle vector types once we are able to
3767 // represent them.
3768 if (Ty.isVector())
3769 return UnableToLegalize;
3770 auto SignMask =
3771 MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
3772 MIRBuilder.buildXor(Res, SubByReg, SignMask);
3773 MI.eraseFromParent();
3774 return Legalized;
3775 }
3776 case TargetOpcode::G_FSUB:
3777 case TargetOpcode::G_STRICT_FSUB: {
3778 auto [Res, LHS, RHS] = MI.getFirst3Regs();
3779 LLT Ty = MRI.getType(Res);
3780
3781 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
3782 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
3783
3784 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
3785 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
3786 else
3787 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3788
3789 MI.eraseFromParent();
3790 return Legalized;
3791 }
3792 case TargetOpcode::G_FMAD:
3793 return lowerFMad(MI);
3794 case TargetOpcode::G_FFLOOR:
3795 return lowerFFloor(MI);
3796 case TargetOpcode::G_INTRINSIC_ROUND:
3797 return lowerIntrinsicRound(MI);
3798 case TargetOpcode::G_FRINT: {
3799 // Since round even is the assumed rounding mode for unconstrained FP
3800 // operations, rint and roundeven are the same operation.
3801 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
3802 return Legalized;
3803 }
3804 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
3805 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
3806 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
3807 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
3808 **MI.memoperands_begin());
3809 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
3810 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
3811 MI.eraseFromParent();
3812 return Legalized;
3813 }
3814 case TargetOpcode::G_LOAD:
3815 case TargetOpcode::G_SEXTLOAD:
3816 case TargetOpcode::G_ZEXTLOAD:
3817 return lowerLoad(cast<GAnyLoad>(MI));
3818 case TargetOpcode::G_STORE:
3819 return lowerStore(cast<GStore>(MI));
3820 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3821 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3822 case TargetOpcode::G_CTLZ:
3823 case TargetOpcode::G_CTTZ:
3824 case TargetOpcode::G_CTPOP:
3825 return lowerBitCount(MI);
3826 case G_UADDO: {
3827 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
3828
3829 Register NewRes = MRI.cloneVirtualRegister(Res);
3830
3831 MIRBuilder.buildAdd(NewRes, LHS, RHS);
3832 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
3833
3834 MIRBuilder.buildCopy(Res, NewRes);
3835
3836 MI.eraseFromParent();
3837 return Legalized;
3838 }
3839 case G_UADDE: {
3840 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
3841 const LLT CondTy = MRI.getType(CarryOut);
3842 const LLT Ty = MRI.getType(Res);
3843
3844 Register NewRes = MRI.cloneVirtualRegister(Res);
3845
3846 // Initial add of the two operands.
3847 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
3848
3849 // Initial check for carry.
3850 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
3851
3852 // Add the sum and the carry.
3853 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
3854 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
3855
3856 // Second check for carry. We can only carry if the initial sum is all 1s
3857 // and the carry is set, resulting in a new sum of 0.
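 // In other words (illustrative): carry_out =
 //   (tmp < lhs) || ((tmp + carry_in) == 0 && carry_in),
 // built below from G_ICMP, G_AND and G_OR.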
3858 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3859 auto ResEqZero =
3860 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
3861 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
3862 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
3863
3864 MIRBuilder.buildCopy(Res, NewRes);
3865
3866 MI.eraseFromParent();
3867 return Legalized;
3868 }
3869 case G_USUBO: {
3870 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
3871
3872 MIRBuilder.buildSub(Res, LHS, RHS);
3873 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
3874
3875 MI.eraseFromParent();
3876 return Legalized;
3877 }
3878 case G_USUBE: {
3879 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
3880 const LLT CondTy = MRI.getType(BorrowOut);
3881 const LLT Ty = MRI.getType(Res);
3882
3883 // Initial subtract of the two operands.
3884 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3885
3886 // Initial check for borrow.
3887 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
3888
3889 // Subtract the borrow from the first subtract.
3890 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
3891 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
3892
3893 // Second check for borrow. We can only borrow if the initial difference is
3894 // 0 and the borrow is set, resulting in a new difference of all 1s.
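 // In other words (illustrative): borrow_out =
 //   (tmp > lhs) || (tmp == 0 && borrow_in),
 // built below from G_ICMP, G_AND and G_OR.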
3895 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3896 auto TmpResEqZero =
3897 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
3898 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
3899 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
3900
3901 MI.eraseFromParent();
3902 return Legalized;
3903 }
3904 case G_UITOFP:
3905 return lowerUITOFP(MI);
3906 case G_SITOFP:
3907 return lowerSITOFP(MI);
3908 case G_FPTOUI:
3909 return lowerFPTOUI(MI);
3910 case G_FPTOSI:
3911 return lowerFPTOSI(MI);
3912 case G_FPTRUNC:
3913 return lowerFPTRUNC(MI);
3914 case G_FPOWI:
3915 return lowerFPOWI(MI);
3916 case G_SMIN:
3917 case G_SMAX:
3918 case G_UMIN:
3919 case G_UMAX:
3920 return lowerMinMax(MI);
3921 case G_FCOPYSIGN:
3922 return lowerFCopySign(MI);
3923 case G_FMINNUM:
3924 case G_FMAXNUM:
3925 return lowerFMinNumMaxNum(MI);
3926 case G_MERGE_VALUES:
3927 return lowerMergeValues(MI);
3928 case G_UNMERGE_VALUES:
3929 return lowerUnmergeValues(MI);
3930 case TargetOpcode::G_SEXT_INREG: {
3931 assert(MI.getOperand(2).isImm() && "Expected immediate");
3932 int64_t SizeInBits = MI.getOperand(2).getImm();
3933
3934 auto [DstReg, SrcReg] = MI.getFirst2Regs();
3935 LLT DstTy = MRI.getType(DstReg);
3936 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
3937
3938 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
3939 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
3940 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
3941 MI.eraseFromParent();
3942 return Legalized;
3943 }
3944 case G_EXTRACT_VECTOR_ELT:
3945 case G_INSERT_VECTOR_ELT:
3946 return lowerExtractInsertVectorElt(MI);
3947 case G_SHUFFLE_VECTOR:
3948 return lowerShuffleVector(MI);
3949 case G_DYN_STACKALLOC:
3950 return lowerDynStackAlloc(MI);
3951 case G_STACKSAVE:
3952 return lowerStackSave(MI);
3953 case G_STACKRESTORE:
3954 return lowerStackRestore(MI);
3955 case G_EXTRACT:
3956 return lowerExtract(MI);
3957 case G_INSERT:
3958 return lowerInsert(MI);
3959 case G_BSWAP:
3960 return lowerBswap(MI);
3961 case G_BITREVERSE:
3962 return lowerBitreverse(MI);
3963 case G_READ_REGISTER:
3964 case G_WRITE_REGISTER:
3965 return lowerReadWriteRegister(MI);
3966 case G_UADDSAT:
3967 case G_USUBSAT: {
3968 // Try to make a reasonable guess about which lowering strategy to use. The
3969 // target can override this with custom lowering and calling the
3970 // implementation functions.
3971 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3972 if (LI.isLegalOrCustom({G_UMIN, Ty}))
3973 return lowerAddSubSatToMinMax(MI);
3974 return lowerAddSubSatToAddoSubo(MI);
3975 }
3976 case G_SADDSAT:
3977 case G_SSUBSAT: {
3978 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3979
3980 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3981 // since it's a shorter expansion. However, we would need to figure out the
3982 // preferred boolean type for the carry out for the query.
3983 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3984 return lowerAddSubSatToMinMax(MI);
3985 return lowerAddSubSatToAddoSubo(MI);
3986 }
3987 case G_SSHLSAT:
3988 case G_USHLSAT:
3989 return lowerShlSat(MI);
3990 case G_ABS:
3991 return lowerAbsToAddXor(MI);
3992 case G_SELECT:
3993 return lowerSelect(MI);
3994 case G_IS_FPCLASS:
3995 return lowerISFPCLASS(MI);
3996 case G_SDIVREM:
3997 case G_UDIVREM:
3998 return lowerDIVREM(MI);
3999 case G_FSHL:
4000 case G_FSHR:
4001 return lowerFunnelShift(MI);
4002 case G_ROTL:
4003 case G_ROTR:
4004 return lowerRotate(MI);
4005 case G_MEMSET:
4006 case G_MEMCPY:
4007 case G_MEMMOVE:
4008 return lowerMemCpyFamily(MI);
4009 case G_MEMCPY_INLINE:
4010 return lowerMemcpyInline(MI);
4011 case G_ZEXT:
4012 case G_SEXT:
4013 case G_ANYEXT:
4014 return lowerEXT(MI);
4015 case G_TRUNC:
4016 return lowerTRUNC(MI);
4017 GISEL_VECREDUCE_CASES_NONSEQ
4018 return lowerVectorReduction(MI);
4019 case G_VAARG:
4020 return lowerVAArg(MI);
4021 }
4022}
4023
4024Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4025 Align MinAlign) const {
4026 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4027 // datalayout for the preferred alignment. Also there should be a target hook
4028 // for this to allow targets to reduce the alignment and ignore the
4029 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4030 // the type.
4031 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4032}
4033
4034MachineInstrBuilder
4035LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4036 MachinePointerInfo &PtrInfo) {
4037 MachineFunction &MF = MIRBuilder.getMF();
4038 const DataLayout &DL = MIRBuilder.getDataLayout();
4039 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4040
4041 unsigned AddrSpace = DL.getAllocaAddrSpace();
4042 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4043
4044 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4045 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4046}
4047
4048static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
4049 LLT VecTy) {
4050 LLT IdxTy = B.getMRI()->getType(IdxReg);
4051 unsigned NElts = VecTy.getNumElements();
4052
4053 int64_t IdxVal;
4054 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4055 if (IdxVal < VecTy.getNumElements())
4056 return IdxReg;
4057 // If a constant index would be out of bounds, clamp it as well.
4058 }
4059
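 // Illustrative examples: with 4 elements the index is masked (Idx & 3); with
 // 3 elements (non-power-of-2) it is clamped with umin(Idx, 2).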
4060 if (isPowerOf2_32(NElts)) {
4061 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4062 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4063 }
4064
4065 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4066 .getReg(0);
4067}
4068
4069Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4070 Register Index) {
4071 LLT EltTy = VecTy.getElementType();
4072
4073 // Calculate the element offset and add it to the pointer.
4074 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4075 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4076 "Converting bits to bytes lost precision");
4077
4078 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4079
4080 // Convert index to the correct size for the address space.
4081 const DataLayout &DL = MIRBuilder.getDataLayout();
4082 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4083 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4084 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4085 if (IdxTy != MRI.getType(Index))
4086 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4087
4088 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4089 MIRBuilder.buildConstant(IdxTy, EltSize));
4090
4091 LLT PtrTy = MRI.getType(VecPtr);
4092 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4093}
4094
4095#ifndef NDEBUG
4096/// Check that all vector operands have the same number of elements. Other
4097/// operands should be listed in NonVecOpIndices.
4098static bool hasSameNumEltsOnAllVectorOperands(
4099 GenericMachineInstr &MI, MachineRegisterInfo &MRI,
4100 std::initializer_list<unsigned> NonVecOpIndices) {
4101 if (MI.getNumMemOperands() != 0)
4102 return false;
4103
4104 LLT VecTy = MRI.getType(MI.getReg(0));
4105 if (!VecTy.isVector())
4106 return false;
4107 unsigned NumElts = VecTy.getNumElements();
4108
4109 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4110 MachineOperand &Op = MI.getOperand(OpIdx);
4111 if (!Op.isReg()) {
4112 if (!is_contained(NonVecOpIndices, OpIdx))
4113 return false;
4114 continue;
4115 }
4116
4117 LLT Ty = MRI.getType(Op.getReg());
4118 if (!Ty.isVector()) {
4119 if (!is_contained(NonVecOpIndices, OpIdx))
4120 return false;
4121 continue;
4122 }
4123
4124 if (Ty.getNumElements() != NumElts)
4125 return false;
4126 }
4127
4128 return true;
4129}
4130#endif
4131
4132/// Fill \p DstOps with DstOps that, combined, have the same number of elements
4133/// as \p Ty. These DstOps have either scalar type when \p NumElts = 1 or are
4134/// vectors with \p NumElts elements. When Ty.getNumElements() is not a multiple
4135/// of \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
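/// For example (illustrative): Ty = <7 x s16> with \p NumElts = 4 produces
/// DstOps = { <4 x s16>, <3 x s16> }.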
4136static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
4137 unsigned NumElts) {
4138 LLT LeftoverTy;
4139 assert(Ty.isVector() && "Expected vector type");
4140 LLT EltTy = Ty.getElementType();
4141 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
4142 int NumParts, NumLeftover;
4143 std::tie(NumParts, NumLeftover) =
4144 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
4145
4146 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
4147 for (int i = 0; i < NumParts; ++i) {
4148 DstOps.push_back(NarrowTy);
4149 }
4150
4151 if (LeftoverTy.isValid()) {
4152 assert(NumLeftover == 1 && "expected exactly one leftover");
4153 DstOps.push_back(LeftoverTy);
4154 }
4155}
4156
4157/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
4158/// made from \p Op depending on operand type.
4159static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
4160 MachineOperand &Op) {
4161 for (unsigned i = 0; i < N; ++i) {
4162 if (Op.isReg())
4163 Ops.push_back(Op.getReg());
4164 else if (Op.isImm())
4165 Ops.push_back(Op.getImm());
4166 else if (Op.isPredicate())
4167 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
4168 else
4169 llvm_unreachable("Unsupported type");
4170 }
4171}
4172
4173// Handle splitting vector operations which need to have the same number of
4174// elements in each type index, but each type index may have a different element
4175// type.
4176//
4177// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
4178// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4179// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4180//
4181// Also handles some irregular breakdown cases, e.g.
4182// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
4183// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4184// s64 = G_SHL s64, s32
4185LegalizerHelper::LegalizeResult
4186LegalizerHelper::fewerElementsVectorMultiEltType(
4187 GenericMachineInstr &MI, unsigned NumElts,
4188 std::initializer_list<unsigned> NonVecOpIndices) {
4189 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
4190 "Non-compatible opcode or not specified non-vector operands");
4191 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4192
4193 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4194 unsigned NumDefs = MI.getNumDefs();
4195
4196 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
4197 // Build instructions with DstOps so an instruction found by CSE is used directly;
4198 // when building with a vreg destination, CSE copies the found instruction into it.
4199 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
4200 // Output registers will be taken from created instructions.
4201 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
4202 for (unsigned i = 0; i < NumDefs; ++i) {
4203 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
4204 }
4205
4206 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
4207 // Operands listed in NonVecOpIndices will be used as is without splitting;
4208 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
4209 // scalar condition (op 1), immediate in sext_inreg (op 2).
4210 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
4211 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4212 ++UseIdx, ++UseNo) {
4213 if (is_contained(NonVecOpIndices, UseIdx)) {
4214 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
4215 MI.getOperand(UseIdx));
4216 } else {
4217 SmallVector<Register, 8> SplitPieces;
4218 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
4219 MRI);
4220 for (auto Reg : SplitPieces)
4221 InputOpsPieces[UseNo].push_back(Reg);
4222 }
4223 }
4224
4225 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4226
4227 // Take i-th piece of each input operand split and build sub-vector/scalar
4228 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
4229 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4231 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4232 Defs.push_back(OutputOpsPieces[DstNo][i]);
4233
4235 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
4236 Uses.push_back(InputOpsPieces[InputNo][i]);
4237
4238 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
4239 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4240 OutputRegs[DstNo].push_back(I.getReg(DstNo));
4241 }
4242
4243 // Merge small outputs into MI's output for each def operand.
4244 if (NumLeftovers) {
4245 for (unsigned i = 0; i < NumDefs; ++i)
4246 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
4247 } else {
4248 for (unsigned i = 0; i < NumDefs; ++i)
4249 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
4250 }
4251
4252 MI.eraseFromParent();
4253 return Legalized;
4254}
4255
4256LegalizerHelper::LegalizeResult
4257LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
4258 unsigned NumElts) {
4259 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4260
4261 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4262 unsigned NumDefs = MI.getNumDefs();
4263
4264 SmallVector<DstOp, 8> OutputOpsPieces;
4265 SmallVector<Register, 8> OutputRegs;
4266 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
4267
4268 // Instructions that perform the register split will be inserted in the basic
4269 // block where the register is defined (the basic block is in the next operand).
4270 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
4271 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4272 UseIdx += 2, ++UseNo) {
4273 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
4275 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
4276 MIRBuilder, MRI);
4277 }
4278
4279 // Build PHIs with fewer elements.
4280 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4281 MIRBuilder.setInsertPt(*MI.getParent(), MI);
4282 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4283 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
4284 Phi.addDef(
4285 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
4286 OutputRegs.push_back(Phi.getReg(0));
4287
4288 for (unsigned j = 0; j < NumInputs / 2; ++j) {
4289 Phi.addUse(InputOpsPieces[j][i]);
4290 Phi.add(MI.getOperand(1 + j * 2 + 1));
4291 }
4292 }
4293
4294 // Set the insert point after the existing PHIs
4295 MachineBasicBlock &MBB = *MI.getParent();
4296 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
4297
4298 // Merge small outputs into MI's def.
4299 if (NumLeftovers) {
4300 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
4301 } else {
4302 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
4303 }
4304
4305 MI.eraseFromParent();
4306 return Legalized;
4307}
4308
4309LegalizerHelper::LegalizeResult
4310LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
4311 unsigned TypeIdx,
4312 LLT NarrowTy) {
4313 const int NumDst = MI.getNumOperands() - 1;
4314 const Register SrcReg = MI.getOperand(NumDst).getReg();
4315 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4316 LLT SrcTy = MRI.getType(SrcReg);
4317
4318 if (TypeIdx != 1 || NarrowTy == DstTy)
4319 return UnableToLegalize;
4320
4321 // Requires compatible types. Otherwise SrcReg should have been defined by a
4322 // merge-like instruction that would get artifact-combined. Most likely the
4323 // instruction that defines SrcReg has to perform more/fewer elements
4324 // legalization compatible with NarrowTy.
4325 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
4326 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4327
4328 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
4329 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
4330 return UnableToLegalize;
4331
4332 // This is most likely DstTy (smaller than register size) packed in SrcTy
4333 // (larger than register size) and since unmerge was not combined it will be
4334 // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
4335 // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.
4336
4337 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
4338 //
4339 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
4340 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
4341 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
4342 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
4343 const int NumUnmerge = Unmerge->getNumOperands() - 1;
4344 const int PartsPerUnmerge = NumDst / NumUnmerge;
4345
4346 for (int I = 0; I != NumUnmerge; ++I) {
4347 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
4348
4349 for (int J = 0; J != PartsPerUnmerge; ++J)
4350 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
4351 MIB.addUse(Unmerge.getReg(I));
4352 }
4353
4354 MI.eraseFromParent();
4355 return Legalized;
4356}
4357
4358LegalizerHelper::LegalizeResult
4359LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
4360 LLT NarrowTy) {
4361 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
4362 // Requires compatible types. Otherwise the user of DstReg did not perform the
4363 // unmerge that should have been artifact-combined. Most likely the instruction
4364 // that uses DstReg has to do more/fewer elements legalization compatible with NarrowTy.
4365 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
4366 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4367 if (NarrowTy == SrcTy)
4368 return UnableToLegalize;
4369
4370 // This attempts to lower part of an LCMTy merge/unmerge sequence. The intended
4371 // use is for old MIR tests. Since the change to more/fewer elements it should
4372 // no longer be possible to generate MIR like this when starting from LLVM IR,
4373 // because the LCMTy approach was replaced with merge/unmerge to vector elements.
4374 if (TypeIdx == 1) {
4375 assert(SrcTy.isVector() && "Expected vector types");
4376 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4377 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
4378 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
4379 return UnableToLegalize;
4380 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
4381 //
4382 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
4383 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
4384 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
4385 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
4386 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
4387 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
4388
4389 SmallVector<Register, 8> Elts;
4390 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
4391 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
4392 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
4393 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
4394 Elts.push_back(Unmerge.getReg(j));
4395 }
4396
4397 SmallVector<Register, 8> NarrowTyElts;
4398 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
4399 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
4400 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
4401 ++i, Offset += NumNarrowTyElts) {
4402 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
4403 NarrowTyElts.push_back(
4404 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
4405 }
4406
4407 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
4408 MI.eraseFromParent();
4409 return Legalized;
4410 }
4411
4412 assert(TypeIdx == 0 && "Bad type index");
4413 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
4414 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
4415 return UnableToLegalize;
4416
4417 // This is most likely SrcTy (smaller than register size) packed in DstTy
4418 // (larger than register size) and since merge was not combined it will be
4419 // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy
4420 // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy.
4421
4422 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
4423 //
4424 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
4425 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
4426 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
4427 SmallVector<Register, 8> NarrowTyElts;
4428 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
4429 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
4430 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
4431 for (unsigned i = 0; i < NumParts; ++i) {
4432 SmallVector<Register, 8> Sources;
4433 for (unsigned j = 0; j < NumElts; ++j)
4434 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
4435 NarrowTyElts.push_back(
4436 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
4437 }
4438
4439 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
4440 MI.eraseFromParent();
4441 return Legalized;
4442}
4443
4444LegalizerHelper::LegalizeResult
4445LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
4446 unsigned TypeIdx,
4447 LLT NarrowVecTy) {
4448 auto [DstReg, SrcVec] = MI.getFirst2Regs();
4449 Register InsertVal;
4450 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
4451
4452 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
4453 if (IsInsert)
4454 InsertVal = MI.getOperand(2).getReg();
4455
4456 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
4457
4458 // TODO: Handle total scalarization case.
4459 if (!NarrowVecTy.isVector())
4460 return UnableToLegalize;
4461
4462 LLT VecTy = MRI.getType(SrcVec);
4463
4464 // If the index is a constant, we can really break this down as you would
4465 // expect, and index into the target size pieces.
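 // Illustrative example: extracting element 5 of <8 x s32> with
 // NarrowVecTy = <4 x s32> becomes an extract of element 1 from the second
 // <4 x s32> piece.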
4466 int64_t IdxVal;
4467 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
4468 if (MaybeCst) {
4469 IdxVal = MaybeCst->Value.getSExtValue();
4470 // Avoid out of bounds indexing the pieces.
4471 if (IdxVal >= VecTy.getNumElements()) {
4472 MIRBuilder.buildUndef(DstReg);
4473 MI.eraseFromParent();
4474 return Legalized;
4475 }
4476
4477 SmallVector<Register, 8> VecParts;
4478 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
4479
4480 // Build a sequence of NarrowTy pieces in VecParts for this operand.
4481 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
4482 TargetOpcode::G_ANYEXT);
4483
4484 unsigned NewNumElts = NarrowVecTy.getNumElements();
4485
4486 LLT IdxTy = MRI.getType(Idx);
4487 int64_t PartIdx = IdxVal / NewNumElts;
4488 auto NewIdx =
4489 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
4490
4491 if (IsInsert) {
4492 LLT PartTy = MRI.getType(VecParts[PartIdx]);
4493
4494 // Use the adjusted index to insert into one of the subvectors.
4495 auto InsertPart = MIRBuilder.buildInsertVectorElement(
4496 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
4497 VecParts[PartIdx] = InsertPart.getReg(0);
4498
4499 // Recombine the inserted subvector with the others to reform the result
4500 // vector.
4501 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
4502 } else {
4503 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
4504 }
4505
4506 MI.eraseFromParent();
4507 return Legalized;
4508 }
4509
4510 // With a variable index, we can't perform the operation in a smaller type, so
4511 // we're forced to expand this.
4512 //
4513 // TODO: We could emit a chain of compare/select to figure out which piece to
4514 // index.
4515 return lowerExtractInsertVectorElt(MI);
4516}
4517
4518LegalizerHelper::LegalizeResult
4519LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
4520 LLT NarrowTy) {
4521 // FIXME: Don't know how to handle secondary types yet.
4522 if (TypeIdx != 0)
4523 return UnableToLegalize;
4524
4525 // This implementation doesn't work for atomics. Give up instead of doing
4526 // something invalid.
4527 if (LdStMI.isAtomic())
4528 return UnableToLegalize;
4529
4530 bool IsLoad = isa<GLoad>(LdStMI);
4531 Register ValReg = LdStMI.getReg(0);
4532 Register AddrReg = LdStMI.getPointerReg();
4533 LLT ValTy = MRI.getType(ValReg);
4534
4535 // FIXME: Do we need a distinct NarrowMemory legalize action?
4536 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
4537 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
4538 return UnableToLegalize;
4539 }
4540
4541 int NumParts = -1;
4542 int NumLeftover = -1;
4543 LLT LeftoverTy;
4544 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
4545 if (IsLoad) {
4546 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
4547 } else {
4548 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
4549 NarrowLeftoverRegs, MIRBuilder, MRI)) {
4550 NumParts = NarrowRegs.size();
4551 NumLeftover = NarrowLeftoverRegs.size();
4552 }
4553 }
4554
4555 if (NumParts == -1)
4556 return UnableToLegalize;
4557
4558 LLT PtrTy = MRI.getType(AddrReg);
4559 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
4560
4561 unsigned TotalSize = ValTy.getSizeInBits();
4562
4563 // Split the load/store into PartTy sized pieces starting at Offset. If this
4564 // is a load, return the new registers in ValRegs. For a store, each element
4565 // of ValRegs should be PartTy. Returns the next offset that needs to be
4566 // handled.
4567 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
4568 auto MMO = LdStMI.getMMO();
4569 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
4570 unsigned NumParts, unsigned Offset) -> unsigned {
4571 MachineFunction &MF = MIRBuilder.getMF();
4572 unsigned PartSize = PartTy.getSizeInBits();
4573 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
4574 ++Idx) {
4575 unsigned ByteOffset = Offset / 8;
4576 Register NewAddrReg;
4577
4578 MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
4579
4580 MachineMemOperand *NewMMO =
4581 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
4582
4583 if (IsLoad) {
4584 Register Dst = MRI.createGenericVirtualRegister(PartTy);
4585 ValRegs.push_back(Dst);
4586 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
4587 } else {
4588 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
4589 }
4590 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
4591 }
4592
4593 return Offset;
4594 };
4595
4596 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
4597 unsigned HandledOffset =
4598 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
4599
4600 // Handle the rest of the register if this isn't an even type breakdown.
4601 if (LeftoverTy.isValid())
4602 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
4603
4604 if (IsLoad) {
4605 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
4606 LeftoverTy, NarrowLeftoverRegs);
4607 }
4608
4609 LdStMI.eraseFromParent();
4610 return Legalized;
4611}
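// Illustrative example for reduceLoadStoreWidth: on a little-endian target, an
// s128 G_LOAD narrowed with NarrowTy = s64 becomes two s64 loads at byte
// offsets 0 and 8 whose results are merged back into the original s128 value.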
4612
4613LegalizerHelper::LegalizeResult
4614LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
4615 LLT NarrowTy) {
4616 using namespace TargetOpcode;
4617 GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
4618 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
4619
4620 switch (MI.getOpcode()) {
4621 case G_IMPLICIT_DEF:
4622 case G_TRUNC:
4623 case G_AND:
4624 case G_OR:
4625 case G_XOR:
4626 case G_ADD:
4627 case G_SUB:
4628 case G_MUL:
4629 case G_PTR_ADD:
4630 case G_SMULH:
4631 case G_UMULH:
4632 case G_FADD:
4633 case G_FMUL:
4634 case G_FSUB:
4635 case G_FNEG:
4636 case G_FABS:
4637 case G_FCANONICALIZE:
4638 case G_FDIV:
4639 case G_FREM:
4640 case G_FMA:
4641 case G_FMAD:
4642 case G_FPOW:
4643 case G_FEXP:
4644 case G_FEXP2:
4645 case G_FEXP10:
4646 case G_FLOG:
4647 case G_FLOG2:
4648 case G_FLOG10:
4649 case G_FLDEXP:
4650 case G_FNEARBYINT:
4651 case G_FCEIL:
4652 case G_FFLOOR:
4653 case G_FRINT:
4654 case G_INTRINSIC_ROUND:
4655 case G_INTRINSIC_ROUNDEVEN:
4656 case G_INTRINSIC_TRUNC:
4657 case G_FCOS:
4658 case G_FSIN:
4659 case G_FSQRT:
4660 case G_BSWAP:
4661 case G_BITREVERSE:
4662 case G_SDIV:
4663 case G_UDIV:
4664 case G_SREM:
4665 case G_UREM:
4666 case G_SDIVREM:
4667 case G_UDIVREM:
4668 case G_SMIN:
4669 case G_SMAX:
4670 case G_UMIN:
4671 case G_UMAX:
4672 case G_ABS:
4673 case G_FMINNUM:
4674 case G_FMAXNUM:
4675 case G_FMINNUM_IEEE:
4676 case G_FMAXNUM_IEEE:
4677 case G_FMINIMUM:
4678 case G_FMAXIMUM:
4679 case G_FSHL:
4680 case G_FSHR:
4681 case G_ROTL:
4682 case G_ROTR:
4683 case G_FREEZE:
4684 case G_SADDSAT:
4685 case G_SSUBSAT:
4686 case G_UADDSAT:
4687 case G_USUBSAT:
4688 case G_UMULO:
4689 case G_SMULO:
4690 case G_SHL:
4691 case G_LSHR:
4692 case G_ASHR:
4693 case G_SSHLSAT:
4694 case G_USHLSAT:
4695 case G_CTLZ:
4696 case G_CTLZ_ZERO_UNDEF:
4697 case G_CTTZ:
4698 case G_CTTZ_ZERO_UNDEF:
4699 case G_CTPOP:
4700 case G_FCOPYSIGN:
4701 case G_ZEXT:
4702 case G_SEXT:
4703 case G_ANYEXT:
4704 case G_FPEXT:
4705 case G_FPTRUNC:
4706 case G_SITOFP:
4707 case G_UITOFP:
4708 case G_FPTOSI:
4709 case G_FPTOUI:
4710 case G_INTTOPTR:
4711 case G_PTRTOINT:
4712 case G_ADDRSPACE_CAST:
4713 case G_UADDO:
4714 case G_USUBO:
4715 case G_UADDE:
4716 case G_USUBE:
4717 case G_SADDO:
4718 case G_SSUBO:
4719 case G_SADDE:
4720 case G_SSUBE:
4721 case G_STRICT_FADD:
4722 case G_STRICT_FSUB:
4723 case G_STRICT_FMUL:
4724 case G_STRICT_FMA:
4725 case G_STRICT_FLDEXP:
4726 case G_FFREXP:
4727 return fewerElementsVectorMultiEltType(GMI, NumElts);
4728 case G_ICMP:
4729 case G_FCMP:
4730 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
4731 case G_IS_FPCLASS:
4732 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
4733 case G_SELECT:
4734 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
4735 return fewerElementsVectorMultiEltType(GMI, NumElts);
4736 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
4737 case G_PHI:
4738 return fewerElementsVectorPhi(GMI, NumElts);
4739 case G_UNMERGE_VALUES:
4740 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
4741 case G_BUILD_VECTOR:
4742 assert(TypeIdx == 0 && "not a vector type index");
4743 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4744 case G_CONCAT_VECTORS:
4745 if (TypeIdx != 1) // TODO: This probably does work as expected already.
4746 return UnableToLegalize;
4747 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4748 case G_EXTRACT_VECTOR_ELT:
4749 case G_INSERT_VECTOR_ELT:
4750 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
4751 case G_LOAD:
4752 case G_STORE:
4753 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
4754 case G_SEXT_INREG:
4755 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
4756 GISEL_VECREDUCE_CASES_NONSEQ
4757 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
4758 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
4759 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
4760 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
4761 case G_SHUFFLE_VECTOR:
4762 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
4763 case G_FPOWI:
4764 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
4765 case G_BITCAST:
4766 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
4767 case G_INTRINSIC_FPTRUNC_ROUND:
4768 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
4769 default:
4770 return UnableToLegalize;
4771 }
4772}
4773
4774LegalizerHelper::LegalizeResult
4775LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned int TypeIdx,
4776 LLT NarrowTy) {
4777 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
4778 "Not a bitcast operation");
4779
4780 if (TypeIdx != 0)
4781 return UnableToLegalize;
4782
4783 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
4784
4785 unsigned SrcScalSize = SrcTy.getScalarSizeInBits();
4786 LLT SrcNarrowTy =
4787 LLT::fixed_vector(NarrowTy.getSizeInBits() / SrcScalSize, SrcScalSize);
4788
4789 // Split the Src and Dst Reg into smaller registers
4790 SmallVector<Register> SrcVRegs, BitcastVRegs;
4791 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
4792 return UnableToLegalize;
4793
4794 // Build new smaller bitcast instructions
4795 // Leftover types are not supported for now, but will have to be.
4796 for (unsigned i = 0; i < SrcVRegs.size(); i++)
4797 BitcastVRegs.push_back(
4798 MIRBuilder.buildBitcast(NarrowTy, SrcVRegs[i]).getReg(0));
4799
4800 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
4801 MI.eraseFromParent();
4802 return Legalized;
4803}
4804
4805LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
4806 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4807 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
4808 if (TypeIdx != 0)
4809 return UnableToLegalize;
4810
4811 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
4812 MI.getFirst3RegLLTs();
4813 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
4814 // The shuffle should be canonicalized by now.
4815 if (DstTy != Src1Ty)
4816 return UnableToLegalize;
4817 if (DstTy != Src2Ty)
4818 return UnableToLegalize;
4819
4820 if (!isPowerOf2_32(DstTy.getNumElements()))
4821 return UnableToLegalize;
4822
4823 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
4824 // Further legalization attempts will be needed to split further.
4825 NarrowTy =
4826 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
4827 unsigned NewElts = NarrowTy.getNumElements();
4828
4829 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
4830 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
4831 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
4832 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
4833 SplitSrc2Regs[1]};
4834
4835 Register Hi, Lo;
4836
4837 // If Lo or Hi uses elements from at most two of the four input vectors, then
4838 // express it as a vector shuffle of those two inputs. Otherwise extract the
4839 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
4840 SmallVector<int, 16> Ops;
4841 for (unsigned High = 0; High < 2; ++High) {
4842 Register &Output = High ? Hi : Lo;
4843
4844 // Build a shuffle mask for the output, discovering on the fly which
4845 // input vectors to use as shuffle operands (recorded in InputUsed).
4846 // If building a suitable shuffle vector proves too hard, then bail
4847 // out with useBuildVector set.
4848 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
4849 unsigned FirstMaskIdx = High * NewElts;
4850 bool UseBuildVector = false;
4851 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4852 // The mask element. This indexes into the input.
4853 int Idx = Mask[FirstMaskIdx + MaskOffset];
4854
4855 // The input vector this mask element indexes into.
4856 unsigned Input = (unsigned)Idx / NewElts;
4857
4858 if (Input >= std::size(Inputs)) {
4859 // The mask element does not index into any input vector.
4860 Ops.push_back(-1);
4861 continue;
4862 }
4863
4864 // Turn the index into an offset from the start of the input vector.
4865 Idx -= Input * NewElts;
4866
4867 // Find or create a shuffle vector operand to hold this input.
4868 unsigned OpNo;
4869 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
4870 if (InputUsed[OpNo] == Input) {
4871 // This input vector is already an operand.
4872 break;
4873 } else if (InputUsed[OpNo] == -1U) {
4874 // Create a new operand for this input vector.
4875 InputUsed[OpNo] = Input;
4876 break;
4877 }
4878 }
4879
4880 if (OpNo >= std::size(InputUsed)) {
4881 // More than two input vectors used! Give up on trying to create a
4882 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
4883 UseBuildVector = true;
4884 break;
4885 }
4886
4887 // Add the mask index for the new shuffle vector.
4888 Ops.push_back(Idx + OpNo * NewElts);
4889 }
4890
4891 if (UseBuildVector) {
4892 LLT EltTy = NarrowTy.getElementType();
4893 SmallVector<Register, 16> SVOps;
4894
4895 // Extract the input elements by hand.
4896 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4897 // The mask element. This indexes into the input.
4898 int Idx = Mask[FirstMaskIdx + MaskOffset];
4899
4900 // The input vector this mask element indexes into.
4901 unsigned Input = (unsigned)Idx / NewElts;
4902
4903 if (Input >= std::size(Inputs)) {
4904 // The mask element is "undef" or indexes off the end of the input.
4905 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
4906 continue;
4907 }
4908
4909 // Turn the index into an offset from the start of the input vector.
4910 Idx -= Input * NewElts;
4911
4912 // Extract the vector element by hand.
4913 SVOps.push_back(MIRBuilder
4914 .buildExtractVectorElement(
4915 EltTy, Inputs[Input],
4917 .getReg(0));
4918 }
4919
4920 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
4921 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
4922 } else if (InputUsed[0] == -1U) {
4923 // No input vectors were used! The result is undefined.
4924 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
4925 } else {
4926 Register Op0 = Inputs[InputUsed[0]];
4927 // If only one input was used, use an undefined vector for the other.
4928 Register Op1 = InputUsed[1] == -1U
4929 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
4930 : Inputs[InputUsed[1]];
4931 // At least one input vector was used. Create a new shuffle vector.
4932 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
4933 }
4934
4935 Ops.clear();
4936 }
4937
4938 MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
4939 MI.eraseFromParent();
4940 return Legalized;
4941}
4942
4943LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
4944 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4945 auto &RdxMI = cast<GVecReduce>(MI);
4946
4947 if (TypeIdx != 1)
4948 return UnableToLegalize;
4949
4950 // The semantics of the normal non-sequential reductions allow us to freely
4951 // re-associate the operation.
4952 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
4953
4954 if (NarrowTy.isVector() &&
4955 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
4956 return UnableToLegalize;
4957
4958 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
4959 SmallVector<Register> SplitSrcs;
4960 // If NarrowTy is a scalar then we're being asked to scalarize.
4961 const unsigned NumParts =
4962 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
4963 : SrcTy.getNumElements();
4964
4965 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
4966 if (NarrowTy.isScalar()) {
4967 if (DstTy != NarrowTy)
4968 return UnableToLegalize; // FIXME: handle implicit extensions.
4969
4970 if (isPowerOf2_32(NumParts)) {
4971 // Generate a tree of scalar operations to reduce the critical path.
4972 SmallVector<Register> PartialResults;
4973 unsigned NumPartsLeft = NumParts;
4974 while (NumPartsLeft > 1) {
4975 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
4976 PartialResults.emplace_back(
4977 MIRBuilder
4978 .buildInstr(ScalarOpc, {NarrowTy},
4979 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
4980 .getReg(0));
4981 }
4982 SplitSrcs = PartialResults;
4983 PartialResults.clear();
4984 NumPartsLeft = SplitSrcs.size();
4985 }
4986 assert(SplitSrcs.size() == 1);
4987 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
4988 MI.eraseFromParent();
4989 return Legalized;
4990 }
4991 // If we can't generate a tree, then just do sequential operations.
4992 Register Acc = SplitSrcs[0];
4993 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
4994 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
4995 .getReg(0);
4996 MIRBuilder.buildCopy(DstReg, Acc);
4997 MI.eraseFromParent();
4998 return Legalized;
4999 }
5000 SmallVector<Register> PartialReductions;
5001 for (unsigned Part = 0; Part < NumParts; ++Part) {
5002 PartialReductions.push_back(
5003 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5004 .getReg(0));
5005 }
5006
5007 // If the types involved are powers of 2, we can generate intermediate vector
5008 // ops, before generating a final reduction operation.
5009 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5010 isPowerOf2_32(NarrowTy.getNumElements())) {
5011 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5012 }
5013
5014 Register Acc = PartialReductions[0];
5015 for (unsigned Part = 1; Part < NumParts; ++Part) {
5016 if (Part == NumParts - 1) {
5017 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5018 {Acc, PartialReductions[Part]});
5019 } else {
5020 Acc = MIRBuilder
5021 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5022 .getReg(0);
5023 }
5024 }
5025 MI.eraseFromParent();
5026 return Legalized;
5027}
5028
5029LegalizerHelper::LegalizeResult
5030LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
5031 unsigned int TypeIdx,
5032 LLT NarrowTy) {
5033 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5034 MI.getFirst3RegLLTs();
5035 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5036 DstTy != NarrowTy)
5037 return UnableToLegalize;
5038
5039 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5040 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5041 "Unexpected vecreduce opcode");
5042 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5043 ? TargetOpcode::G_FADD
5044 : TargetOpcode::G_FMUL;
5045
5046 SmallVector<Register> SplitSrcs;
5047 unsigned NumParts = SrcTy.getNumElements();
5048 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5049 Register Acc = ScalarReg;
5050 for (unsigned i = 0; i < NumParts; i++)
5051 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5052 .getReg(0);
5053
5054 MIRBuilder.buildCopy(DstReg, Acc);
5055 MI.eraseFromParent();
5056 return Legalized;
5057}
5058
5059LegalizerHelper::LegalizeResult
5060LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5061 LLT SrcTy, LLT NarrowTy,
5062 unsigned ScalarOpc) {
5063 SmallVector<Register> SplitSrcs;
5064 // Split the sources into NarrowTy size pieces.
5065 extractParts(SrcReg, NarrowTy,
5066 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5067 MIRBuilder, MRI);
5068 // We're going to do a tree reduction using vector operations until we have
5069 // one NarrowTy size value left.
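 // Illustrative example: a G_VECREDUCE_ADD of <8 x s32> with NarrowTy =
 // <2 x s32> is split into 4 pieces, combined pairwise with <2 x s32> G_ADDs
 // down to one piece, and the original reduction then runs on that piece.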
5070 while (SplitSrcs.size() > 1) {
5071 SmallVector<Register> PartialRdxs;
5072 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5073 Register LHS = SplitSrcs[Idx];
5074 Register RHS = SplitSrcs[Idx + 1];
5075 // Create the intermediate vector op.
5076 Register Res =
5077 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
5078 PartialRdxs.push_back(Res);
5079 }
5080 SplitSrcs = std::move(PartialRdxs);
5081 }
5082 // Finally generate the requested NarrowTy based reduction.
5083 Observer.changingInstr(MI);
5084 MI.getOperand(1).setReg(SplitSrcs[0]);
5085 Observer.changedInstr(MI);
5086 return Legalized;
5087}
5088
5089LegalizerHelper::LegalizeResult
5090LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
5091 const LLT HalfTy, const LLT AmtTy) {
5092
5093 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5094 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5095 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5096
5097 if (Amt.isZero()) {
5098 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
5099 MI.eraseFromParent();
5100 return Legalized;
5101 }
5102
5103 LLT NVT = HalfTy;
5104 unsigned NVTBits = HalfTy.getSizeInBits();
5105 unsigned VTBits = 2 * NVTBits;
5106
5107 SrcOp Lo(Register(0)), Hi(Register(0));
5108 if (MI.getOpcode() == TargetOpcode::G_SHL) {
5109 if (Amt.ugt(VTBits)) {
5110 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5111 } else if (Amt.ugt(NVTBits)) {
5112 Lo = MIRBuilder.buildConstant(NVT, 0);
5113 Hi = MIRBuilder.buildShl(NVT, InL,
5114 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5115 } else if (Amt == NVTBits) {
5116 Lo = MIRBuilder.buildConstant(NVT, 0);
5117 Hi = InL;
5118 } else {
5119 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
5120 auto OrLHS =
5121 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
5122 auto OrRHS = MIRBuilder.buildLShr(
5123 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5124 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5125 }
5126 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5127 if (Amt.ugt(VTBits)) {
5128 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5129 } else if (Amt.ugt(NVTBits)) {
5130 Lo = MIRBuilder.buildLShr(NVT, InH,
5131 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5132 Hi = MIRBuilder.buildConstant(NVT, 0);
5133 } else if (Amt == NVTBits) {
5134 Lo = InH;
5135 Hi = MIRBuilder.buildConstant(NVT, 0);
5136 } else {
5137 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5138
5139 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5140 auto OrRHS = MIRBuilder.buildShl(
5141 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5142
5143 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5144 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
5145 }
5146 } else {
5147 if (Amt.ugt(VTBits)) {
5148 Lo = Hi = MIRBuilder.buildAShr(
5149 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5150 } else if (Amt.ugt(NVTBits)) {
5151 Lo = MIRBuilder.buildAShr(NVT, InH,
5152 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5153 Hi = MIRBuilder.buildAShr(NVT, InH,
5154 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5155 } else if (Amt == NVTBits) {
5156 Lo = InH;
5157 Hi = MIRBuilder.buildAShr(NVT, InH,
5158 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5159 } else {
5160 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5161
5162 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5163 auto OrRHS = MIRBuilder.buildShl(
5164 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5165
5166 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5167 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
5168 }
5169 }
5170
5171 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
5172 MI.eraseFromParent();
5173
5174 return Legalized;
5175}
5176
5177// TODO: Optimize if constant shift amount.
5178LegalizerHelper::LegalizeResult
5179LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
5180 LLT RequestedTy) {
5181 if (TypeIdx == 1) {
5182 Observer.changingInstr(MI);
5183 narrowScalarSrc(MI, RequestedTy, 2);
5184 Observer.changedInstr(MI);
5185 return Legalized;
5186 }
5187
5188 Register DstReg = MI.getOperand(0).getReg();
5189 LLT DstTy = MRI.getType(DstReg);
5190 if (DstTy.isVector())
5191 return UnableToLegalize;
5192
5193 Register Amt = MI.getOperand(2).getReg();
5194 LLT ShiftAmtTy = MRI.getType(Amt);
5195 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
5196 if (DstEltSize % 2 != 0)
5197 return UnableToLegalize;
5198
5199 // Ignore the input type. We can only go to exactly half the size of the
5200 // input. If that isn't small enough, the resulting pieces will be further
5201 // legalized.
5202 const unsigned NewBitSize = DstEltSize / 2;
5203 const LLT HalfTy = LLT::scalar(NewBitSize);
5204 const LLT CondTy = LLT::scalar(1);
5205
5206 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
5207 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
5208 ShiftAmtTy);
5209 }
5210
5211 // TODO: Expand with known bits.
5212
5213 // Handle the fully general expansion by an unknown amount.
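 // Both result halves are NewBitSize wide. Taking G_SHL as an example: when
 // Amt < NewBitSize, Lo = InL << Amt and Hi = (InH << Amt) | (InL >> (NewBitSize - Amt));
 // when Amt >= NewBitSize, Lo = 0 and Hi = InL << (Amt - NewBitSize). The selects
 // on IsShort and IsZero below pick between those forms (and the unshifted input
 // for Amt == 0).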
5214 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
5215
5216 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5217 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5218 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5219
5220 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
5221 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
5222
5223 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
5224 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
5225 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
5226
5227 Register ResultRegs[2];
5228 switch (MI.getOpcode()) {
5229 case TargetOpcode::G_SHL: {
5230 // Short: ShAmt < NewBitSize
5231 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
5232
5233 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
5234 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
5235 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
5236
5237 // Long: ShAmt >= NewBitSize
5238 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
5239 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
5240
5241 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
5242 auto Hi = MIRBuilder.buildSelect(
5243 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
5244
5245 ResultRegs[0] = Lo.getReg(0);
5246 ResultRegs[1] = Hi.getReg(0);
5247 break;
5248 }
5249 case TargetOpcode::G_LSHR:
5250 case TargetOpcode::G_ASHR: {
5251 // Short: ShAmt < NewBitSize
5252 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
5253
5254 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
5255 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
5256 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
5257
5258 // Long: ShAmt >= NewBitSize
5259 MachineInstrBuilder HiL;
5260 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5261 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
5262 } else {
5263 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
5264 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
5265 }
5266 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
5267 {InH, AmtExcess}); // Lo from Hi part.
5268
5269 auto Lo = MIRBuilder.buildSelect(
5270 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
5271
5272 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
5273
5274 ResultRegs[0] = Lo.getReg(0);
5275 ResultRegs[1] = Hi.getReg(0);
5276 break;
5277 }
5278 default:
5279 llvm_unreachable("not a shift");
5280 }
5281
5282 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
5283 MI.eraseFromParent();
5284 return Legalized;
5285}
5286
5287LegalizerHelper::LegalizeResult
5288LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
5289 LLT MoreTy) {
5290 assert(TypeIdx == 0 && "Expecting only Idx 0");
5291
5293 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
5294 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
5295 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5296 moreElementsVectorSrc(MI, MoreTy, I);
5297 }
5298
5299 MachineBasicBlock &MBB = *MI.getParent();
5300 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
5301 moreElementsVectorDst(MI, MoreTy, 0);
5302 Observer.changedInstr(MI);
5303 return Legalized;
5304}
5305
5306MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
5307 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
5308 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
5309
5310 switch (Opcode) {
5311 default:
5312 llvm_unreachable(
5313 "getNeutralElementForVecReduce called with invalid opcode!");
5314 case TargetOpcode::G_VECREDUCE_ADD:
5315 case TargetOpcode::G_VECREDUCE_OR:
5316 case TargetOpcode::G_VECREDUCE_XOR:
5317 case TargetOpcode::G_VECREDUCE_UMAX:
5318 return MIRBuilder.buildConstant(Ty, 0);
5319 case TargetOpcode::G_VECREDUCE_MUL:
5320 return MIRBuilder.buildConstant(Ty, 1);
5321 case TargetOpcode::G_VECREDUCE_AND:
5322 case TargetOpcode::G_VECREDUCE_UMIN:
5323 return MIRBuilder.buildConstant(Ty,
5324 APInt::getAllOnes(Ty.getSizeInBits()));
5325 case TargetOpcode::G_VECREDUCE_SMAX:
5326 return MIRBuilder.buildConstant(Ty,
5327 APInt::getSignedMinValue(Ty.getSizeInBits()));
5328 case TargetOpcode::G_VECREDUCE_SMIN:
5329 return MIRBuilder.buildConstant(Ty,
5330 APInt::getSignedMaxValue(Ty.getSizeInBits()));
5331 case TargetOpcode::G_VECREDUCE_FADD:
5332 return MIRBuilder.buildFConstant(Ty, -0.0);
5333 case TargetOpcode::G_VECREDUCE_FMUL:
5334 return MIRBuilder.buildFConstant(Ty, 1.0);
5335 case TargetOpcode::G_VECREDUCE_FMINIMUM:
5336 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
5337 assert(false && "getNeutralElementForVecReduce unimplemented for "
5338 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
5339 }
5340 llvm_unreachable("switch expected to return!");
5341}
5342
5343LegalizerHelper::LegalizeResult
5344LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
5345 LLT MoreTy) {
5346 unsigned Opc = MI.getOpcode();
5347 switch (Opc) {
5348 case TargetOpcode::G_IMPLICIT_DEF:
5349 case TargetOpcode::G_LOAD: {
5350 if (TypeIdx != 0)
5351 return UnableToLegalize;
5353 moreElementsVectorDst(MI, MoreTy, 0);
5355 return Legalized;
5356 }
5357 case TargetOpcode::G_STORE:
5358 if (TypeIdx != 0)
5359 return UnableToLegalize;
5361 moreElementsVectorSrc(MI, MoreTy, 0);
5363 return Legalized;
5364 case TargetOpcode::G_AND:
5365 case TargetOpcode::G_OR:
5366 case TargetOpcode::G_XOR:
5367 case TargetOpcode::G_ADD:
5368 case TargetOpcode::G_SUB:
5369 case TargetOpcode::G_MUL:
5370 case TargetOpcode::G_FADD:
5371 case TargetOpcode::G_FSUB:
5372 case TargetOpcode::G_FMUL:
5373 case TargetOpcode::G_FDIV:
5374 case TargetOpcode::G_FCOPYSIGN:
5375 case TargetOpcode::G_UADDSAT:
5376 case TargetOpcode::G_USUBSAT:
5377 case TargetOpcode::G_SADDSAT:
5378 case TargetOpcode::G_SSUBSAT:
5379 case TargetOpcode::G_SMIN:
5380 case TargetOpcode::G_SMAX:
5381 case TargetOpcode::G_UMIN:
5382 case TargetOpcode::G_UMAX:
5383 case TargetOpcode::G_FMINNUM:
5384 case TargetOpcode::G_FMAXNUM:
5385 case TargetOpcode::G_FMINNUM_IEEE:
5386 case TargetOpcode::G_FMAXNUM_IEEE:
5387 case TargetOpcode::G_FMINIMUM:
5388 case TargetOpcode::G_FMAXIMUM:
5389 case TargetOpcode::G_STRICT_FADD:
5390 case TargetOpcode::G_STRICT_FSUB:
5391 case TargetOpcode::G_STRICT_FMUL:
5392 case TargetOpcode::G_SHL:
5393 case TargetOpcode::G_ASHR:
5394 case TargetOpcode::G_LSHR: {
5396 moreElementsVectorSrc(MI, MoreTy, 1);
5397 moreElementsVectorSrc(MI, MoreTy, 2);
5398 moreElementsVectorDst(MI, MoreTy, 0);
5400 return Legalized;
5401 }
5402 case TargetOpcode::G_FMA:
5403 case TargetOpcode::G_STRICT_FMA:
5404 case TargetOpcode::G_FSHR:
5405 case TargetOpcode::G_FSHL: {
5407 moreElementsVectorSrc(MI, MoreTy, 1);
5408 moreElementsVectorSrc(MI, MoreTy, 2);
5409 moreElementsVectorSrc(MI, MoreTy, 3);
5410 moreElementsVectorDst(MI, MoreTy, 0);
5412 return Legalized;
5413 }
5414 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
5415 case TargetOpcode::G_EXTRACT:
5416 if (TypeIdx != 1)
5417 return UnableToLegalize;
5419 moreElementsVectorSrc(MI, MoreTy, 1);
5421 return Legalized;
5422 case TargetOpcode::G_INSERT:
5423 case TargetOpcode::G_INSERT_VECTOR_ELT:
5424 case TargetOpcode::G_FREEZE:
5425 case TargetOpcode::G_FNEG:
5426 case TargetOpcode::G_FABS:
5427 case TargetOpcode::G_FSQRT:
5428 case TargetOpcode::G_FCEIL:
5429 case TargetOpcode::G_FFLOOR:
5430 case TargetOpcode::G_FNEARBYINT:
5431 case TargetOpcode::G_FRINT:
5432 case TargetOpcode::G_INTRINSIC_ROUND:
5433 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
5434 case TargetOpcode::G_INTRINSIC_TRUNC:
5435 case TargetOpcode::G_BSWAP:
5436 case TargetOpcode::G_FCANONICALIZE:
5437 case TargetOpcode::G_SEXT_INREG:
5438 case TargetOpcode::G_ABS:
5439 if (TypeIdx != 0)
5440 return UnableToLegalize;
5442 moreElementsVectorSrc(MI, MoreTy, 1);
5443 moreElementsVectorDst(MI, MoreTy, 0);
5445 return Legalized;
5446 case TargetOpcode::G_SELECT: {
5447 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
5448 if (TypeIdx == 1) {
5449 if (!CondTy.isScalar() ||
5450 DstTy.getElementCount() != MoreTy.getElementCount())
5451 return UnableToLegalize;
5452
5453 // This is turning a scalar select of vectors into a vector
5454 // select. Broadcast the select condition.
5455 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
5457 MI.getOperand(1).setReg(ShufSplat.getReg(0));
5459 return Legalized;
5460 }
5461
5462 if (CondTy.isVector())
5463 return UnableToLegalize;
5464
5466 moreElementsVectorSrc(MI, MoreTy, 2);
5467 moreElementsVectorSrc(MI, MoreTy, 3);
5468 moreElementsVectorDst(MI, MoreTy, 0);
5470 return Legalized;
5471 }
5472 case TargetOpcode::G_UNMERGE_VALUES:
5473 return UnableToLegalize;
5474 case TargetOpcode::G_PHI:
5475 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
5476 case TargetOpcode::G_SHUFFLE_VECTOR:
5477 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
5478 case TargetOpcode::G_BUILD_VECTOR: {
5479 SmallVector<SrcOp, 8> Elts;
5480 for (auto Op : MI.uses()) {
5481 Elts.push_back(Op.getReg());
5482 }
5483
5484 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
5485 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
5486 }
5487
5488 MIRBuilder.buildDeleteTrailingVectorElements(
5489 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
5490 MI.eraseFromParent();
5491 return Legalized;
5492 }
5493 case TargetOpcode::G_SEXT:
5494 case TargetOpcode::G_ZEXT:
5495 case TargetOpcode::G_ANYEXT:
5496 case TargetOpcode::G_TRUNC:
5497 case TargetOpcode::G_FPTRUNC:
5498 case TargetOpcode::G_FPEXT:
5499 case TargetOpcode::G_FPTOSI:
5500 case TargetOpcode::G_FPTOUI:
5501 case TargetOpcode::G_SITOFP:
5502 case TargetOpcode::G_UITOFP: {
5504 LLT SrcExtTy;
5505 LLT DstExtTy;
5506 if (TypeIdx == 0) {
5507 DstExtTy = MoreTy;
5508 SrcExtTy = LLT::fixed_vector(
5509 MoreTy.getNumElements(),
5510 MRI.getType(MI.getOperand(1).getReg()).getElementType());
5511 } else {
5512 DstExtTy = LLT::fixed_vector(
5513 MoreTy.getNumElements(),
5514 MRI.getType(MI.getOperand(0).getReg()).getElementType());
5515 SrcExtTy = MoreTy;
5516 }
5517 moreElementsVectorSrc(MI, SrcExtTy, 1);
5518 moreElementsVectorDst(MI, DstExtTy, 0);
5520 return Legalized;
5521 }
5522 case TargetOpcode::G_ICMP:
5523 case TargetOpcode::G_FCMP: {
5524 if (TypeIdx != 1)
5525 return UnableToLegalize;
5526
5528 moreElementsVectorSrc(MI, MoreTy, 2);
5529 moreElementsVectorSrc(MI, MoreTy, 3);
5530 LLT CondTy = LLT::fixed_vector(
5531 MoreTy.getNumElements(),
5532 MRI.getType(MI.getOperand(0).getReg()).getElementType());
5533 moreElementsVectorDst(MI, CondTy, 0);
5535 return Legalized;
5536 }
5537 case TargetOpcode::G_BITCAST: {
5538 if (TypeIdx != 0)
5539 return UnableToLegalize;
5540
5541 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
5542 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5543
5544 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
5545 if (coefficient % DstTy.getNumElements() != 0)
5546 return UnableToLegalize;
5547
5548 coefficient = coefficient / DstTy.getNumElements();
5549
5550 LLT NewTy = SrcTy.changeElementCount(
5551 ElementCount::get(coefficient, MoreTy.isScalable()));
5553 moreElementsVectorSrc(MI, NewTy, 1);
5554 moreElementsVectorDst(MI, MoreTy, 0);
5556 return Legalized;
5557 }
5558 case TargetOpcode::G_VECREDUCE_FADD:
5559 case TargetOpcode::G_VECREDUCE_FMUL:
5560 case TargetOpcode::G_VECREDUCE_ADD:
5561 case TargetOpcode::G_VECREDUCE_MUL:
5562 case TargetOpcode::G_VECREDUCE_AND:
5563 case TargetOpcode::G_VECREDUCE_OR:
5564 case TargetOpcode::G_VECREDUCE_XOR:
5565 case TargetOpcode::G_VECREDUCE_SMAX:
5566 case TargetOpcode::G_VECREDUCE_SMIN:
5567 case TargetOpcode::G_VECREDUCE_UMAX:
5568 case TargetOpcode::G_VECREDUCE_UMIN: {
5569 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
5570 MachineOperand &MO = MI.getOperand(1);
5571 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
5572 auto NeutralElement = getNeutralElementForVecReduce(
5573 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
5574
5575 LLT IdxTy(TLI.getVectorIdxTy(MIRBuilder.getDataLayout()));
5576 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
5577 i != e; i++) {
5578 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
5579 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
5580 NeutralElement, Idx);
5581 }
5582
5584 MO.setReg(NewVec.getReg(0));
5586 return Legalized;
5587 }
5588
5589 default:
5590 return UnableToLegalize;
5591 }
5592}
5593
5594LegalizerHelper::LegalizeResult
5595LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
5596 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5597 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5598 unsigned MaskNumElts = Mask.size();
5599 unsigned SrcNumElts = SrcTy.getNumElements();
5600 LLT DestEltTy = DstTy.getElementType();
5601
5602 if (MaskNumElts == SrcNumElts)
5603 return Legalized;
5604
5605 if (MaskNumElts < SrcNumElts) {
5606 // Extend mask to match new destination vector size with
5607 // undef values.
5608 SmallVector<int, 16> NewMask(Mask);
5609 for (unsigned I = MaskNumElts; I < SrcNumElts; ++I)
5610 NewMask.push_back(-1);
5611
5612 moreElementsVectorDst(MI, SrcTy, 0);
5614 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
5615 MI.getOperand(1).getReg(),
5616 MI.getOperand(2).getReg(), NewMask);
5617 MI.eraseFromParent();
5618
5619 return Legalized;
5620 }
5621
5622 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
5623 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
5624 LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
5625
5626 // Create new source vectors by concatenating the initial
5627 // source vectors with undefined vectors of the same size.
5628 auto Undef = MIRBuilder.buildUndef(SrcTy);
5629 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
5630 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
5631 MOps1[0] = MI.getOperand(1).getReg();
5632 MOps2[0] = MI.getOperand(2).getReg();
5633
5634 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
5635 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
5636
5637 // Readjust mask for new input vector length.
5638 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
5639 for (unsigned I = 0; I != MaskNumElts; ++I) {
5640 int Idx = Mask[I];
5641 if (Idx >= static_cast<int>(SrcNumElts))
5642 Idx += PaddedMaskNumElts - SrcNumElts;
5643 MappedOps[I] = Idx;
5644 }
5645
5646 // If we got more elements than required, extract subvector.
5647 if (MaskNumElts != PaddedMaskNumElts) {
5648 auto Shuffle =
5649 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
5650
5651 SmallVector<Register, 16> Elts(MaskNumElts);
5652 for (unsigned I = 0; I < MaskNumElts; ++I) {
5653 Elts[I] =
5654 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
5655 .getReg(0);
5656 }
5657 MIRBuilder.buildBuildVector(DstReg, Elts);
5658 } else {
5659 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
5660 }
5661
5662 MI.eraseFromParent();
5663 return Legalized;
5664}
5665
5666LegalizerHelper::LegalizeResult
5667LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
5668 unsigned int TypeIdx, LLT MoreTy) {
5669 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
5670 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5671 unsigned NumElts = DstTy.getNumElements();
5672 unsigned WidenNumElts = MoreTy.getNumElements();
5673
5674 if (DstTy.isVector() && Src1Ty.isVector() &&
5675 DstTy.getNumElements() != Src1Ty.getNumElements()) {
5676 return equalizeVectorShuffleLengths(MI);
5677 }
5678
5679 if (TypeIdx != 0)
5680 return UnableToLegalize;
5681
5682 // Expect a canonicalized shuffle.
5683 if (DstTy != Src1Ty || DstTy != Src2Ty)
5684 return UnableToLegalize;
5685
5686 moreElementsVectorSrc(MI, MoreTy, 1);
5687 moreElementsVectorSrc(MI, MoreTy, 2);
5688
5689 // Adjust mask based on new input vector length.
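 // E.g. with NumElts = 2 widened to WidenNumElts = 4, an index of 3 (element 1
 // of the second input) becomes 3 - 2 + 4 = 5 in the widened shuffle.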
5690 SmallVector<int, 16> NewMask;
5691 for (unsigned I = 0; I != NumElts; ++I) {
5692 int Idx = Mask[I];
5693 if (Idx < static_cast<int>(NumElts))
5694 NewMask.push_back(Idx);
5695 else
5696 NewMask.push_back(Idx - NumElts + WidenNumElts);
5697 }
5698 for (unsigned I = NumElts; I != WidenNumElts; ++I)
5699 NewMask.push_back(-1);
5700 moreElementsVectorDst(MI, MoreTy, 0);
5702 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
5703 MI.getOperand(1).getReg(),
5704 MI.getOperand(2).getReg(), NewMask);
5705 MI.eraseFromParent();
5706 return Legalized;
5707}
5708
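// Schoolbook multiplication of multi-part operands: DstRegs[K] accumulates the
// low halves of the products Src1[K - I] * Src2[I], the high halves of the
// products that fed part K - 1, and the carries from the previous column. With
// two source parts this gives Dst0 = mul(S1_0, S2_0) and
// Dst1 = mul(S1_1, S2_0) + mul(S1_0, S2_1) + umulh(S1_0, S2_0).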
5709void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
5710 ArrayRef<Register> Src1Regs,
5711 ArrayRef<Register> Src2Regs,
5712 LLT NarrowTy) {
5713 MachineIRBuilder &B = MIRBuilder;
5714 unsigned SrcParts = Src1Regs.size();
5715 unsigned DstParts = DstRegs.size();
5716
5717 unsigned DstIdx = 0; // Low bits of the result.
5718 Register FactorSum =
5719 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
5720 DstRegs[DstIdx] = FactorSum;
5721
5722 unsigned CarrySumPrevDstIdx;
5723 SmallVector<Register, 4> Factors;
5724
5725 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
5726 // Collect low parts of muls for DstIdx.
5727 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
5728 i <= std::min(DstIdx, SrcParts - 1); ++i) {
5729 MachineInstrBuilder Mul =
5730 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
5731 Factors.push_back(Mul.getReg(0));
5732 }
5733 // Collect high parts of muls from previous DstIdx.
5734 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
5735 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
5736 MachineInstrBuilder Umulh =
5737 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
5738 Factors.push_back(Umulh.getReg(0));
5739 }
5740 // Add CarrySum from additions calculated for previous DstIdx.
5741 if (DstIdx != 1) {
5742 Factors.push_back(CarrySumPrevDstIdx);
5743 }
5744
5745 Register CarrySum;
5746 // Add all factors and accumulate all carries into CarrySum.
5747 if (DstIdx != DstParts - 1) {
5748 MachineInstrBuilder Uaddo =
5749 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
5750 FactorSum = Uaddo.getReg(0);
5751 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
5752 for (unsigned i = 2; i < Factors.size(); ++i) {
5753 MachineInstrBuilder Uaddo =
5754 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
5755 FactorSum = Uaddo.getReg(0);
5756 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
5757 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
5758 }
5759 } else {
5760 // Since value for the next index is not calculated, neither is CarrySum.
5761 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
5762 for (unsigned i = 2; i < Factors.size(); ++i)
5763 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
5764 }
5765
5766 CarrySumPrevDstIdx = CarrySum;
5767 DstRegs[DstIdx] = FactorSum;
5768 Factors.clear();
5769 }
5770}
5771
5772LegalizerHelper::LegalizeResult
5773LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
5774 LLT NarrowTy) {
5775 if (TypeIdx != 0)
5776 return UnableToLegalize;
5777
5778 Register DstReg = MI.getOperand(0).getReg();
5779 LLT DstType = MRI.getType(DstReg);
5780 // FIXME: add support for vector types
5781 if (DstType.isVector())
5782 return UnableToLegalize;
5783
5784 unsigned Opcode = MI.getOpcode();
5785 unsigned OpO, OpE, OpF;
5786 switch (Opcode) {
5787 case TargetOpcode::G_SADDO:
5788 case TargetOpcode::G_SADDE:
5789 case TargetOpcode::G_UADDO:
5790 case TargetOpcode::G_UADDE:
5791 case TargetOpcode::G_ADD:
5792 OpO = TargetOpcode::G_UADDO;
5793 OpE = TargetOpcode::G_UADDE;
5794 OpF = TargetOpcode::G_UADDE;
5795 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
5796 OpF = TargetOpcode::G_SADDE;
5797 break;
5798 case TargetOpcode::G_SSUBO:
5799 case TargetOpcode::G_SSUBE:
5800 case TargetOpcode::G_USUBO:
5801 case TargetOpcode::G_USUBE:
5802 case TargetOpcode::G_SUB:
5803 OpO = TargetOpcode::G_USUBO;
5804 OpE = TargetOpcode::G_USUBE;
5805 OpF = TargetOpcode::G_USUBE;
5806 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
5807 OpF = TargetOpcode::G_SSUBE;
5808 break;
5809 default:
5810 llvm_unreachable("Unexpected add/sub opcode!");
5811 }
5812
5813 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
5814 unsigned NumDefs = MI.getNumExplicitDefs();
5815 Register Src1 = MI.getOperand(NumDefs).getReg();
5816 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
5817 Register CarryDst, CarryIn;
5818 if (NumDefs == 2)
5819 CarryDst = MI.getOperand(1).getReg();
5820 if (MI.getNumOperands() == NumDefs + 3)
5821 CarryIn = MI.getOperand(NumDefs + 2).getReg();
5822
5823 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
5824 LLT LeftoverTy, DummyTy;
5825 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
5826 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
5827 MIRBuilder, MRI);
5828 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
5829 MRI);
5830
5831 int NarrowParts = Src1Regs.size();
5832 for (int I = 0, E = Src1Left.size(); I != E; ++I) {
5833 Src1Regs.push_back(Src1Left[I]);
5834 Src2Regs.push_back(Src2Left[I]);
5835 }
5836 DstRegs.reserve(Src1Regs.size());
5837
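 // Chain the pieces together: parts with no incoming carry use G_UADDO/G_USUBO,
 // later parts consume the previous carry with the carry-in variants, and the
 // most significant part switches to the signed opcode (OpF) when a signed
 // overflow flag was requested.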
5838 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
5839 Register DstReg =
5840 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
5841 Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
5842 // Forward the final carry-out to the destination register
5843 if (i == e - 1 && CarryDst)
5844 CarryOut = CarryDst;
5845
5846 if (!CarryIn) {
5847 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
5848 {Src1Regs[i], Src2Regs[i]});
5849 } else if (i == e - 1) {
5850 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
5851 {Src1Regs[i], Src2Regs[i], CarryIn});
5852 } else {
5853 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
5854 {Src1Regs[i], Src2Regs[i], CarryIn});
5855 }
5856
5857 DstRegs.push_back(DstReg);
5858 CarryIn = CarryOut;
5859 }
5860 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
5861 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
5862 ArrayRef(DstRegs).drop_front(NarrowParts));
5863
5864 MI.eraseFromParent();
5865 return Legalized;
5866}
5867
5868LegalizerHelper::LegalizeResult
5869LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
5870 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
5871
5872 LLT Ty = MRI.getType(DstReg);
5873 if (Ty.isVector())
5874 return UnableToLegalize;
5875
5876 unsigned Size = Ty.getSizeInBits();
5877 unsigned NarrowSize = NarrowTy.getSizeInBits();
5878 if (Size % NarrowSize != 0)
5879 return UnableToLegalize;
5880
5881 unsigned NumParts = Size / NarrowSize;
5882 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
5883 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
5884
5885 SmallVector<Register, 2> Src1Parts, Src2Parts;
5886 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
5887 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
5888 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
5889 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
5890
5891 // Take only high half of registers if this is high mul.
5892 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
5893 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
5894 MI.eraseFromParent();
5895 return Legalized;
5896}
5897
5898LegalizerHelper::LegalizeResult
5899LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
5900 LLT NarrowTy) {
5901 if (TypeIdx != 0)
5902 return UnableToLegalize;
5903
5904 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
5905
5906 Register Src = MI.getOperand(1).getReg();
5907 LLT SrcTy = MRI.getType(Src);
5908
5909 // If all finite floats fit into the narrowed integer type, we can just swap
5910 // out the result type. This is practically only useful for conversions from
5911 // half to at least 16-bits, so just handle the one case.
5912 if (SrcTy.getScalarType() != LLT::scalar(16) ||
5913 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
5914 return UnableToLegalize;
5915
5917 narrowScalarDst(MI, NarrowTy, 0,
5918 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
5920 return Legalized;
5921}
5922
5923LegalizerHelper::LegalizeResult
5924LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
5925 LLT NarrowTy) {
5926 if (TypeIdx != 1)
5927 return UnableToLegalize;
5928
5929 uint64_t NarrowSize = NarrowTy.getSizeInBits();
5930
5931 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
5932 // FIXME: add support for when SizeOp1 isn't an exact multiple of
5933 // NarrowSize.
5934 if (SizeOp1 % NarrowSize != 0)
5935 return UnableToLegalize;
5936 int NumParts = SizeOp1 / NarrowSize;
5937
5938 SmallVector<Register, 2> SrcRegs, DstRegs;
5940 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
5941 MIRBuilder, MRI);
5942
5943 Register OpReg = MI.getOperand(0).getReg();
5944 uint64_t OpStart = MI.getOperand(2).getImm();
5945 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
5946 for (int i = 0; i < NumParts; ++i) {
5947 unsigned SrcStart = i * NarrowSize;
5948
5949 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
5950 // No part of the extract uses this subregister, ignore it.
5951 continue;
5952 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
5953 // The entire subregister is extracted, forward the value.
5954 DstRegs.push_back(SrcRegs[i]);
5955 continue;
5956 }
5957
5958 // OpSegStart is where this destination segment would start in OpReg if it
5959 // extended infinitely in both directions.
5960 int64_t ExtractOffset;
5961 uint64_t SegSize;
5962 if (OpStart < SrcStart) {
5963 ExtractOffset = 0;
5964 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
5965 } else {
5966 ExtractOffset = OpStart - SrcStart;
5967 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
5968 }
5969
5970 Register SegReg = SrcRegs[i];
5971 if (ExtractOffset != 0 || SegSize != NarrowSize) {
5972 // A genuine extract is needed.
5973 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
5974 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
5975 }
5976
5977 DstRegs.push_back(SegReg);
5978 }
5979
5980 Register DstReg = MI.getOperand(0).getReg();
5981 if (MRI.getType(DstReg).isVector())
5982 MIRBuilder.buildBuildVector(DstReg, DstRegs);
5983 else if (DstRegs.size() > 1)
5984 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
5985 else
5986 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
5987 MI.eraseFromParent();
5988 return Legalized;
5989}
5990
5991LegalizerHelper::LegalizeResult
5992LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
5993 LLT NarrowTy) {
5994 // FIXME: Don't know how to handle secondary types yet.
5995 if (TypeIdx != 0)
5996 return UnableToLegalize;
5997
5998 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
6000 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
6001 LLT LeftoverTy;
6002 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
6003 LeftoverRegs, MIRBuilder, MRI);
6004
6005 for (Register Reg : LeftoverRegs)
6006 SrcRegs.push_back(Reg);
6007
6008 uint64_t NarrowSize = NarrowTy.getSizeInBits();
6009 Register OpReg = MI.getOperand(2).getReg();
6010 uint64_t OpStart = MI.getOperand(3).getImm();
6011 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
6012 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
6013 unsigned DstStart = I * NarrowSize;
6014
6015 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6016 // The entire subregister is defined by this insert, forward the new
6017 // value.
6018 DstRegs.push_back(OpReg);
6019 continue;
6020 }
6021
6022 Register SrcReg = SrcRegs[I];
6023 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
6024 // The leftover reg is smaller than NarrowTy, so we need to extend it.
6025 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
6026 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
6027 }
6028
6029 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
6030 // No part of the insert affects this subregister, forward the original.
6031 DstRegs.push_back(SrcReg);
6032 continue;
6033 }
6034
6035 // OpSegStart is where this destination segment would start in OpReg if it
6036 // extended infinitely in both directions.
6037 int64_t ExtractOffset, InsertOffset;
6038 uint64_t SegSize;
6039 if (OpStart < DstStart) {
6040 InsertOffset = 0;
6041 ExtractOffset = DstStart - OpStart;
6042 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
6043 } else {
6044 InsertOffset = OpStart - DstStart;
6045 ExtractOffset = 0;
6046 SegSize =
6047 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
6048 }
6049
6050 Register SegReg = OpReg;
6051 if (ExtractOffset != 0 || SegSize != OpSize) {
6052 // A genuine extract is needed.
6053 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
6054 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
6055 }
6056
6057 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
6058 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
6059 DstRegs.push_back(DstReg);
6060 }
6061
6062 uint64_t WideSize = DstRegs.size() * NarrowSize;
6063 Register DstReg = MI.getOperand(0).getReg();
6064 if (WideSize > RegTy.getSizeInBits()) {
6065 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
6066 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
6067 MIRBuilder.buildTrunc(DstReg, MergeReg);
6068 } else
6069 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6070
6071 MI.eraseFromParent();
6072 return Legalized;
6073}
6074
6075LegalizerHelper::LegalizeResult
6076LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
6077 LLT NarrowTy) {
6078 Register DstReg = MI.getOperand(0).getReg();
6079 LLT DstTy = MRI.getType(DstReg);
6080
6081 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
6082
6083 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6084 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
6085 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6086 LLT LeftoverTy;
6087 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
6088 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
6089 return UnableToLegalize;
6090
6091 LLT Unused;
6092 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
6093 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6094 llvm_unreachable("inconsistent extractParts result");
6095
6096 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6097 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
6098 {Src0Regs[I], Src1Regs[I]});
6099 DstRegs.push_back(Inst.getReg(0));
6100 }
6101
6102 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6103 auto Inst = MIRBuilder.buildInstr(
6104 MI.getOpcode(),
6105 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
6106 DstLeftoverRegs.push_back(Inst.getReg(0));
6107 }
6108
6109 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6110 LeftoverTy, DstLeftoverRegs);
6111
6112 MI.eraseFromParent();
6113 return Legalized;
6114}
6115
6116LegalizerHelper::LegalizeResult
6117LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
6118 LLT NarrowTy) {
6119 if (TypeIdx != 0)
6120 return UnableToLegalize;
6121
6122 auto [DstReg, SrcReg] = MI.getFirst2Regs();
6123
6124 LLT DstTy = MRI.getType(DstReg);
6125 if (DstTy.isVector())
6126 return UnableToLegalize;
6127
6128 SmallVector<Register, 8> Parts;
6129 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
6130 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
6131 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
6132
6133 MI.eraseFromParent();
6134 return Legalized;
6135}
6136
6137LegalizerHelper::LegalizeResult
6138LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
6139 LLT NarrowTy) {
6140 if (TypeIdx != 0)
6141 return UnableToLegalize;
6142
6143 Register CondReg = MI.getOperand(1).getReg();
6144 LLT CondTy = MRI.getType(CondReg);
6145 if (CondTy.isVector()) // TODO: Handle vselect
6146 return UnableToLegalize;
6147
6148 Register DstReg = MI.getOperand(0).getReg();
6149 LLT DstTy = MRI.getType(DstReg);
6150
6151 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6152 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6153 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
6154 LLT LeftoverTy;
6155 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
6156 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6157 return UnableToLegalize;
6158
6159 LLT Unused;
6160 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
6161 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
6162 llvm_unreachable("inconsistent extractParts result");
6163
6164 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6165 auto Select = MIRBuilder.buildSelect(NarrowTy,
6166 CondReg, Src1Regs[I], Src2Regs[I]);
6167 DstRegs.push_back(Select.getReg(0));
6168 }
6169
6170 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6171 auto Select = MIRBuilder.buildSelect(
6172 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
6173 DstLeftoverRegs.push_back(Select.getReg(0));
6174 }
6175
6176 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6177 LeftoverTy, DstLeftoverRegs);
6178
6179 MI.eraseFromParent();
6180 return Legalized;
6181}
6182
6183LegalizerHelper::LegalizeResult
6184LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
6185 LLT NarrowTy) {
6186 if (TypeIdx != 1)
6187 return UnableToLegalize;
6188
6189 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6190 unsigned NarrowSize = NarrowTy.getSizeInBits();
6191
6192 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6193 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
6194
6195 MachineIRBuilder &B = MIRBuilder;
6196 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6197 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
6198 auto C_0 = B.buildConstant(NarrowTy, 0);
6199 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
6200 UnmergeSrc.getReg(1), C_0);
6201 auto LoCTLZ = IsUndef ?
6202 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
6203 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
6204 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
6205 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
6206 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
6207 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
6208
6209 MI.eraseFromParent();
6210 return Legalized;
6211 }
6212
6213 return UnableToLegalize;
6214}
6215
6216LegalizerHelper::LegalizeResult
6217LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
6218 LLT NarrowTy) {
6219 if (TypeIdx != 1)
6220 return UnableToLegalize;
6221
6222 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6223 unsigned NarrowSize = NarrowTy.getSizeInBits();
6224
6225 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6226 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
6227
6228 MachineIRBuilder &B = MIRBuilder;
6229 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6230 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
6231 auto C_0 = B.buildConstant(NarrowTy, 0);
6232 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
6233 UnmergeSrc.getReg(0), C_0);
6234 auto HiCTTZ = IsUndef ?
6235 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
6236 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
6237 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
6238 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
6239 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
6240 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
6241
6242 MI.eraseFromParent();
6243 return Legalized;
6244 }
6245
6246 return UnableToLegalize;
6247}
6248
6249LegalizerHelper::LegalizeResult
6250LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
6251 LLT NarrowTy) {
6252 if (TypeIdx != 1)
6253 return UnableToLegalize;
6254
6255 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6256 unsigned NarrowSize = NarrowTy.getSizeInBits();
6257
6258 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6259 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
6260
6261 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
6262 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
6263 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
6264
6265 MI.eraseFromParent();
6266 return Legalized;
6267 }
6268
6269 return UnableToLegalize;
6270}
6271
6272LegalizerHelper::LegalizeResult
6273LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
6274 LLT NarrowTy) {
6275 if (TypeIdx != 1)
6276 return UnableToLegalize;
6277
6278 MachineIRBuilder &B = MIRBuilder;
6279 Register ExpReg = MI.getOperand(2).getReg();
6280 LLT ExpTy = MRI.getType(ExpReg);
6281
6282 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
6283
6284 // Clamp the exponent to the range of the target type.
6285 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
6286 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
6287 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
6288 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
6289
6290 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
6292 MI.getOperand(2).setReg(Trunc.getReg(0));
6294 return Legalized;
6295}
6296
6297LegalizerHelper::LegalizeResult
6298LegalizerHelper::lowerBitCount(MachineInstr &MI) {
6299 unsigned Opc = MI.getOpcode();
6300 const auto &TII = MIRBuilder.getTII();
6301 auto isSupported = [this](const LegalityQuery &Q) {
6302 auto QAction = LI.getAction(Q).Action;
6303 return QAction == Legal || QAction == Libcall || QAction == Custom;
6304 };
6305 switch (Opc) {
6306 default:
6307 return UnableToLegalize;
6308 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
6309 // This trivially expands to CTLZ.
6311 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
6313 return Legalized;
6314 }
6315 case TargetOpcode::G_CTLZ: {
6316 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6317 unsigned Len = SrcTy.getSizeInBits();
6318
6319 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6320 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
6321 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
6322 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
6323 auto ICmp = MIRBuilder.buildICmp(
6324 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
6325 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
6326 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
6327 MI.eraseFromParent();
6328 return Legalized;
6329 }
6330 // for now, we do this:
6331 // NewLen = NextPowerOf2(Len);
6332 // x = x | (x >> 1);
6333 // x = x | (x >> 2);
6334 // ...
6335 // x = x | (x >>16);
6336 // x = x | (x >>32); // for 64-bit input
6337 // Up to NewLen/2
6338 // return Len - popcount(x);
6339 //
6340 // Ref: "Hacker's Delight" by Henry Warren
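 // E.g. for a 16-bit x = 0x00F0 the or-shift cascade smears the leading one
 // downwards, giving 0x00FF; popcount(0x00FF) = 8, so ctlz = 16 - 8 = 8.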
6341 Register Op = SrcReg;
6342 unsigned NewLen = PowerOf2Ceil(Len);
6343 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
6344 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
6345 auto MIBOp = MIRBuilder.buildOr(
6346 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
6347 Op = MIBOp.getReg(0);
6348 }
6349 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
6350 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
6351 MIBPop);
6352 MI.eraseFromParent();
6353 return Legalized;
6354 }
6355 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
6356 // This trivially expands to CTTZ.
6358 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
6360 return Legalized;
6361 }
6362 case TargetOpcode::G_CTTZ: {
6363 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6364
6365 unsigned Len = SrcTy.getSizeInBits();
6366 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6367 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
6368 // zero.
6369 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
6370 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
6371 auto ICmp = MIRBuilder.buildICmp(
6372 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
6373 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
6374 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
6375 MI.eraseFromParent();
6376 return Legalized;
6377 }
6378 // for now, we use: { return popcount(~x & (x - 1)); }
6379 // unless the target has ctlz but not ctpop, in which case we use:
6380 // { return 32 - nlz(~x & (x-1)); }
6381 // Ref: "Hacker's Delight" by Henry Warren
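 // E.g. x = 0b1000: ~x & (x - 1) = 0b0111, popcount = 3 = cttz(x).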
6382 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
6383 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
6384 auto MIBTmp = MIRBuilder.buildAnd(
6385 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
6386 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
6387 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
6388 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
6389 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
6390 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
6391 MI.eraseFromParent();
6392 return Legalized;
6393 }
6395 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
6396 MI.getOperand(1).setReg(MIBTmp.getReg(0));
6398 return Legalized;
6399 }
6400 case TargetOpcode::G_CTPOP: {
6401 Register SrcReg = MI.getOperand(1).getReg();
6402 LLT Ty = MRI.getType(SrcReg);
6403 unsigned Size = Ty.getSizeInBits();
6404 MachineIRBuilder &B = MIRBuilder;
6405
6406 // Count set bits in blocks of 2 bits. Default approach would be
6407 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
6408 // We use following formula instead:
6409 // B2Count = val - { (val >> 1) & 0x55555555 }
6410 // since it gives same result in blocks of 2 with one instruction less.
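 // E.g. for the 2-bit block 0b11: 0b11 - 0b01 = 0b10 (two bits set); for 0b10:
 // 0b10 - 0b01 = 0b01 (one bit set).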
6411 auto C_1 = B.buildConstant(Ty, 1);
6412 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
6413 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
6414 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
6415 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
6416 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
6417
6418 // In order to get count in blocks of 4 add values from adjacent block of 2.
6419 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
6420 auto C_2 = B.buildConstant(Ty, 2);
6421 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
6422 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
6423 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
6424 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
6425 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
6426 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
6427
6428 // For count in blocks of 8 bits we don't have to mask high 4 bits before
6429 // addition since count value sits in range {0,...,8} and 4 bits are enough
6430 // to hold such binary values. After addition high 4 bits still hold count
6431 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
6432 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
6433 auto C_4 = B.buildConstant(Ty, 4);
6434 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
6435 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
6436 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
6437 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
6438 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
6439
6440 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
6441 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
6442 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
6443 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
6444
6445 // Shift count result from 8 high bits to low bits.
6446 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
6447
6448 auto IsMulSupported = [this](const LLT Ty) {
6449 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
6450 return Action == Legal || Action == WidenScalar || Action == Custom;
6451 };
6452 if (IsMulSupported(Ty)) {
6453 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
6454 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
6455 } else {
6456 auto ResTmp = B8Count;
6457 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
6458 auto ShiftC = B.buildConstant(Ty, Shift);
6459 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
6460 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
6461 }
6462 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
6463 }
6464 MI.eraseFromParent();
6465 return Legalized;
6466 }
6467 }
6468}
6469
6470// Check that (every element of) Reg is undef or not an exact multiple of BW.
6471static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
6472 Register Reg, unsigned BW) {
6473 return matchUnaryPredicate(
6474 MRI, Reg,
6475 [=](const Constant *C) {
6476 // Null constant here means an undef.
6477 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
6478 return !CI || CI->getValue().urem(BW) != 0;
6479 },
6480 /*AllowUndefs*/ true);
6481}
6482
6483LegalizerHelper::LegalizeResult
6484LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
6485 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
6486 LLT Ty = MRI.getType(Dst);
6487 LLT ShTy = MRI.getType(Z);
6488
6489 unsigned BW = Ty.getScalarSizeInBits();
6490
6491 if (!isPowerOf2_32(BW))
6492 return UnableToLegalize;
6493
6494 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6495 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6496
6497 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
6498 // fshl X, Y, Z -> fshr X, Y, -Z
6499 // fshr X, Y, Z -> fshl X, Y, -Z
6500 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
6501 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
6502 } else {
6503 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
6504 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
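 // Using the reverse opcode with -Z is only sound when Z is known not to be a
 // multiple of BW (fshl X, Y, 0 is X, while fshr X, Y, 0 is Y). Otherwise ~Z,
 // i.e. -Z - 1, is used and the missing one-bit shift is folded into the
 // operands up front.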
6505 auto One = MIRBuilder.buildConstant(ShTy, 1);
6506 if (IsFSHL) {
6507 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
6508 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
6509 } else {
6510 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
6511 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
6512 }
6513
6514 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
6515 }
6516
6517 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
6518 MI.eraseFromParent();
6519 return Legalized;
6520}
6521
6522LegalizerHelper::LegalizeResult
6523LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
6524 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
6525 LLT Ty = MRI.getType(Dst);
6526 LLT ShTy = MRI.getType(Z);
6527
6528 const unsigned BW = Ty.getScalarSizeInBits();
6529 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6530
6531 Register ShX, ShY;
6532 Register ShAmt, InvShAmt;
6533
6534 // FIXME: Emit optimized urem by constant instead of letting it expand later.
6535 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
6536 // fshl: X << C | Y >> (BW - C)
6537 // fshr: X << (BW - C) | Y >> C
6538 // where C = Z % BW is not zero
6539 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
6540 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
6541 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
6542 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
6543 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
6544 } else {
6545 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
6546 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
6547 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
6548 if (isPowerOf2_32(BW)) {
6549 // Z % BW -> Z & (BW - 1)
6550 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
6551 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
6552 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
6553 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
6554 } else {
6555 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
6556 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
6557 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
6558 }
6559
6560 auto One = MIRBuilder.buildConstant(ShTy, 1);
6561 if (IsFSHL) {
6562 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
6563 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
6564 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
6565 } else {
6566 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
6567 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
6568 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
6569 }
6570 }
6571
6572 MIRBuilder.buildOr(Dst, ShX, ShY);
6573 MI.eraseFromParent();
6574 return Legalized;
6575}
6576
6577LegalizerHelper::LegalizeResult
6578LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
6579 // These operations approximately do the following (while avoiding undefined
6580 // shifts by BW):
6581 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
6582 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
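 // E.g. for s8 operands, G_FSHL X = 0xAB, Y = 0xCD, Z = 4 yields 0xBC: the
 // concatenation X:Y shifted left by 4 with the high 8 bits kept.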
6583 Register Dst = MI.getOperand(0).getReg();
6584 LLT Ty = MRI.getType(Dst);
6585 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
6586
6587 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6588 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6589
6590 // TODO: Use smarter heuristic that accounts for vector legalization.
6591 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
6592 return lowerFunnelShiftAsShifts(MI);
6593
6594 // This only works for powers of 2, fallback to shifts if it fails.
6595 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
6596 if (Result == UnableToLegalize)
6597 return lowerFunnelShiftAsShifts(MI);
6598 return Result;
6599}
6600
6601LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
6602 auto [Dst, Src] = MI.getFirst2Regs();
6603 LLT DstTy = MRI.getType(Dst);
6604 LLT SrcTy = MRI.getType(Src);
6605
6606 uint32_t DstTySize = DstTy.getSizeInBits();
6607 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
6608 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
6609
6610 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
6611 !isPowerOf2_32(SrcTyScalarSize))
6612 return UnableToLegalize;
6613
6614 // The step between the extends is too large; split it by creating an
6615 // intermediate extend instruction.
6616 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
6617 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
6618 // If the destination type is illegal, split it into multiple statements
6619 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
6620 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
6621 // Unmerge the vector
6622 LLT EltTy = MidTy.changeElementCount(
6623 MidTy.getElementCount().divideCoefficientBy(2));
6624 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
6625
6626 // ZExt the vectors
6627 LLT ZExtResTy = DstTy.changeElementCount(
6628 DstTy.getElementCount().divideCoefficientBy(2));
6629 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
6630 {UnmergeSrc.getReg(0)});
6631 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
6632 {UnmergeSrc.getReg(1)});
6633
6634 // Merge the ending vectors
6635 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
6636
6637 MI.eraseFromParent();
6638 return Legalized;
6639 }
6640 return UnableToLegalize;
6641}
6642
6643LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
6644 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
6646 // Similar to how operand splitting is done in SelectionDAG, we can handle
6647 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
6648 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
6649 // %lo16(<4 x s16>) = G_TRUNC %inlo
6650 // %hi16(<4 x s16>) = G_TRUNC %inhi
6651 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
6652 // %res(<8 x s8>) = G_TRUNC %in16
6653
6654 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
6655
6656 Register DstReg = MI.getOperand(0).getReg();
6657 Register SrcReg = MI.getOperand(1).getReg();
6658 LLT DstTy = MRI.getType(DstReg);
6659 LLT SrcTy = MRI.getType(SrcReg);
6660
6661 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
6662 isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
6663 isPowerOf2_32(SrcTy.getNumElements()) &&
6664 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
6665 // Split input type.
6666 LLT SplitSrcTy = SrcTy.changeElementCount(
6667 SrcTy.getElementCount().divideCoefficientBy(2));
6668
6669 // First, split the source into two smaller vectors.
6670 SmallVector<Register, 2> SplitSrcs;
6671 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
6672
6673 // Truncate the splits into intermediate narrower elements.
6674 LLT InterTy;
6675 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
6676 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
6677 else
6678 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
6679 for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
6680 SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
6681 }
6682
6683 // Combine the new truncates into one vector
6684 auto Merge = MIRBuilder.buildMergeLikeInstr(
6685 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
6686
6687 // Truncate the new vector to the final result type
6688 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
6689 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
6690 else
6691 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
6692
6693 MI.eraseFromParent();
6694
6695 return Legalized;
6696 }
6697 return UnableToLegalize;
6698}
6699
6700LegalizerHelper::LegalizeResult
6701LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
6702 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
6703 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
6704 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
6705 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6706 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
6707 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
6708 MI.eraseFromParent();
6709 return Legalized;
6710}
6711
6712LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
6713 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
6714
6715 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
6716 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
6717
6719
6720 // If a rotate in the other direction is supported, use it.
6721 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6722 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
6723 isPowerOf2_32(EltSizeInBits))
6724 return lowerRotateWithReverseRotate(MI);
6725
6726 // If a funnel shift is supported, use it.
6727 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6728 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6729 bool IsFShLegal = false;
6730 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
6731 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
6732 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
6733 Register R3) {
6734 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
6735 MI.eraseFromParent();
6736 return Legalized;
6737 };
6738 // If a funnel shift in the other direction is supported, use it.
6739 if (IsFShLegal) {
6740 return buildFunnelShift(FShOpc, Dst, Src, Amt);
6741 } else if (isPowerOf2_32(EltSizeInBits)) {
6742 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
6743 return buildFunnelShift(RevFsh, Dst, Src, Amt);
6744 }
6745 }
6746
6747 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
6748 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
6749 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
6750 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
6751 Register ShVal;
6752 Register RevShiftVal;
6753 if (isPowerOf2_32(EltSizeInBits)) {
6754 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
6755 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
6756 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
6757 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
6758 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
6759 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
6760 RevShiftVal =
6761 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
6762 } else {
6763 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
6764 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
6765 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
6766 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
6767 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
6768 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
6769 auto One = MIRBuilder.buildConstant(AmtTy, 1);
6770 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
6771 RevShiftVal =
6772 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
6773 }
6774 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
6775 MI.eraseFromParent();
6776 return Legalized;
6777}
6778
6779// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
6780// representation.
6781LegalizerHelper::LegalizeResult
6782LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
6783 auto [Dst, Src] = MI.getFirst2Regs();
6784 const LLT S64 = LLT::scalar(64);
6785 const LLT S32 = LLT::scalar(32);
6786 const LLT S1 = LLT::scalar(1);
6787
6788 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
6789
6790 // unsigned cul2f(ulong u) {
6791 // uint lz = clz(u);
6792 // uint e = (u != 0) ? 127U + 63U - lz : 0;
6793 // u = (u << lz) & 0x7fffffffffffffffUL;
6794 // ulong t = u & 0xffffffffffUL;
6795 // uint v = (e << 23) | (uint)(u >> 40);
6796 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
6797 // return as_float(v + r);
6798 // }
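 // Worked example, u = 1: lz = 63, e = 127 + 63 - 63 = 127, (u << lz) masked
 // with 0x7fffffffffffffff is 0, so t = 0 and no rounding increment is needed;
 // v = 127 << 23 = 0x3f800000, and as_float(v) == 1.0f as expected.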
6799
6800 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
6801 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
6802
6803 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
6804
6805 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
6806 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
6807
6808 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
6809 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
6810
6811 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
6812 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
6813
6814 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
6815
6816 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
6817 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
6818
6819 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
6820 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
6821 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
6822
6823 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
6824 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
6825 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
6826 auto One = MIRBuilder.buildConstant(S32, 1);
6827
6828 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
6829 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
6830 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
6831 MIRBuilder.buildAdd(Dst, V, R);
6832
6833 MI.eraseFromParent();
6834 return Legalized;
6835}
6836
6837LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
6838 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6839
6840 if (SrcTy == LLT::scalar(1)) {
6841 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
6842 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
6843 MIRBuilder.buildSelect(Dst, Src, True, False);
6844 MI.eraseFromParent();
6845 return Legalized;
6846 }
6847
6848 if (SrcTy != LLT::scalar(64))
6849 return UnableToLegalize;
6850
6851 if (DstTy == LLT::scalar(32)) {
6852 // TODO: SelectionDAG has several alternative expansions to port which may
6853 // be more reasonable depending on the available instructions. If a target
6854 // has sitofp, does not have CTLZ, or can efficiently use f64 as an
6855 // intermediate type, this is probably worse.
6856 return lowerU64ToF32BitOps(MI);
6857 }
6858
6859 return UnableToLegalize;
6860}
6861
6862LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
6863 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6864
6865 const LLT S64 = LLT::scalar(64);
6866 const LLT S32 = LLT::scalar(32);
6867 const LLT S1 = LLT::scalar(1);
6868
6869 if (SrcTy == S1) {
6870 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
6871 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
6872 MIRBuilder.buildSelect(Dst, Src, True, False);
6873 MI.eraseFromParent();
6874 return Legalized;
6875 }
6876
6877 if (SrcTy != S64)
6878 return UnableToLegalize;
6879
6880 if (DstTy == S32) {
6881 // signed cl2f(long l) {
6882 // long s = l >> 63;
6883 // float r = cul2f((l + s) ^ s);
6884 // return s ? -r : r;
6885 // }
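 // E.g. l = -1: s = -1, (l + s) ^ s = (-2) ^ (-1) = 1, cul2f(1) = 1.0f, and
 // since s != 0 the select below returns -1.0f.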
6886 Register L = Src;
6887 auto SignBit = MIRBuilder.buildConstant(S64, 63);
6888 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
6889
6890 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
6891 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
6892 auto R = MIRBuilder.buildUITOFP(S32, Xor);
6893
6894 auto RNeg = MIRBuilder.buildFNeg(S32, R);
6895 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
6896 MIRBuilder.buildConstant(S64, 0));
6897 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
6898 MI.eraseFromParent();
6899 return Legalized;
6900 }
6901
6902 return UnableToLegalize;
6903}
6904
6905LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
6906 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6907 const LLT S64 = LLT::scalar(64);
6908 const LLT S32 = LLT::scalar(32);
6909
6910 if (SrcTy != S64 && SrcTy != S32)
6911 return UnableToLegalize;
6912 if (DstTy != S32 && DstTy != S64)
6913 return UnableToLegalize;
6914
6915 // FPTOSI gives same result as FPTOUI for positive signed integers.
6916 // FPTOUI needs to deal with fp values that convert to unsigned integers
6917 // greater than or equal to 2^31 for float or 2^63 for double; call this 2^Exp.
6918
6919 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
6920 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
6921 : APFloat::IEEEdouble(),
6922 APInt::getZero(SrcTy.getSizeInBits()));
6923 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
6924
6925 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
6926
6927 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
6928 // For fp values greater than or equal to Threshold (2^Exp), we use FPTOSI on
6929 // (Value - 2^Exp) and add 2^Exp back by setting the highest bit in the result.
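 // E.g. f32 -> i32 with Value == 2^31: Value - 2^31 == 0.0, FPTOSI gives 0,
 // and XOR-ing in the sign mask 0x80000000 yields 2147483648; values below
 // the threshold take the plain FPTOSI path via the select below.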
6930 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
6931 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
6932 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
6933 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
6934
6935 const LLT S1 = LLT::scalar(1);
6936
6937 MachineInstrBuilder FCMP =
6938 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
6939 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
6940
6941 MI.eraseFromParent();
6942 return Legalized;
6943}
6944
6945LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
6946 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6947 const LLT S64 = LLT::scalar(64);
6948 const LLT S32 = LLT::scalar(32);
6949
6950 // FIXME: Only f32 to i64 conversions are supported.
6951 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
6952 return UnableToLegalize;
6953
6954 // Expand f32 -> i64 conversion
6955 // This algorithm comes from compiler-rt's implementation of fixsfdi:
6956 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
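 // E.g. Src = 1.0f (0x3f800000): ExponentBits = 127, Exponent = 0; since the
 // exponent is not greater than 23, the mantissa with its implicit bit
 // (0x800000) is shifted right by 23 to give 1, the sign fixup is a no-op,
 // and the final Exponent < 0 select keeps the value, so the result is 1.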
6957
6958 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
6959
6960 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
6961 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
6962
6963 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
6964 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
6965
6966 auto SignMask = MIRBuilder.buildConstant(SrcTy,
6967 APInt::getSignMask(SrcEltBits));
6968 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
6969 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
6970 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
6971 Sign = MIRBuilder.buildSExt(DstTy, Sign);
6972
6973 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
6974 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
6975 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
6976
6977 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
6978 R = MIRBuilder.buildZExt(DstTy, R);
6979
6980 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
6981 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
6982 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
6983 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
6984
6985 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
6986 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
6987
6988 const LLT S1 = LLT::scalar(1);
6989 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
6990 S1, Exponent, ExponentLoBit);
6991
6992 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
6993
6994 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
6995 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
6996
6997 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
6998
6999 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
7000 S1, Exponent, ZeroSrcTy);
7001
7002 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
7003 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
7004
7005 MI.eraseFromParent();
7006 return Legalized;
7007}
7008
7009// f64 -> f16 conversion using round-to-nearest-even rounding mode.
7010LegalizerHelper::LegalizeResult
7011LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
7012 const LLT S1 = LLT::scalar(1);
7013 const LLT S32 = LLT::scalar(32);
7014
7015 auto [Dst, Src] = MI.getFirst2Regs();
7016 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
7017 MRI.getType(Src).getScalarType() == LLT::scalar(64));
7018
7019 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
7020 return UnableToLegalize;
7021
7022 if (MIRBuilder.getMF().getTarget().Options.UnsafeFPMath) {
7023 unsigned Flags = MI.getFlags();
7024 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
7025 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
7026 MI.eraseFromParent();
7027 return Legalized;
7028 }
7029
7030 const unsigned ExpMask = 0x7ff;
7031 const unsigned ExpBiasf64 = 1023;
7032 const unsigned ExpBiasf16 = 15;
7033
7034 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
7035 Register U = Unmerge.getReg(0);
7036 Register UH = Unmerge.getReg(1);
7037
7038 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
7039 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
7040
7041 // Subtract the fp64 exponent bias (1023) to get the real exponent and
7042 // add the f16 bias (15) to get the biased exponent for the f16 format.
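 // E.g. for 1.0 the biased f64 exponent is 1023, so E becomes
 // 1023 - 1023 + 15 = 15, the biased f16 exponent of 1.0.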
7043 E = MIRBuilder.buildAdd(
7044 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
7045
7046 auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
7047 M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
7048
7049 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
7050 MIRBuilder.buildConstant(S32, 0x1ff));
7051 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
7052
7053 auto Zero = MIRBuilder.buildConstant(S32, 0);
7054 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
7055 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
7056 M = MIRBuilder.buildOr(S32, M, Lo40Set);
7057
7058 // (M != 0 ? 0x0200 : 0) | 0x7c00;
7059 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
7060 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
7061 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
7062
7063 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
7064 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
7065
7066 // N = M | (E << 12);
7067 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
7068 auto N = MIRBuilder.buildOr(S32, M, EShl12);
7069
7070 // B = clamp(1-E, 0, 13);
7071 auto One = MIRBuilder.buildConstant(S32, 1);
7072 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
7073 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
7074 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
7075
7076 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
7077 MIRBuilder.buildConstant(S32, 0x1000));
7078
7079 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
7080 auto D0 = MIRBuilder.buildShl(S32, D, B);
7081
7082 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
7083 D0, SigSetHigh);
7084 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
7085 D = MIRBuilder.buildOr(S32, D, D1);
7086
7087 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
7088 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
7089
7090 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
7091 V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
7092
7093 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
7094 MIRBuilder.buildConstant(S32, 3));
7095 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
7096
7097 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
7098 MIRBuilder.buildConstant(S32, 5));
7099 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
7100
7101 V1 = MIRBuilder.buildOr(S32, V0, V1);
7102 V = MIRBuilder.buildAdd(S32, V, V1);
7103
7104 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
7105 E, MIRBuilder.buildConstant(S32, 30));
7106 V = MIRBuilder.buildSelect(S32, CmpEGt30,
7107 MIRBuilder.buildConstant(S32, 0x7c00), V);
7108
7109 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
7110 E, MIRBuilder.buildConstant(S32, 1039));
7111 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
7112
7113 // Extract the sign bit.
7114 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
7115 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
7116
7117 // Insert the sign bit
7118 V = MIRBuilder.buildOr(S32, Sign, V);
7119
7120 MIRBuilder.buildTrunc(Dst, V);
7121 MI.eraseFromParent();
7122 return Legalized;
7123}
7124
7125LegalizerHelper::LegalizeResult
7126LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
7127 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
7128 const LLT S64 = LLT::scalar(64);
7129 const LLT S16 = LLT::scalar(16);
7130
7131 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
7132 return lowerFPTRUNC_F64_TO_F16(MI);
7133
7134 return UnableToLegalize;
7135}
7136
7137// TODO: If RHS is a constant, SelectionDAGBuilder expands this into a
7138// multiplication tree.
7139LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
7140 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7141 LLT Ty = MRI.getType(Dst);
7142
7143 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
7144 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
7145 MI.eraseFromParent();
7146 return Legalized;
7147}
7148
7149static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
7150 switch (Opc) {
7151 case TargetOpcode::G_SMIN:
7152 return CmpInst::ICMP_SLT;
7153 case TargetOpcode::G_SMAX:
7154 return CmpInst::ICMP_SGT;
7155 case TargetOpcode::G_UMIN:
7156 return CmpInst::ICMP_ULT;
7157 case TargetOpcode::G_UMAX:
7158 return CmpInst::ICMP_UGT;
7159 default:
7160 llvm_unreachable("not in integer min/max");
7161 }
7162}
7163
7164LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
7165 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7166
7167 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
7168 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
7169
7170 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
7171 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
7172
7173 MI.eraseFromParent();
7174 return Legalized;
7175}
7176
7177LegalizerHelper::LegalizeResult
7178LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
7179 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
7180 const int Src0Size = Src0Ty.getScalarSizeInBits();
7181 const int Src1Size = Src1Ty.getScalarSizeInBits();
7182
7183 auto SignBitMask = MIRBuilder.buildConstant(
7184 Src0Ty, APInt::getSignMask(Src0Size));
7185
7186 auto NotSignBitMask = MIRBuilder.buildConstant(
7187 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
7188
7189 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
7190 Register And1;
7191 if (Src0Ty == Src1Ty) {
7192 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
7193 } else if (Src0Size > Src1Size) {
7194 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
7195 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
7196 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
7197 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
7198 } else {
7199 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
7200 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
7201 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
7202 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
7203 }
7204
7205 // Be careful about setting nsz/nnan/ninf on every instruction, since the
7206 // constants are a nan and -0.0, but the final result should preserve
7207 // everything.
7208 unsigned Flags = MI.getFlags();
7209 MIRBuilder.buildOr(Dst, And0, And1, Flags);
7210
7211 MI.eraseFromParent();
7212 return Legalized;
7213}
7214
7215LegalizerHelper::LegalizeResult
7216LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
7217 unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
7218 TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
7219
7220 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7221 LLT Ty = MRI.getType(Dst);
7222
7223 if (!MI.getFlag(MachineInstr::FmNoNans)) {
7224 // Insert canonicalizes if it's possible we need to quiet to get correct
7225 // sNaN behavior.
7226
7227 // Note this must be done here, and not as an optimization combine in the
7228 // absence of a dedicated quiet-snan instruction, as we're using an
7229 // omni-purpose G_FCANONICALIZE.
7230 if (!isKnownNeverSNaN(Src0, MRI))
7231 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
7232
7233 if (!isKnownNeverSNaN(Src1, MRI))
7234 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
7235 }
7236
7237 // If there are no nans, it's safe to simply replace this with the non-IEEE
7238 // version.
7239 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
7240 MI.eraseFromParent();
7241 return Legalized;
7242}
7243
7244LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
7245 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
7246 Register DstReg = MI.getOperand(0).getReg();
7247 LLT Ty = MRI.getType(DstReg);
7248 unsigned Flags = MI.getFlags();
7249
7250 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
7251 Flags);
7252 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
7253 MI.eraseFromParent();
7254 return Legalized;
7255}
7256
7257LegalizerHelper::LegalizeResult
7258LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
7259 auto [DstReg, X] = MI.getFirst2Regs();
7260 const unsigned Flags = MI.getFlags();
7261 const LLT Ty = MRI.getType(DstReg);
7262 const LLT CondTy = Ty.changeElementSize(1);
7263
7264 // round(x) =>
7265 // t = trunc(x);
7266 // d = fabs(x - t);
7267 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
7268 // return t + o;
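 // E.g. x = 2.5: t = 2.0, d = 0.5, o = copysign(1.0, 2.5) = 1.0, giving 3.0;
 // x = -2.5: t = -2.0, o = -1.0, giving -3.0 (ties round away from zero).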
7269
7270 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
7271
7272 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
7273 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
7274
7275 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
7276 auto Cmp =
7277 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
7278
7279 // Could emit G_UITOFP instead
7280 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
7281 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
7282 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
7283 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
7284
7285 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
7286
7287 MI.eraseFromParent();
7288 return Legalized;
7289}
7290
7291LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
7292 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7293 unsigned Flags = MI.getFlags();
7294 LLT Ty = MRI.getType(DstReg);
7295 const LLT CondTy = Ty.changeElementSize(1);
7296
7297 // result = trunc(src);
7298 // if (src < 0.0 && src != result)
7299 // result += -1.0.
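 // E.g. src = -1.25: trunc = -1.0, src < 0 and src != trunc, so the i1 true is
 // sign-extended by SITOFP to -1.0 and added, yielding -2.0 = floor(-1.25).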
7300
7301 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
7302 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
7303
7304 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
7305 SrcReg, Zero, Flags);
7306 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
7307 SrcReg, Trunc, Flags);
7308 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
7309 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
7310
7311 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
7312 MI.eraseFromParent();
7313 return Legalized;
7314}
7315
7316LegalizerHelper::LegalizeResult
7317LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
7318 const unsigned NumOps = MI.getNumOperands();
7319 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
7320 unsigned PartSize = Src0Ty.getSizeInBits();
7321
7322 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
7323 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
7324
7325 for (unsigned I = 2; I != NumOps; ++I) {
7326 const unsigned Offset = (I - 1) * PartSize;
7327
7328 Register SrcReg = MI.getOperand(I).getReg();
7329 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
7330
7331 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
7332 MRI.createGenericVirtualRegister(WideTy);
7333
7334 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
7335 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
7336 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
7337 ResultReg = NextResult;
7338 }
7339
7340 if (DstTy.isPointer()) {
7341 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
7342 DstTy.getAddressSpace())) {
7343 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
7344 return UnableToLegalize;
7345 }
7346
7347 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
7348 }
7349
7350 MI.eraseFromParent();
7351 return Legalized;
7352}
7353
7354LegalizerHelper::LegalizeResult
7355LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
7356 const unsigned NumDst = MI.getNumOperands() - 1;
7357 Register SrcReg = MI.getOperand(NumDst).getReg();
7358 Register Dst0Reg = MI.getOperand(0).getReg();
7359 LLT DstTy = MRI.getType(Dst0Reg);
7360 if (DstTy.isPointer())
7361 return UnableToLegalize; // TODO
7362
7363 SrcReg = coerceToScalar(SrcReg);
7364 if (!SrcReg)
7365 return UnableToLegalize;
7366
7367 // Expand scalarizing unmerge as bitcast to integer and shift.
7368 LLT IntTy = MRI.getType(SrcReg);
7369
7370 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
7371
7372 const unsigned DstSize = DstTy.getSizeInBits();
7373 unsigned Offset = DstSize;
7374 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
7375 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
7376 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
7377 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
7378 }
7379
7380 MI.eraseFromParent();
7381 return Legalized;
7382}
7383
7384/// Lower a vector extract or insert by writing the vector to a stack temporary
7385/// and reloading the element or vector.
7386///
7387/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
7388/// =>
7389/// %stack_temp = G_FRAME_INDEX
7390/// G_STORE %vec, %stack_temp
7391/// %idx = clamp(%idx, %vec.getNumElements())
7392/// %element_ptr = G_PTR_ADD %stack_temp, %idx
7393/// %dst = G_LOAD %element_ptr
7394LegalizerHelper::LegalizeResult
7395LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
7396 Register DstReg = MI.getOperand(0).getReg();
7397 Register SrcVec = MI.getOperand(1).getReg();
7398 Register InsertVal;
7399 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
7400 InsertVal = MI.getOperand(2).getReg();
7401
7402 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
7403
7404 LLT VecTy = MRI.getType(SrcVec);
7405 LLT EltTy = VecTy.getElementType();
7406 unsigned NumElts = VecTy.getNumElements();
7407
7408 int64_t IdxVal;
7409 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
7410 SmallVector<Register, 8> SrcRegs;
7411 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
7412
7413 if (InsertVal) {
7414 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
7415 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
7416 } else {
7417 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
7418 }
7419
7420 MI.eraseFromParent();
7421 return Legalized;
7422 }
7423
7424 if (!EltTy.isByteSized()) { // Not implemented.
7425 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
7426 return UnableToLegalize;
7427 }
7428
7429 unsigned EltBytes = EltTy.getSizeInBytes();
7430 Align VecAlign = getStackTemporaryAlignment(VecTy);
7431 Align EltAlign;
7432
7433 MachinePointerInfo PtrInfo;
7434 auto StackTemp = createStackTemporary(
7435 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
7436 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
7437
7438 // Get the pointer to the element, and be sure not to hit undefined behavior
7439 // if the index is out of bounds.
7440 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
7441
7442 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
7443 int64_t Offset = IdxVal * EltBytes;
7444 PtrInfo = PtrInfo.getWithOffset(Offset);
7445 EltAlign = commonAlignment(VecAlign, Offset);
7446 } else {
7447 // We lose information with a variable offset.
7448 EltAlign = getStackTemporaryAlignment(EltTy);
7449 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
7450 }
7451
7452 if (InsertVal) {
7453 // Write the inserted element
7454 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
7455
7456 // Reload the whole vector.
7457 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
7458 } else {
7459 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
7460 }
7461
7462 MI.eraseFromParent();
7463 return Legalized;
7464}
7465
7466LegalizerHelper::LegalizeResult
7467LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
7468 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
7469 MI.getFirst3RegLLTs();
7470 LLT IdxTy = LLT::scalar(32);
7471
7472 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
7473 Register Undef;
7474 SmallVector<Register, 32> BuildVec;
7475 LLT EltTy = DstTy.getScalarType();
7476
7477 for (int Idx : Mask) {
7478 if (Idx < 0) {
7479 if (!Undef.isValid())
7480 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
7481 BuildVec.push_back(Undef);
7482 continue;
7483 }
7484
7485 if (Src0Ty.isScalar()) {
7486 BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
7487 } else {
7488 int NumElts = Src0Ty.getNumElements();
7489 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
7490 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
7491 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
7492 auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
7493 BuildVec.push_back(Extract.getReg(0));
7494 }
7495 }
7496
7497 if (DstTy.isScalar())
7498 MIRBuilder.buildCopy(DstReg, BuildVec[0]);
7499 else
7500 MIRBuilder.buildBuildVector(DstReg, BuildVec);
7501 MI.eraseFromParent();
7502 return Legalized;
7503}
7504
7505Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
7506 Register AllocSize,
7507 Align Alignment,
7508 LLT PtrTy) {
7509 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
7510
7511 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
7512 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
7513
7514 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
7515 // have to generate an extra instruction to negate the alloc and then use
7516 // G_PTR_ADD to add the negative offset.
7517 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
7518 if (Alignment > Align(1)) {
7519 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
7520 AlignMask.negate();
7521 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
7522 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
7523 }
7524
7525 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
7526}
7527
7528LegalizerHelper::LegalizeResult
7529LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
7530 const auto &MF = *MI.getMF();
7531 const auto &TFI = *MF.getSubtarget().getFrameLowering();
7532 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
7533 return UnableToLegalize;
7534
7535 Register Dst = MI.getOperand(0).getReg();
7536 Register AllocSize = MI.getOperand(1).getReg();
7537 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
7538
7539 LLT PtrTy = MRI.getType(Dst);
7540 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
7541 Register SPTmp =
7542 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
7543
7544 MIRBuilder.buildCopy(SPReg, SPTmp);
7545 MIRBuilder.buildCopy(Dst, SPTmp);
7546
7547 MI.eraseFromParent();
7548 return Legalized;
7549}
7550
7551LegalizerHelper::LegalizeResult
7552LegalizerHelper::lowerStackSave(MachineInstr &MI) {
7553 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
7554 if (!StackPtr)
7555 return UnableToLegalize;
7556
7557 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
7558 MI.eraseFromParent();
7559 return Legalized;
7560}
7561
7562LegalizerHelper::LegalizeResult
7563LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
7564 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
7565 if (!StackPtr)
7566 return UnableToLegalize;
7567
7568 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
7569 MI.eraseFromParent();
7570 return Legalized;
7571}
7572
7573LegalizerHelper::LegalizeResult
7574LegalizerHelper::lowerExtract(MachineInstr &MI) {
7575 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7576 unsigned Offset = MI.getOperand(2).getImm();
7577
7578 // Extract sub-vector or one element
7579 if (SrcTy.isVector()) {
7580 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
7581 unsigned DstSize = DstTy.getSizeInBits();
7582
7583 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
7584 (Offset + DstSize <= SrcTy.getSizeInBits())) {
7585 // Unmerge and allow access to each Src element for the artifact combiner.
7586 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
7587
7588 // Take element(s) we need to extract and copy it (merge them).
7589 SmallVector<Register, 8> SubVectorElts;
7590 for (unsigned Idx = Offset / SrcEltSize;
7591 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
7592 SubVectorElts.push_back(Unmerge.getReg(Idx));
7593 }
7594 if (SubVectorElts.size() == 1)
7595 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
7596 else
7597 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
7598
7599 MI.eraseFromParent();
7600 return Legalized;
7601 }
7602 }
7603
7604 if (DstTy.isScalar() &&
7605 (SrcTy.isScalar() ||
7606 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
7607 LLT SrcIntTy = SrcTy;
7608 if (!SrcTy.isScalar()) {
7609 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
7610 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
7611 }
7612
7613 if (Offset == 0)
7614 MIRBuilder.buildTrunc(DstReg, SrcReg);
7615 else {
7616 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
7617 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
7618 MIRBuilder.buildTrunc(DstReg, Shr);
7619 }
7620
7621 MI.eraseFromParent();
7622 return Legalized;
7623 }
7624
7625 return UnableToLegalize;
7626}
7627
7628LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
7629 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
7630 uint64_t Offset = MI.getOperand(3).getImm();
7631
7632 LLT DstTy = MRI.getType(Src);
7633 LLT InsertTy = MRI.getType(InsertSrc);
7634
7635 // Insert sub-vector or one element
7636 if (DstTy.isVector() && !InsertTy.isPointer()) {
7637 LLT EltTy = DstTy.getElementType();
7638 unsigned EltSize = EltTy.getSizeInBits();
7639 unsigned InsertSize = InsertTy.getSizeInBits();
7640
7641 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
7642 (Offset + InsertSize <= DstTy.getSizeInBits())) {
7643 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
7644 SmallVector<Register, 8> DstElts;
7645 unsigned Idx = 0;
7646 // Elements from Src before insert start Offset
7647 for (; Idx < Offset / EltSize; ++Idx) {
7648 DstElts.push_back(UnmergeSrc.getReg(Idx));
7649 }
7650
7651 // Replace elements in Src with elements from InsertSrc
7652 if (InsertTy.getSizeInBits() > EltSize) {
7653 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
7654 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
7655 ++Idx, ++i) {
7656 DstElts.push_back(UnmergeInsertSrc.getReg(i));
7657 }
7658 } else {
7659 DstElts.push_back(InsertSrc);
7660 ++Idx;
7661 }
7662
7663 // Remaining elements from Src after insert
7664 for (; Idx < DstTy.getNumElements(); ++Idx) {
7665 DstElts.push_back(UnmergeSrc.getReg(Idx));
7666 }
7667
7668 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
7669 MI.eraseFromParent();
7670 return Legalized;
7671 }
7672 }
7673
7674 if (InsertTy.isVector() ||
7675 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
7676 return UnableToLegalize;
7677
7678 const DataLayout &DL = MIRBuilder.getDataLayout();
7679 if ((DstTy.isPointer() &&
7680 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
7681 (InsertTy.isPointer() &&
7682 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
7683 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
7684 return UnableToLegalize;
7685 }
7686
7687 LLT IntDstTy = DstTy;
7688
7689 if (!DstTy.isScalar()) {
7690 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
7691 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
7692 }
7693
7694 if (!InsertTy.isScalar()) {
7695 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
7696 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
7697 }
7698
7699 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
7700 if (Offset != 0) {
7701 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
7702 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
7703 }
7704
7705 APInt MaskVal = APInt::getBitsSetWithWrap(
7706 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
7707
7708 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
7709 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
7710 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
7711
7712 MIRBuilder.buildCast(Dst, Or);
7713 MI.eraseFromParent();
7714 return Legalized;
7715}
7716
7717LegalizerHelper::LegalizeResult
7718LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
7719 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
7720 MI.getFirst4RegLLTs();
7721 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
7722
7723 LLT Ty = Dst0Ty;
7724 LLT BoolTy = Dst1Ty;
7725
7726 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
7727
7728 if (IsAdd)
7729 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
7730 else
7731 MIRBuilder.buildSub(NewDst0, LHS, RHS);
7732
7733 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
7734
7735 auto Zero = MIRBuilder.buildConstant(Ty, 0);
7736
7737 // For an addition, the result should be less than one of the operands (LHS)
7738 // if and only if the other operand (RHS) is negative, otherwise there will
7739 // be overflow.
7740 // For a subtraction, the result should be less than one of the operands
7741 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
7742 // otherwise there will be overflow.
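 // E.g. for i32 addition with LHS = 1, RHS = INT_MAX the sum wraps to
 // INT_MIN < LHS while RHS is non-negative, so the XOR below reports overflow;
 // with RHS = -1 the sum 0 is also < LHS but RHS is negative, so no overflow
 // is reported.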
7743 auto ResultLowerThanLHS =
7744 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
7745 auto ConditionRHS = MIRBuilder.buildICmp(
7746 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
7747
7748 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
7749
7750 MIRBuilder.buildCopy(Dst0, NewDst0);
7751 MI.eraseFromParent();
7752
7753 return Legalized;
7754}
7755
7756LegalizerHelper::LegalizeResult
7757LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
7758 auto [Res, LHS, RHS] = MI.getFirst3Regs();
7759 LLT Ty = MRI.getType(Res);
7760 bool IsSigned;
7761 bool IsAdd;
7762 unsigned BaseOp;
7763 switch (MI.getOpcode()) {
7764 default:
7765 llvm_unreachable("unexpected addsat/subsat opcode");
7766 case TargetOpcode::G_UADDSAT:
7767 IsSigned = false;
7768 IsAdd = true;
7769 BaseOp = TargetOpcode::G_ADD;
7770 break;
7771 case TargetOpcode::G_SADDSAT:
7772 IsSigned = true;
7773 IsAdd = true;
7774 BaseOp = TargetOpcode::G_ADD;
7775 break;
7776 case TargetOpcode::G_USUBSAT:
7777 IsSigned = false;
7778 IsAdd = false;
7779 BaseOp = TargetOpcode::G_SUB;
7780 break;
7781 case TargetOpcode::G_SSUBSAT:
7782 IsSigned = true;
7783 IsAdd = false;
7784 BaseOp = TargetOpcode::G_SUB;
7785 break;
7786 }
7787
7788 if (IsSigned) {
7789 // sadd.sat(a, b) ->
7790 // hi = 0x7fffffff - smax(a, 0)
7791 // lo = 0x80000000 - smin(a, 0)
7792 // a + smin(smax(lo, b), hi)
7793 // ssub.sat(a, b) ->
7794 // lo = smax(a, -1) - 0x7fffffff
7795 // hi = smin(a, -1) - 0x80000000
7796 // a - smin(smax(lo, b), hi)
7797 // TODO: AMDGPU can use a "median of 3" instruction here:
7798 // a +/- med3(lo, b, hi)
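 // E.g. i8 sadd.sat(100, 60): hi = 127 - smax(100, 0) = 27,
 // lo = -128 - smin(100, 0) = -128, and 100 + smin(smax(-128, 60), 27)
 // = 100 + 27 = 127, the saturated result.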
7799 uint64_t NumBits = Ty.getScalarSizeInBits();
7800 auto MaxVal =
7801 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
7802 auto MinVal =
7803 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
7804 MachineInstrBuilder Hi, Lo;
7805 if (IsAdd) {
7806 auto Zero = MIRBuilder.buildConstant(Ty, 0);
7807 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
7808 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
7809 } else {
7810 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
7811 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
7812 MaxVal);
7813 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
7814 MinVal);
7815 }
7816 auto RHSClamped =
7817 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
7818 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
7819 } else {
7820 // uadd.sat(a, b) -> a + umin(~a, b)
7821 // usub.sat(a, b) -> a - umin(a, b)
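 // E.g. u8 uadd.sat(200, 100): ~200 = 55, umin(55, 100) = 55,
 // and 200 + 55 = 255, i.e. the result saturates at UINT8_MAX.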
7822 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
7823 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
7824 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
7825 }
7826
7827 MI.eraseFromParent();
7828 return Legalized;
7829}
7830
7831LegalizerHelper::LegalizeResult
7832LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
7833 auto [Res, LHS, RHS] = MI.getFirst3Regs();
7834 LLT Ty = MRI.getType(Res);
7835 LLT BoolTy = Ty.changeElementSize(1);
7836 bool IsSigned;
7837 bool IsAdd;
7838 unsigned OverflowOp;
7839 switch (MI.getOpcode()) {
7840 default:
7841 llvm_unreachable("unexpected addsat/subsat opcode");
7842 case TargetOpcode::G_UADDSAT:
7843 IsSigned = false;
7844 IsAdd = true;
7845 OverflowOp = TargetOpcode::G_UADDO;
7846 break;
7847 case TargetOpcode::G_SADDSAT:
7848 IsSigned = true;
7849 IsAdd = true;
7850 OverflowOp = TargetOpcode::G_SADDO;
7851 break;
7852 case TargetOpcode::G_USUBSAT:
7853 IsSigned = false;
7854 IsAdd = false;
7855 OverflowOp = TargetOpcode::G_USUBO;
7856 break;
7857 case TargetOpcode::G_SSUBSAT:
7858 IsSigned = true;
7859 IsAdd = false;
7860 OverflowOp = TargetOpcode::G_SSUBO;
7861 break;
7862 }
7863
7864 auto OverflowRes =
7865 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
7866 Register Tmp = OverflowRes.getReg(0);
7867 Register Ov = OverflowRes.getReg(1);
7868 MachineInstrBuilder Clamp;
7869 if (IsSigned) {
7870 // sadd.sat(a, b) ->
7871 // {tmp, ov} = saddo(a, b)
7872 // ov ? (tmp >>s 31) + 0x80000000 : r
7873 // ssub.sat(a, b) ->
7874 // {tmp, ov} = ssubo(a, b)
7875 // ov ? (tmp >>s 31) + 0x80000000 : r
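 // E.g. i32 sadd.sat(INT_MAX, 1): saddo wraps to INT_MIN with ov = 1;
 // INT_MIN >>s 31 = -1, and -1 + 0x80000000 = 0x7fffffff = INT_MAX,
 // the correct positive saturation value.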
7876 uint64_t NumBits = Ty.getScalarSizeInBits();
7877 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
7878 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
7879 auto MinVal =
7880 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
7881 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
7882 } else {
7883 // uadd.sat(a, b) ->
7884 // {tmp, ov} = uaddo(a, b)
7885 // ov ? 0xffffffff : tmp
7886 // usub.sat(a, b) ->
7887 // {tmp, ov} = usubo(a, b)
7888 // ov ? 0 : tmp
7889 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
7890 }
7891 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
7892
7893 MI.eraseFromParent();
7894 return Legalized;
7895}
7896
7897LegalizerHelper::LegalizeResult
7898LegalizerHelper::lowerShlSat(MachineInstr &MI) {
7899 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
7900 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
7901 "Expected shlsat opcode!");
7902 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
7903 auto [Res, LHS, RHS] = MI.getFirst3Regs();
7904 LLT Ty = MRI.getType(Res);
7905 LLT BoolTy = Ty.changeElementSize(1);
7906
7907 unsigned BW = Ty.getScalarSizeInBits();
7908 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
7909 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
7910 : MIRBuilder.buildLShr(Ty, Result, RHS);
7911
7912 MachineInstrBuilder SatVal;
7913 if (IsSigned) {
7914 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
7915 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
7916 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
7917 MIRBuilder.buildConstant(Ty, 0));
7918 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
7919 } else {
7920 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
7921 }
7922 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
7923 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
7924
7925 MI.eraseFromParent();
7926 return Legalized;
7927}
7928
7929LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
7930 auto [Dst, Src] = MI.getFirst2Regs();
7931 const LLT Ty = MRI.getType(Src);
7932 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
7933 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
7934
7935 // Swap most and least significant byte, set remaining bytes in Res to zero.
7936 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
7937 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
7938 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
7939 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
7940
7941 // Set i-th high/low byte in Res to i-th low/high byte from Src.
7942 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
7943 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
7944 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
7945 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
7946 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
7947 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
7948 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
7949 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
7950 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
7951 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
7952 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
7953 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
7954 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
7955 }
7956 Res.getInstr()->getOperand(0).setReg(Dst);
7957
7958 MI.eraseFromParent();
7959 return Legalized;
7960}
7961
7962//{ (Src & Mask) >> N } | { (Src << N) & Mask }
7963static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
7964 MachineInstrBuilder Src, const APInt &Mask) {
7965 const LLT Ty = Dst.getLLTTy(*B.getMRI());
7966 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
7967 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
7968 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
7969 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
7970 return B.buildOr(Dst, LHS, RHS);
7971}
7972
7973LegalizerHelper::LegalizeResult
7974LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
7975 auto [Dst, Src] = MI.getFirst2Regs();
7976 const LLT Ty = MRI.getType(Src);
7977 unsigned Size = Ty.getSizeInBits();
7978
7979 MachineInstrBuilder BSWAP =
7980 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
7981
7982 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
7983 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
7984 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
7985 MachineInstrBuilder Swap4 =
7986 SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
7987
7988 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
7989 // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
7990 // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
7991 MachineInstrBuilder Swap2 =
7992 SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
7993
7994 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
7995 // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
7996 // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
7997 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
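 // Within each byte, e.g. 0b10110010 becomes 0b00101011 after the nibble swap,
 // 0b10001110 after the 2-bit pair swap, and 0b01001101 after the final bit
 // swap, which is the fully bit-reversed byte.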
7998
7999 MI.eraseFromParent();
8000 return Legalized;
8001}
8002
8003LegalizerHelper::LegalizeResult
8004LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
8005 MachineFunction &MF = MIRBuilder.getMF();
8006
8007 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
8008 int NameOpIdx = IsRead ? 1 : 0;
8009 int ValRegIndex = IsRead ? 0 : 1;
8010
8011 Register ValReg = MI.getOperand(ValRegIndex).getReg();
8012 const LLT Ty = MRI.getType(ValReg);
8013 const MDString *RegStr = cast<MDString>(
8014 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
8015
8016 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
8017 if (!PhysReg.isValid())
8018 return UnableToLegalize;
8019
8020 if (IsRead)
8021 MIRBuilder.buildCopy(ValReg, PhysReg);
8022 else
8023 MIRBuilder.buildCopy(PhysReg, ValReg);
8024
8025 MI.eraseFromParent();
8026 return Legalized;
8027}
8028
8029LegalizerHelper::LegalizeResult
8030LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
8031 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
8032 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
8033 Register Result = MI.getOperand(0).getReg();
8034 LLT OrigTy = MRI.getType(Result);
8035 auto SizeInBits = OrigTy.getScalarSizeInBits();
8036 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
8037
8038 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
8039 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
8040 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
8041 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
8042
8043 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
8044 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
8045 MIRBuilder.buildTrunc(Result, Shifted);
8046
8047 MI.eraseFromParent();
8048 return Legalized;
8049}
8050
8051LegalizerHelper::LegalizeResult
8052LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
8053 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
8054 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
8055
8056 if (Mask == fcNone) {
8057 MIRBuilder.buildConstant(DstReg, 0);
8058 MI.eraseFromParent();
8059 return Legalized;
8060 }
8061 if (Mask == fcAllFlags) {
8062 MIRBuilder.buildConstant(DstReg, 1);
8063 MI.eraseFromParent();
8064 return Legalized;
8065 }
8066
8067 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
8068 // version
8069
8070 unsigned BitSize = SrcTy.getScalarSizeInBits();
8071 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8072
8073 LLT IntTy = LLT::scalar(BitSize);
8074 if (SrcTy.isVector())
8075 IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
8076 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
8077
8078 // Various masks.
8079 APInt SignBit = APInt::getSignMask(BitSize);
8080 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
8081 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
8082 APInt ExpMask = Inf;
8083 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
8084 APInt QNaNBitMask =
8085 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
8086 APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
8087
8088 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
8089 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
8090 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
8091 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
8092 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
8093
8094 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
8095 auto Sign =
8096 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
8097
8098 auto Res = MIRBuilder.buildConstant(DstTy, 0);
8099 // Clang doesn't support capture of structured bindings:
8100 LLT DstTyCopy = DstTy;
8101 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
8102 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
8103 };
8104
8105 // Tests that involve more than one class should be processed first.
8106 if ((Mask & fcFinite) == fcFinite) {
8107 // finite(V) ==> abs(V) u< exp_mask
8108 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
8109 ExpMaskC));
8110 Mask &= ~fcFinite;
8111 } else if ((Mask & fcFinite) == fcPosFinite) {
8112 // finite(V) && V > 0 ==> V u< exp_mask
8113 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
8114 ExpMaskC));
8115 Mask &= ~fcPosFinite;
8116 } else if ((Mask & fcFinite) == fcNegFinite) {
8117 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
8118 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
8119 ExpMaskC);
8120 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
8121 appendToRes(And);
8122 Mask &= ~fcNegFinite;
8123 }
8124
8125 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
8126 // fcZero | fcSubnormal => test all exponent bits are 0
8127 // TODO: Handle sign bit specific cases
8128 // TODO: Handle inverted case
8129 if (PartialCheck == (fcZero | fcSubnormal)) {
8130 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
8131 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8132 ExpBits, ZeroC));
8133 Mask &= ~PartialCheck;
8134 }
8135 }
8136
8137 // Check for individual classes.
8138 if (FPClassTest PartialCheck = Mask & fcZero) {
8139 if (PartialCheck == fcPosZero)
8140 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8141 AsInt, ZeroC));
8142 else if (PartialCheck == fcZero)
8143 appendToRes(
8144 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
8145 else // fcNegZero
8146 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8147 AsInt, SignBitC));
8148 }
8149
8150 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
8151 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
8152 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
8153 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
8154 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
8155 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
8156 auto SubnormalRes =
8157 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
8158 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
8159 if (PartialCheck == fcNegSubnormal)
8160 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
8161 appendToRes(SubnormalRes);
8162 }
8163
8164 if (FPClassTest PartialCheck = Mask & fcInf) {
8165 if (PartialCheck == fcPosInf)
8166 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8167 AsInt, InfC));
8168 else if (PartialCheck == fcInf)
8169 appendToRes(
8170 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
8171 else { // fcNegInf
8172 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
8173 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
8174 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8175 AsInt, NegInfC));
8176 }
8177 }
8178
8179 if (FPClassTest PartialCheck = Mask & fcNan) {
8180 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
8181 if (PartialCheck == fcNan) {
8182 // isnan(V) ==> abs(V) u> int(inf)
8183 appendToRes(
8184 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
8185 } else if (PartialCheck == fcQNan) {
8186 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
8187 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
8188 InfWithQnanBitC));
8189 } else { // fcSNan
8190 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
8191 // abs(V) u< (unsigned(Inf) | quiet_bit)
8192 auto IsNan =
8193 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
8194 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
8195 Abs, InfWithQnanBitC);
8196 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
8197 }
8198 }
8199
8200 if (FPClassTest PartialCheck = Mask & fcNormal) {
8201 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
8202 // (max_exp-1))
8203 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
8204 auto ExpMinusOne = MIRBuilder.buildSub(
8205 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
8206 APInt MaxExpMinusOne = ExpMask - ExpLSB;
8207 auto NormalRes =
8208 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
8209 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
8210 if (PartialCheck == fcNegNormal)
8211 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
8212 else if (PartialCheck == fcPosNormal) {
8213 auto PosSign = MIRBuilder.buildXor(
8214 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InvertionMask));
8215 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
8216 }
8217 appendToRes(NormalRes);
8218 }
8219
8220 MIRBuilder.buildCopy(DstReg, Res);
8221 MI.eraseFromParent();
8222 return Legalized;
8223}
8224
8225LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
8226 // Implement G_SELECT in terms of XOR, AND, OR.
8227 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
8228 MI.getFirst4RegLLTs();
8229
8230 bool IsEltPtr = DstTy.isPointerOrPointerVector();
8231 if (IsEltPtr) {
8232 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
8233 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
8234 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
8235 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
8236 DstTy = NewTy;
8237 }
8238
8239 if (MaskTy.isScalar()) {
8240 // Turn the scalar condition into a vector condition mask if needed.
8241
8242 Register MaskElt = MaskReg;
8243
8244 // The condition was potentially zero extended before, but we want a sign
8245 // extended boolean.
8246 if (MaskTy != LLT::scalar(1))
8247 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
8248
8249 // Continue the sign extension (or truncate) to match the data type.
8250 MaskElt =
8251 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
8252
8253 if (DstTy.isVector()) {
8254 // Generate a vector splat idiom.
8255 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
8256 MaskReg = ShufSplat.getReg(0);
8257 } else {
8258 MaskReg = MaskElt;
8259 }
8260 MaskTy = DstTy;
8261 } else if (!DstTy.isVector()) {
8262 // Cannot handle the case that mask is a vector and dst is a scalar.
8263 return UnableToLegalize;
8264 }
8265
8266 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
8267 return UnableToLegalize;
8268 }
8269
8270 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
8271 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
8272 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
8273 if (IsEltPtr) {
8274 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
8275 MIRBuilder.buildIntToPtr(DstReg, Or);
8276 } else {
8277 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
8278 }
8279 MI.eraseFromParent();
8280 return Legalized;
8281}
8282
8283LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
8284 // Split DIVREM into individual instructions.
8285 unsigned Opcode = MI.getOpcode();
8286
8288 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
8289 : TargetOpcode::G_UDIV,
8290 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
8291 MIRBuilder.buildInstr(
8292 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
8293 : TargetOpcode::G_UREM,
8294 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
8295 MI.eraseFromParent();
8296 return Legalized;
8297}
8298
8299LegalizerHelper::LegalizeResult
8300LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
8301 // Expand %res = G_ABS %a into:
8302 // %v1 = G_ASHR %a, scalar_size-1
8303 // %v2 = G_ADD %a, %v1
8304 // %res = G_XOR %v2, %v1
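 // E.g. for a = -5 (i32): v1 = -1, v2 = -6, and -6 ^ -1 = 5 = |a|;
 // for non-negative a, v1 = 0 and the add/xor leave a unchanged.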
8305 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
8306 Register OpReg = MI.getOperand(1).getReg();
8307 auto ShiftAmt =
8308 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
8309 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
8310 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
8311 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
8312 MI.eraseFromParent();
8313 return Legalized;
8314}
8315
8316LegalizerHelper::LegalizeResult
8317LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
8318 // Expand %res = G_ABS %a into:
8319 // %v1 = G_CONSTANT 0
8320 // %v2 = G_SUB %v1, %a
8321 // %res = G_SMAX %a, %v2
8322 Register SrcReg = MI.getOperand(1).getReg();
8323 LLT Ty = MRI.getType(SrcReg);
8324 auto Zero = MIRBuilder.buildConstant(Ty, 0);
8325 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
8326 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
8327 MI.eraseFromParent();
8328 return Legalized;
8329}
8330
8331LegalizerHelper::LegalizeResult
8332LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
8333 Register SrcReg = MI.getOperand(1).getReg();
8334 Register DestReg = MI.getOperand(0).getReg();
8335 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
8336 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
8337 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
8338 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
8339 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
8340 MI.eraseFromParent();
8341 return Legalized;
8342}
8343
8344LegalizerHelper::LegalizeResult
8345LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
8346 Register SrcReg = MI.getOperand(1).getReg();
8347 LLT SrcTy = MRI.getType(SrcReg);
8348 LLT DstTy = MRI.getType(SrcReg);
8349
8350 // The source could be a scalar if the IR type was <1 x sN>.
8351 if (SrcTy.isScalar()) {
8352 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
8353 return UnableToLegalize; // FIXME: handle extension.
8354 // This can be just a plain copy.
8355 Observer.changingInstr(MI);
8356 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
8357 Observer.changedInstr(MI);
8358 return Legalized;
8359 }
8360 return UnableToLegalize;
8361}
8362
8363static Type *getTypeForLLT(LLT Ty, LLVMContext &C);
8364
8365LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
8366 MachineFunction &MF = *MI.getMF();
8367 const DataLayout &DL = MIRBuilder.getDataLayout();
8368 LLVMContext &Ctx = MF.getFunction().getContext();
8369 Register ListPtr = MI.getOperand(1).getReg();
8370 LLT PtrTy = MRI.getType(ListPtr);
8371
8372 // LstPtr is a pointer to the head of the list. Get the address
8373 // of the head of the list.
8374 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
8375 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
8376 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
8377 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
8378
8379 const Align A(MI.getOperand(2).getImm());
8380 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
8381 if (A > TLI.getMinStackArgumentAlignment()) {
8382 Register AlignAmt =
8383 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
8384 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
8385 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
8386 VAList = AndDst.getReg(0);
8387 }
8388
8389 // Increment the pointer, VAList, to the next vaarg.
8390 // The list should be bumped by the size of the element in the current head
8391 // of the list.
8392 Register Dst = MI.getOperand(0).getReg();
8393 LLT LLTTy = MRI.getType(Dst);
8394 Type *Ty = getTypeForLLT(LLTTy, Ctx);
8395 auto IncAmt =
8396 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
8397 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
8398
8399 // Store the incremented VAList to the legalized pointer.
8400 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
8401 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
8402 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
8403 // Load the actual argument out of the pointer VAList
8404 Align EltAlignment = DL.getABITypeAlign(Ty);
8405 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
8406 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
8407 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
8408
8409 MI.eraseFromParent();
8410 return Legalized;
8411}
8412
8413static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
8414 // On Darwin, -Os means optimize for size without hurting performance, so
8415 // only really optimize for size when -Oz (MinSize) is used.
8416 if (MF.getTarget().getTargetTriple().isOSDarwin())
8417 return MF.getFunction().hasMinSize();
8418 return MF.getFunction().hasOptSize();
8419}
8420
8421// Returns a list of types to use for memory op lowering in MemOps. A partial
8422// port of findOptimalMemOpLowering in TargetLowering.
8423static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
8424 unsigned Limit, const MemOp &Op,
8425 unsigned DstAS, unsigned SrcAS,
8426 const AttributeList &FuncAttributes,
8427 const TargetLowering &TLI) {
8428 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
8429 return false;
8430
8431 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
8432
8433 if (Ty == LLT()) {
8434 // Use the largest scalar type whose alignment constraints are satisfied.
8435 // We only need to check DstAlign here as SrcAlign is always greater or
8436 // equal to DstAlign (or zero).
8437 Ty = LLT::scalar(64);
8438 if (Op.isFixedDstAlign())
8439 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
8440 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
8441 Ty = LLT::scalar(Ty.getSizeInBytes());
8442 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
8443 // FIXME: check for the largest legal type we can load/store to.
8444 }
8445
8446 unsigned NumMemOps = 0;
8447 uint64_t Size = Op.size();
8448 while (Size) {
8449 unsigned TySize = Ty.getSizeInBytes();
8450 while (TySize > Size) {
8451 // For now, only use non-vector loads / stores for the left-over pieces.
8452 LLT NewTy = Ty;
8453 // FIXME: check for mem op safety and legality of the types. Not all of
8454 // SDAGisms map cleanly to GISel concepts.
8455 if (NewTy.isVector())
8456 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
8457 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
8458 unsigned NewTySize = NewTy.getSizeInBytes();
8459 assert(NewTySize > 0 && "Could not find appropriate type");
8460
8461 // If the new LLT cannot cover all of the remaining bits, then consider
8462 // issuing a (or a pair of) unaligned and overlapping load / store.
8463 unsigned Fast;
8464 // Need to get a VT equivalent for allowsMisalignedMemoryAccesses().
8465 MVT VT = getMVTForLLT(Ty);
8466 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
8467 TLI.allowsMisalignedMemoryAccesses(
8468 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
8469 MachineMemOperand::MONone, &Fast) &&
8470 Fast)
8471 TySize = Size;
8472 else {
8473 Ty = NewTy;
8474 TySize = NewTySize;
8475 }
8476 }
8477
8478 if (++NumMemOps > Limit)
8479 return false;
8480
8481 MemOps.push_back(Ty);
8482 Size -= TySize;
8483 }
8484
8485 return true;
8486}
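
Illustrative aside (not part of LegalizerHelper.cpp): stripped of the legality, alignment, and VT queries, the loop above is a greedy breakdown of the access size into power-of-two chunks, optionally letting one oversized trailing access overlap the previous one. A self-contained sketch under that simplification; breakDownMemOp, MaxBytes, and AllowOverlap are illustrative names standing in for the type list, the preferred type, and Op.allowOverlap():

#include <cstdint>
#include <vector>

// Greedily cover Size bytes with power-of-two chunks of at most MaxBytes.
// With AllowOverlap, a trailing remainder is covered by one oversized access
// that overlaps the previous one instead of a run of smaller accesses.
std::vector<unsigned> breakDownMemOp(uint64_t Size, unsigned MaxBytes,
                                     bool AllowOverlap) {
  std::vector<unsigned> Chunks;
  unsigned Ty = MaxBytes; // assumed to be a power of two
  while (Size) {
    unsigned TySize = Ty;
    while (TySize > Size) {
      unsigned NewTy = Ty / 2; // next smaller power of two
      if (!Chunks.empty() && AllowOverlap && NewTy < Size) {
        TySize = Size; // keep Ty; let the final access overlap
      } else {
        Ty = NewTy;
        TySize = NewTy;
      }
    }
    Chunks.push_back(Ty);
    Size -= TySize;
  }
  return Chunks;
}
// breakDownMemOp(13, 8, false) -> {8, 4, 1}
// breakDownMemOp(13, 8, true)  -> {8, 8}  (second access overlaps by 3 bytes)
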
8487
8488 static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
8489 if (Ty.isVector())
8490 return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
8491 Ty.getNumElements());
8492 return IntegerType::get(C, Ty.getSizeInBits());
8493}
8494
8495// Get a vectorized representation of the memset value operand, GISel edition.
8496 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
8497 MachineRegisterInfo &MRI = *MIB.getMRI();
8498 unsigned NumBits = Ty.getScalarSizeInBits();
8499 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
8500 if (!Ty.isVector() && ValVRegAndVal) {
8501 APInt Scalar = ValVRegAndVal->Value.trunc(8);
8502 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
8503 return MIB.buildConstant(Ty, SplatVal).getReg(0);
8504 }
8505
8506 // Extend the byte value to the larger type, and then multiply by a magic
8507 // value 0x010101... in order to replicate it across every byte.
8508 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
8509 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
8510 return MIB.buildConstant(Ty, 0).getReg(0);
8511 }
8512
8513 LLT ExtType = Ty.getScalarType();
8514 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
8515 if (NumBits > 8) {
8516 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
8517 auto MagicMI = MIB.buildConstant(ExtType, Magic);
8518 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
8519 }
8520
8521 // For vector types create a G_BUILD_VECTOR.
8522 if (Ty.isVector())
8523 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
8524
8525 return Val;
8526}
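
Illustrative aside (not part of LegalizerHelper.cpp): the "magic value" multiply above is plain integer arithmetic. A standalone sketch of the byte replication; splatByte is an illustrative name, and Bytes is assumed to be at most 8:

#include <cstdint>

// Replicate the byte B into the low Bytes bytes of a 64-bit value by
// multiplying with 0x0101...01, the same trick as the G_MUL above.
uint64_t splatByte(uint8_t B, unsigned Bytes) {
  uint64_t Magic = 0;
  for (unsigned I = 0; I != Bytes; ++I)
    Magic = (Magic << 8) | 0x01; // Bytes == 4 gives 0x01010101
  return uint64_t(B) * Magic;    // 0xAB * 0x01010101 == 0xABABABAB
}
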
8527
8528 LegalizerHelper::LegalizeResult
8529 LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
8530 uint64_t KnownLen, Align Alignment,
8531 bool IsVolatile) {
8532 auto &MF = *MI.getParent()->getParent();
8533 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8534 auto &DL = MF.getDataLayout();
8535 LLVMContext &C = MF.getFunction().getContext();
8536
8537 assert(KnownLen != 0 && "Have a zero length memset length!");
8538
8539 bool DstAlignCanChange = false;
8540 MachineFrameInfo &MFI = MF.getFrameInfo();
8541 bool OptSize = shouldLowerMemFuncForSize(MF);
8542
8543 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8544 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8545 DstAlignCanChange = true;
8546
8547 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
8548 std::vector<LLT> MemOps;
8549
8550 const auto &DstMMO = **MI.memoperands_begin();
8551 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8552
8553 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
8554 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
8555
8556 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
8557 MemOp::Set(KnownLen, DstAlignCanChange,
8558 Alignment,
8559 /*IsZeroMemset=*/IsZeroVal,
8560 /*IsVolatile=*/IsVolatile),
8561 DstPtrInfo.getAddrSpace(), ~0u,
8562 MF.getFunction().getAttributes(), TLI))
8563 return UnableToLegalize;
8564
8565 if (DstAlignCanChange) {
8566 // Get an estimate of the type from the LLT.
8567 Type *IRTy = getTypeForLLT(MemOps[0], C);
8568 Align NewAlign = DL.getABITypeAlign(IRTy);
8569 if (NewAlign > Alignment) {
8570 Alignment = NewAlign;
8571 unsigned FI = FIDef->getOperand(1).getIndex();
8572 // Give the stack frame object a larger alignment if needed.
8573 if (MFI.getObjectAlign(FI) < Alignment)
8574 MFI.setObjectAlignment(FI, Alignment);
8575 }
8576 }
8577
8578 MachineIRBuilder MIB(MI);
8579 // Find the largest store and generate the bit pattern for it.
8580 LLT LargestTy = MemOps[0];
8581 for (unsigned i = 1; i < MemOps.size(); i++)
8582 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
8583 LargestTy = MemOps[i];
8584
8585 // The memset stored value is always defined as an s8, so in order to make it
8586 // work with larger store types we need to repeat the bit pattern across the
8587 // wider type.
8588 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
8589
8590 if (!MemSetValue)
8591 return UnableToLegalize;
8592
8593 // Generate the stores. For each store type in the list, we generate the
8594 // matching store of that type to the destination address.
8595 LLT PtrTy = MRI.getType(Dst);
8596 unsigned DstOff = 0;
8597 unsigned Size = KnownLen;
8598 for (unsigned I = 0; I < MemOps.size(); I++) {
8599 LLT Ty = MemOps[I];
8600 unsigned TySize = Ty.getSizeInBytes();
8601 if (TySize > Size) {
8602 // Issuing an unaligned store that overlaps with the previous store.
8603 // Adjust the offset accordingly.
8604 assert(I == MemOps.size() - 1 && I != 0);
8605 DstOff -= TySize - Size;
8606 }
8607
8608 // If this store is smaller than the largest store, see whether we can get
8609 // the smaller value for free with a truncate.
8610 Register Value = MemSetValue;
8611 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
8612 MVT VT = getMVTForLLT(Ty);
8613 MVT LargestVT = getMVTForLLT(LargestTy);
8614 if (!LargestTy.isVector() && !Ty.isVector() &&
8615 TLI.isTruncateFree(LargestVT, VT))
8616 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
8617 else
8618 Value = getMemsetValue(Val, Ty, MIB);
8619 if (!Value)
8620 return UnableToLegalize;
8621 }
8622
8623 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
8624
8625 Register Ptr = Dst;
8626 if (DstOff != 0) {
8627 auto Offset =
8628 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
8629 Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
8630 }
8631
8632 MIB.buildStore(Value, Ptr, *StoreMMO);
8633 DstOff += Ty.getSizeInBytes();
8634 Size -= TySize;
8635 }
8636
8637 MI.eraseFromParent();
8638 return Legalized;
8639}
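
Illustrative aside (not part of LegalizerHelper.cpp): a small worked example of the offset bookkeeping in the store loop above. For a 10-byte memset broken into two 8-byte stores, the trailing store is pulled back so it ends exactly at the end of the region and overlaps the first one. The chunk list {8, 8} is an assumption, as if produced by findGISelOptimalMemOpLowering with overlap allowed:

#include <cstdio>
#include <vector>

int main() {
  std::vector<unsigned> MemOps = {8, 8}; // store widths in bytes
  unsigned Size = 10, DstOff = 0;
  for (unsigned TySize : MemOps) {
    if (TySize > Size)
      DstOff -= TySize - Size; // overlap the previous store
    std::printf("store %u bytes at offset %u\n", TySize, DstOff);
    DstOff += TySize;
    Size -= TySize; // may wrap on the final overlapping store; loop is bounded
  }
  // Prints: "store 8 bytes at offset 0" then "store 8 bytes at offset 2".
  return 0;
}
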
8640
8641 LegalizerHelper::LegalizeResult
8642 LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
8643 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8644
8645 auto [Dst, Src, Len] = MI.getFirst3Regs();
8646
8647 const auto *MMOIt = MI.memoperands_begin();
8648 const MachineMemOperand *MemOp = *MMOIt;
8649 bool IsVolatile = MemOp->isVolatile();
8650
8651 // See if this is a constant length copy
8652 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
8653 // FIXME: support dynamically sized G_MEMCPY_INLINE
8654 assert(LenVRegAndVal &&
8655 "inline memcpy with dynamic size is not yet supported");
8656 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
8657 if (KnownLen == 0) {
8658 MI.eraseFromParent();
8659 return Legalized;
8660 }
8661
8662 const auto &DstMMO = **MI.memoperands_begin();
8663 const auto &SrcMMO = **std::next(MI.memoperands_begin());
8664 Align DstAlign = DstMMO.getBaseAlign();
8665 Align SrcAlign = SrcMMO.getBaseAlign();
8666
8667 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
8668 IsVolatile);
8669}
8670
8671 LegalizerHelper::LegalizeResult
8672 LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
8673 uint64_t KnownLen, Align DstAlign,
8674 Align SrcAlign, bool IsVolatile) {
8675 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8676 return lowerMemcpy(MI, Dst, Src, KnownLen,
8677 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
8678 IsVolatile);
8679}
8680
8681 LegalizerHelper::LegalizeResult
8682 LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
8683 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
8684 Align SrcAlign, bool IsVolatile) {
8685 auto &MF = *MI.getParent()->getParent();
8686 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8687 auto &DL = MF.getDataLayout();
8688 LLVMContext &C = MF.getFunction().getContext();
8689
8690 assert(KnownLen != 0 && "Have a zero length memcpy length!");
8691
8692 bool DstAlignCanChange = false;
8693 MachineFrameInfo &MFI = MF.getFrameInfo();
8694 Align Alignment = std::min(DstAlign, SrcAlign);
8695
8696 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8697 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8698 DstAlignCanChange = true;
8699
8700 // FIXME: infer better src pointer alignment like SelectionDAG does here.
8701 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
8702 // if the memcpy is in a tail call position.
8703
8704 std::vector<LLT> MemOps;
8705
8706 const auto &DstMMO = **MI.memoperands_begin();
8707 const auto &SrcMMO = **std::next(MI.memoperands_begin());
8708 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8709 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
8710
8711 if (!findGISelOptimalMemOpLowering(
8712 MemOps, Limit,
8713 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
8714 IsVolatile),
8715 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
8716 MF.getFunction().getAttributes(), TLI))
8717 return UnableToLegalize;
8718
8719 if (DstAlignCanChange) {
8720 // Get an estimate of the type from the LLT.
8721 Type *IRTy = getTypeForLLT(MemOps[0], C);
8722 Align NewAlign = DL.getABITypeAlign(IRTy);
8723
8724 // Don't promote to an alignment that would require dynamic stack
8725 // realignment.
8726 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
8727 if (!TRI->hasStackRealignment(MF))
8728 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
8729 NewAlign = NewAlign.previous();
8730
8731 if (NewAlign > Alignment) {
8732 Alignment = NewAlign;
8733 unsigned FI = FIDef->getOperand(1).getIndex();
8734 // Give the stack frame object a larger alignment if needed.
8735 if (MFI.getObjectAlign(FI) < Alignment)
8736 MFI.setObjectAlignment(FI, Alignment);
8737 }
8738 }
8739
8740 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
8741
8742 MachineIRBuilder MIB(MI);
8743 // Now we need to emit a load/store pair for each of the types we've
8744 // collected. I.e. for each type, generate a load of that type's width from
8745 // the source pointer, then a corresponding store of the loaded value to the
8746 // dest buffer. This can result in a sequence of loads and stores of mixed
8747 // types, depending on what the target specifies as good types to use.
8748 unsigned CurrOffset = 0;
8749 unsigned Size = KnownLen;
8750 for (auto CopyTy : MemOps) {
8751 // Issuing an unaligned load / store pair that overlaps with the previous
8752 // pair. Adjust the offset accordingly.
8753 if (CopyTy.getSizeInBytes() > Size)
8754 CurrOffset -= CopyTy.getSizeInBytes() - Size;
8755
8756 // Construct MMOs for the accesses.
8757 auto *LoadMMO =
8758 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
8759 auto *StoreMMO =
8760 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
8761
8762 // Create the load.
8763 Register LoadPtr = Src;
8764 Register Offset;
8765 if (CurrOffset != 0) {
8766 LLT SrcTy = MRI.getType(Src);
8767 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
8768 .getReg(0);
8769 LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
8770 }
8771 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
8772
8773 // Create the store.
8774 Register StorePtr = Dst;
8775 if (CurrOffset != 0) {
8776 LLT DstTy = MRI.getType(Dst);
8777 StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
8778 }
8779 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
8780 CurrOffset += CopyTy.getSizeInBytes();
8781 Size -= CopyTy.getSizeInBytes();
8782 }
8783
8784 MI.eraseFromParent();
8785 return Legalized;
8786}
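
Illustrative aside (not part of LegalizerHelper.cpp): a scalar C++ model of the chunked copy loop above, with each chunk standing in for one G_LOAD/G_STORE pair. The helper name copyInChunks and the per-chunk memcpy are illustrative assumptions; the offset adjustment for an oversized final chunk matches the loop above, and the buffers are assumed not to overlap (as with G_MEMCPY):

#include <cstring>
#include <vector>

// Copy KnownLen bytes using the given chunk widths; an oversized final chunk
// is shifted back so it ends exactly at Dst + KnownLen (overlapping access).
void copyInChunks(char *Dst, const char *Src, unsigned KnownLen,
                  const std::vector<unsigned> &MemOps) {
  unsigned CurrOffset = 0, Size = KnownLen;
  for (unsigned ChunkBytes : MemOps) {
    if (ChunkBytes > Size)
      CurrOffset -= ChunkBytes - Size; // overlap the previous pair
    std::memcpy(Dst + CurrOffset, Src + CurrOffset, ChunkBytes); // load + store
    CurrOffset += ChunkBytes;
    Size -= ChunkBytes; // may wrap on the final chunk; loop is bounded by MemOps
  }
}
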
8787
8788 LegalizerHelper::LegalizeResult
8789 LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
8790 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
8791 bool IsVolatile) {
8792 auto &MF = *MI.getParent()->getParent();
8793 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8794 auto &DL = MF.getDataLayout();
8795 LLVMContext &C = MF.getFunction().getContext();
8796
8797 assert(KnownLen != 0 && "Have a zero length memmove length!");
8798
8799 bool DstAlignCanChange = false;
8800 MachineFrameInfo &MFI = MF.getFrameInfo();
8801 bool OptSize = shouldLowerMemFuncForSize(MF);
8802 Align Alignment = std::min(DstAlign, SrcAlign);
8803
8804 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8805 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8806 DstAlignCanChange = true;
8807
8808 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
8809 std::vector<LLT> MemOps;
8810
8811 const auto &DstMMO = **MI.memoperands_begin();
8812 const auto &SrcMMO = **std::next(MI.memoperands_begin());
8813 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8814 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
8815
8816 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
8817 // to a bug in its findOptimalMemOpLowering implementation. For now do the
8818 // same thing here.
8819 if (!findGISelOptimalMemOpLowering(
8820 MemOps, Limit,
8821 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
8822 /*IsVolatile*/ true),
8823 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
8824 MF.getFunction().getAttributes(), TLI))
8825 return UnableToLegalize;
8826
8827 if (DstAlignCanChange) {
8828 // Get an estimate of the type from the LLT.
8829 Type *IRTy = getTypeForLLT(MemOps[0], C);
8830 Align NewAlign = DL.getABITypeAlign(IRTy);
8831
8832 // Don't promote to an alignment that would require dynamic stack
8833 // realignment.
8834 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
8835 if (!TRI->hasStackRealignment(MF))
8836 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
8837 NewAlign = NewAlign.previous();
8838
8839 if (NewAlign > Alignment) {
8840 Alignment = NewAlign;
8841 unsigned FI = FIDef->getOperand(1).getIndex();
8842 // Give the stack frame object a larger alignment if needed.
8843 if (MFI.getObjectAlign(FI) < Alignment)
8844 MFI.setObjectAlignment(FI, Alignment);
8845 }
8846 }
8847
8848 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
8849
8850 MachineIRBuilder MIB(MI);
8851 // Memmove requires that we perform the loads first before issuing the stores.
8852 // Apart from that, this loop is pretty much doing the same thing as the
8853 // memcpy codegen function.
8854 unsigned CurrOffset = 0;
8855 SmallVector<Register, 16> LoadVals;
8856 for (auto CopyTy : MemOps) {
8857 // Construct MMO for the load.
8858 auto *LoadMMO =
8859 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
8860
8861 // Create the load.
8862 Register LoadPtr = Src;
8863 if (CurrOffset != 0) {
8864 LLT SrcTy = MRI.getType(Src);
8865 auto Offset =
8866 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
8867 LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
8868 }
8869 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
8870 CurrOffset += CopyTy.getSizeInBytes();
8871 }
8872
8873 CurrOffset = 0;
8874 for (unsigned I = 0; I < MemOps.size(); ++I) {
8875 LLT CopyTy = MemOps[I];
8876 // Now store the values loaded.
8877 auto *StoreMMO =
8878 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
8879
8880 Register StorePtr = Dst;
8881 if (CurrOffset != 0) {
8882 LLT DstTy = MRI.getType(Dst);
8883 auto Offset =
8884 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
8885 StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
8886 }
8887 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
8888 CurrOffset += CopyTy.getSizeInBytes();
8889 }
8890 MI.eraseFromParent();
8891 return Legalized;
8892}
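
Illustrative aside (not part of LegalizerHelper.cpp): the loads-before-stores ordering above is what makes the lowering safe for overlapping buffers. A standalone sketch of that two-pass structure; moveInChunks is an illustrative name, and per-chunk byte vectors stand in for the loaded registers:

#include <algorithm>
#include <vector>

// First load every chunk into a temporary, then write all chunks out, so no
// store can clobber source bytes that a later load still needs.
void moveInChunks(char *Dst, const char *Src,
                  const std::vector<unsigned> &MemOps) {
  std::vector<std::vector<char>> LoadVals;
  unsigned Off = 0;
  for (unsigned ChunkBytes : MemOps) { // pass 1: loads only
    LoadVals.emplace_back(Src + Off, Src + Off + ChunkBytes);
    Off += ChunkBytes;
  }
  Off = 0;
  for (unsigned I = 0; I != MemOps.size(); ++I) { // pass 2: stores
    std::copy(LoadVals[I].begin(), LoadVals[I].end(), Dst + Off);
    Off += MemOps[I];
  }
}
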
8893
8894 LegalizerHelper::LegalizeResult
8895 LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
8896 const unsigned Opc = MI.getOpcode();
8897 // This combine is fairly complex so it's not written with a separate
8898 // matcher function.
8899 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
8900 Opc == TargetOpcode::G_MEMSET) &&
8901 "Expected memcpy like instruction");
8902
8903 auto MMOIt = MI.memoperands_begin();
8904 const MachineMemOperand *MemOp = *MMOIt;
8905
8906 Align DstAlign = MemOp->getBaseAlign();
8907 Align SrcAlign;
8908 auto [Dst, Src, Len] = MI.getFirst3Regs();
8909
8910 if (Opc != TargetOpcode::G_MEMSET) {
8911 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
8912 MemOp = *(++MMOIt);
8913 SrcAlign = MemOp->getBaseAlign();
8914 }
8915
8916 // See if this is a constant length copy
8917 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
8918 if (!LenVRegAndVal)
8919 return UnableToLegalize;
8920 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
8921
8922 if (KnownLen == 0) {
8923 MI.eraseFromParent();
8924 return Legalized;
8925 }
8926
8927 bool IsVolatile = MemOp->isVolatile();
8928 if (Opc == TargetOpcode::G_MEMCPY_INLINE)
8929 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
8930 IsVolatile);
8931
8932 // Don't try to optimize volatile.
8933 if (IsVolatile)
8934 return UnableToLegalize;
8935
8936 if (MaxLen && KnownLen > MaxLen)
8937 return UnableToLegalize;
8938
8939 if (Opc == TargetOpcode::G_MEMCPY) {
8940 auto &MF = *MI.getParent()->getParent();
8941 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8942 bool OptSize = shouldLowerMemFuncForSize(MF);
8943 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
8944 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
8945 IsVolatile);
8946 }
8947 if (Opc == TargetOpcode::G_MEMMOVE)
8948 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
8949 if (Opc == TargetOpcode::G_MEMSET)
8950 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
8951 return UnableToLegalize;
8952}
unsigned const MachineRegisterInfo * MRI
#define Success
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const LLT S1
static const LLT S64
static const LLT S32
static const LLT S16
amdgpu AMDGPU Register Bank Select
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition: Utils.h:73
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static Type * getTypeForLLT(LLT Ty, LLVMContext &C)
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
std::string Name
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver)
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
#define R2(n)
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t High
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
R600 Clause Merge
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1193
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition: APFloat.h:1006
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition: APFloat.h:966
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:207
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:184
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:187
void negate()
Negate this APInt in place.
Definition: APInt.h:1421
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:197
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:851
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition: APInt.h:248
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
bool hasAttributes() const
Return true if the builder has IR-level attributes.
Definition: Attributes.h:1075
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
bool hasRetAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the return value.
Definition: Attributes.h:803
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:1022
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:999
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:998
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:1017
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:1016
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:1020
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:1007
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:1001
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:1018
@ ICMP_EQ
equal
Definition: InstrTypes.h:1014
@ ICMP_NE
not equal
Definition: InstrTypes.h:1015
bool isSigned() const
Definition: InstrTypes.h:1265
const APFloat & getValueAPF() const
Definition: Constants.h:311
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:145
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
Definition: DataLayout.h:393
bool isBigEndian() const
Definition: DataLayout.h:239
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:296
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition: TypeSize.h:302
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:681
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:678
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:356
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:206
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isTailCall(const MachineInstr &MI) const override
bool isEquality() const
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
Definition: LowLevelType.h:214
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:64
constexpr bool isPointerVector() const
Definition: LowLevelType.h:152
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelType.h:57
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:170
constexpr bool isByteSized() const
Definition: LowLevelType.h:263
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:184
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelType.h:221
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:100
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
Definition: LowLevelType.h:230
constexpr LLT getScalarType() const
Definition: LowLevelType.h:208
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:203
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
Definition: LowLevelType.h:124
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
LegalizeResult lowerShlSat(MachineInstr &MI)
LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LegalizeResult lowerSITOFP(MachineInstr &MI)
LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LegalizeResult lowerBitCount(MachineInstr &MI)
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LegalizeResult lowerLoad(GAnyLoad &MI)
LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizeResult lowerFConstant(MachineInstr &MI)
LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerBitreverse(MachineInstr &MI)
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult lowerEXT(MachineInstr &MI)
LegalizeResult lowerStore(GStore &MI)
LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LegalizeResult lowerFPTOUI(MachineInstr &MI)
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LegalizeResult lowerBitcast(MachineInstr &MI)
LegalizeResult lowerMinMax(MachineInstr &MI)
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LegalizeResult lowerInsert(MachineInstr &MI)
LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LegalizeResult lowerExtract(MachineInstr &MI)
LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LegalizeResult lowerFPOWI(MachineInstr &MI)
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVectorReduction(MachineInstr &MI)
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LegalizeResult lowerFCopySign(MachineInstr &MI)
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LegalizeResult lowerFunnelShift(MachineInstr &MI)
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LegalizeResult lowerFMad(MachineInstr &MI)
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFFloor(MachineInstr &MI)
LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LegalizeResult lowerFPTOSI(MachineInstr &MI)
LegalizeResult lowerUITOFP(MachineInstr &MI)
LegalizeResult lowerShuffleVector(MachineInstr &MI)
LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerMergeValues(MachineInstr &MI)
LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LegalizeResult lowerRotate(MachineInstr &MI)
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LegalizeResult lowerDIVREM(MachineInstr &MI)
LegalizeResult lowerSelect(MachineInstr &MI)
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emiting a runtime library call instead.
LegalizeResult lowerStackRestore(MachineInstr &MI)
LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerStackSave(MachineInstr &MI)
LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeResult lowerTRUNC(MachineInstr &MI)
LegalizeResult lowerBswap(MachineInstr &MI)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LegalizeResult lowerConstant(MachineInstr &MI)
void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
virtual unsigned getExtOpcodeForWideningConstant(LLT SmallTy) const
Return the opcode (SEXT/ZEXT/ANYEXT) that should be performed while widening a constant of type Small...
bool isLegalOrCustom(const LegalityQuery &Query) const
virtual bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Called for instructions with the Custom LegalizationAction.
virtual bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition: MCInstrInfo.h:70
A single uniqued string.
Definition: Metadata.h:720
StringRef getString() const
Definition: Metadata.cpp:610
Machine Value Type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:585
iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Helper class to build MachineInstr.
MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FSUB Op0, Op1.
MachineInstrBuilder buildFPTOSI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOSI Src0.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFreeze(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_FREEZE Src.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
std::optional< MachineInstrBuilder > materializePtrAdd(Register &Res, Register Op0, const LLT ValueTy, uint64_t Value)
Materialize and insert Res = G_PTR_ADD Op0, (G_CONSTANT Value)
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_OR Op0, NegOne.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FABS Op0.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildZExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and inserts Res = G_AND Op, LowBitsSet(ImmOp) Since there is no G_ZEXT_INREG like G_SEXT_INREG,...
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FPOW Src0, Src1.
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
MachineInstrBuilder buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_INTRINSIC_TRUNC Src0.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildSExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src)
Build and insert a vector splat of a scalar Src using a G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idio...
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ_ZERO_UNDEF Op0, Src0.
MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts)
Build and insert Res = G_VSCALE MinElts.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
unsigned getBoolExtOp(bool IsVec, bool IsFP) const
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildSMax(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMAX Op0, Op1.
MachineInstrBuilder buildAssertZExt(const DstOp &Res, const SrcOp &Op, unsigned Size)
Build and insert Res = G_ASSERT_ZEXT Op, Size.
MachineInstrBuilder buildStrictFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_STRICT_FADD Op0, Op1.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildCTTZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ_ZERO_UNDEF Op0, Src0.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineInstrBuilder buildPadVectorWithUndefElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a, b, .....
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildSMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMIN Op0, Op1.
MachineInstrBuilder buildInsert(const DstOp &Res, const SrcOp &Src, const SrcOp &Op, unsigned Index)
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_FCOPYSIGN Op0, Op1.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Elt, const SrcOp &Idx)
Build and insert Res = G_INSERT_VECTOR_ELT Val, Elt, Idx.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildDeleteTrailingVectorElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x, y, z = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a,...
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildAtomicCmpXchg(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &CmpVal, const SrcOp &NewVal, MachineMemOperand &MMO)
Build and insert OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, MMO.
MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, ArrayRef< int > Mask)
Build and insert Res = G_SHUFFLE_VECTOR Src1, Src2, Mask.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildUMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_UMIN Op0, Op1.
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FCMP Pred, Op0, Op1.
MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FADD Op0, Op1.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
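
The build* helpers above all return a MachineInstrBuilder wrapping the freshly inserted instruction, so they compose directly. A minimal sketch (not part of the LLVM sources; emitShlOr1 and its arguments are hypothetical) of chaining a few of them at an insertion point that has already been set with setInstrAndDebugLoc():

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

// Emit (X << 1) | 1 of type Ty and return the result register.
static Register emitShlOr1(MachineIRBuilder &B, Register X, LLT Ty) {
  auto One = B.buildConstant(Ty, 1);   // Ty = G_CONSTANT 1
  auto Shl = B.buildShl(Ty, X, One);   // Ty = G_SHL X, One
  auto Or  = B.buildOr(Ty, Shl, One);  // Ty = G_OR Shl, One
  return Or.getReg(0);                 // Operand 0 is the def.
}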
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
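
When no dedicated build* helper fits, the lower-level buildInstr()/addDef()/addUse() path above can be used instead. A small hypothetical fragment, assuming B is a MachineIRBuilder and DstReg, LHS, RHS are existing virtual registers:

// Equivalent of buildAdd(DstReg, LHS, RHS) spelled out by hand.
MachineInstrBuilder MIB = B.buildInstr(TargetOpcode::G_ADD)
                              .addDef(DstReg)  // Result operand comes first.
                              .addUse(LHS)
                              .addUse(RHS);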
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:558
bool isReturn(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:929
bool isCopy() const
bool isDebugInstr() const
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:561
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:789
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:568
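
A small hypothetical helper illustrating the MachineInstr accessors above (getNumOperands, getOperand) together with the Register predicates listed further below:

#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// Return true if every register operand of MI is a virtual register.
static bool allRegOperandsVirtual(const MachineInstr &MI) {
  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
    const MachineOperand &MO = MI.getOperand(I);
    if (MO.isReg() && MO.getReg().isValid() && !MO.getReg().isVirtual())
      return false;
  }
  return true;
}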
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
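
A hypothetical fragment showing the MachineMemOperand queries above, assuming MI is a load or store with a single memory operand:

const MachineMemOperand &MMO = **MI.memoperands_begin();
LLT MemTy = MMO.getMemoryType();                          // LLT actually accessed in memory.
unsigned AddrSpace = MMO.getPointerInfo().getAddrSpace(); // Address space of the pointer.
bool IsStore = MMO.isStore();                             // True when the MOStore flag is set.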
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given register.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
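
A minimal hypothetical helper combining the Register predicates above with MachineRegisterInfo::getType:

#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

// A register is "generic" if it is a virtual register with an LLT attached.
static bool isGenericVReg(Register Reg, const MachineRegisterInfo &MRI) {
  return Reg.isValid() && Reg.isVirtual() && MRI.getType(Reg).isValid();
}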
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
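
A short hypothetical fragment using the SmallVector operations above in the style the legalizer collects split registers (NumParts, NarrowTy and MRI are placeholders):

SmallVector<Register, 8> Parts;
Parts.reserve(NumParts);                 // One allocation up front.
for (unsigned I = 0; I != NumParts; ++I)
  Parts.push_back(MRI.createGenericVirtualRegister(NarrowTy));

SmallVector<Register, 4> Leftover;       // Pieces of an irregular tail, if any.
Parts.append(Leftover.begin(), Leftover.end());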
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:373
TargetInstrInfo - Interface to description of machine instruction set.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR.
unsigned getMaxStoresPerMemcpy(bool OptSize) const
Get maximum # of store operations permitted for llvm.memcpy.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save and restore.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and alignment.
unsigned getMaxStoresPerMemmove(bool OptSize) const
Get maximum # of store operations permitted for llvm.memmove.
Align getMinStackArgumentAlignment() const
Return the minimum stack alignment of an argument.
unsigned getMaxStoresPerMemset(bool OptSize) const
Get maximum # of store operations permitted for llvm.memset.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the target instruction selector understands.
virtual Register getRegisterByName(const char *RegName, LLT Ty, const MachineFunction &MF) const
Return the register ID of the name passed in.
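
A hedged fragment showing how a memory-op lowering might consult the TargetLowering hooks above (VT, AddrSpace and DstAlign are placeholders; the real decision logic is more involved):

unsigned StoreLimit = TLI.getMaxStoresPerMemcpy(/*OptSize=*/false);
unsigned FastUnaligned = 0;
bool AllowMisaligned = TLI.allowsMisalignedMemoryAccesses(
    VT, AddrSpace, DstAlign, MachineMemOperand::MONone, &FastUnaligned);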
const Triple & getTargetTriple() const
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command line.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
virtual const CallLowering * getCallLowering() const
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:553
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
static Type * getX86_FP80Ty(LLVMContext &C)
static Type * getVoidTy(LLVMContext &C)
static Type * getFP128Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the same way as for normal types.
Definition: TypeSize.h:239
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is legal.
Definition: LegalizerInfo.h:65
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
Definition: LegalizerInfo.h:83
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
Definition: LegalizerInfo.h:57
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegalizerInfo.h:74
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
Definition: LegalizerInfo.h:52
@ Custom
The target wants to do something special with this combination of operand and type.
Definition: LegalizerInfo.h:87
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes added by doing so.
Definition: LegalizerInfo.h:71
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
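
A hedged sketch of the usual pattern around these RTLIB getters: map the operand/result types to a Libcall and bail out if none exists. FromVT and ToVT are placeholder EVTs, the fragment is assumed to run inside a LegalizerHelper-style function, and getLibcallName is the standard TargetLoweringBase accessor:

RTLIB::Libcall LC = RTLIB::getFPEXT(FromVT, ToVT);
if (LC == RTLIB::UNKNOWN_LIBCALL)
  return LegalizerHelper::UnableToLegalize;     // No runtime routine for this pair of types.
const char *LibcallName = TLI.getLibcallName(LC);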
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:456
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition: STLExtras.h:853
int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition: MathExtras.h:228
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:625
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_BUILD_VECTOR.
Definition: Utils.cpp:1540
LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:372
LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elements.
Definition: Utils.cpp:1144
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition: Utils.cpp:479
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:349
int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition: MathExtras.h:219
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its APInt value and def register.
Definition: Utils.cpp:413
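
A hypothetical fragment combining m_ICst/mi_match (listed further above) with getIConstantVRegValWithLookThrough; Reg and MRI are placeholders and MIPatternMatch is assumed to be in scope:

APInt CstVal;
if (mi_match(Reg, MRI, m_ICst(CstVal))) {
  // Reg is directly defined by a G_CONSTANT of value CstVal.
}
if (auto ValAndVReg = getIConstantVRegValWithLookThrough(Reg, MRI)) {
  const APInt &V = ValAndVReg->Value;  // Constant found by looking through copies/extensions.
  (void)V;
}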
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition: Utils.h:334
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition: Alignment.h:111
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
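
A tiny illustrative fragment for the alignment helpers above (values chosen arbitrarily):

Align Base(16);
Align AtOffset = commonAlignment(Base, /*Offset=*/8); // Align(8): what still holds at Base + 8.
unsigned LogBase = Log2(Base);                        // 4
Align Safe = assumeAligned(0);                        // Align(1): zero is treated as one.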
LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition: Utils.cpp:1232
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition: Utils.cpp:583
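
A hedged fragment in the spirit of the narrowing utilities above (SrcReg, WideTy, NarrowTy, MIRBuilder and MRI are placeholders):

LLT GCDTy = getGCDType(WideTy, NarrowTy);   // Largest common piece type.
unsigned WideSize = WideTy.getSizeInBits();
unsigned GCDSize = GCDTy.getSizeInBits();
unsigned NumParts = WideSize / GCDSize;     // How many GCDTy blocks cover WideTy.
SmallVector<Register, 8> Parts;
extractParts(SrcReg, GCDTy, NumParts, Parts, MIRBuilder, MRI); // One vreg per block.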
#define N
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:249
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:250
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Align previous() const
Definition: Alignment.h:88
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
The LegalityQuery object bundles together all the information that's needed to decide whether a given operation is legal or not.
LegalizeAction Action
The action to take or the final answer.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands, relating them back to the original IR value.
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)