1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
35#include "llvm/Support/Debug.h"
39#include <numeric>
40#include <optional>
41
42#define DEBUG_TYPE "legalizer"
43
44using namespace llvm;
45using namespace LegalizeActions;
46using namespace MIPatternMatch;
47
48/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
49///
50/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
51/// with any leftover piece as type \p LeftoverTy
52///
53/// Returns -1 in the first element of the pair if the breakdown is not
54/// satisfiable.
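///
/// For example, breaking OrigTy = s88 into NarrowTy = s32 pieces gives
/// NumParts = 2 with a LeftoverTy of s24, i.e. the pair {2, 1}.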
55static std::pair<int, int>
56getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
57 assert(!LeftoverTy.isValid() && "this is an out argument");
58
59 unsigned Size = OrigTy.getSizeInBits();
60 unsigned NarrowSize = NarrowTy.getSizeInBits();
61 unsigned NumParts = Size / NarrowSize;
62 unsigned LeftoverSize = Size - NumParts * NarrowSize;
63 assert(Size > NarrowSize);
64
65 if (LeftoverSize == 0)
66 return {NumParts, 0};
67
68 if (NarrowTy.isVector()) {
69 unsigned EltSize = OrigTy.getScalarSizeInBits();
70 if (LeftoverSize % EltSize != 0)
71 return {-1, -1};
72 LeftoverTy =
73 LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
74 OrigTy.getElementType());
75 } else {
76 LeftoverTy = LLT::scalar(LeftoverSize);
77 }
78
79 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
80 return std::make_pair(NumParts, NumLeftover);
81}
82
83static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
84
85 if (!Ty.isScalar())
86 return nullptr;
87
88 switch (Ty.getSizeInBits()) {
89 case 16:
90 return Type::getHalfTy(Ctx);
91 case 32:
92 return Type::getFloatTy(Ctx);
93 case 64:
94 return Type::getDoubleTy(Ctx);
95 case 80:
96 return Type::getX86_FP80Ty(Ctx);
97 case 128:
98 return Type::getFP128Ty(Ctx);
99 default:
100 return nullptr;
101 }
102}
103
104LegalizerHelper::LegalizerHelper(MachineFunction &MF,
105 GISelChangeObserver &Observer,
106 MachineIRBuilder &Builder)
107 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
108 LI(*MF.getSubtarget().getLegalizerInfo()),
109 TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
110
111LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
112 GISelChangeObserver &Observer,
113 MachineIRBuilder &B, GISelKnownBits *KB)
114 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
115 TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
116
117LegalizerHelper::LegalizeResult
118LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
119 LostDebugLocObserver &LocObserver) {
120 LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
121
122 MIRBuilder.setInstrAndDebugLoc(MI);
123
124 if (isa<GIntrinsic>(MI))
125 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
126 auto Step = LI.getAction(MI, MRI);
127 switch (Step.Action) {
128 case Legal:
129 LLVM_DEBUG(dbgs() << ".. Already legal\n");
130 return AlreadyLegal;
131 case Libcall:
132 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
133 return libcall(MI, LocObserver);
134 case NarrowScalar:
135 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
136 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
137 case WidenScalar:
138 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
139 return widenScalar(MI, Step.TypeIdx, Step.NewType);
140 case Bitcast:
141 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
142 return bitcast(MI, Step.TypeIdx, Step.NewType);
143 case Lower:
144 LLVM_DEBUG(dbgs() << ".. Lower\n");
145 return lower(MI, Step.TypeIdx, Step.NewType);
146 case FewerElements:
147 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
148 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
149 case MoreElements:
150 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
151 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
152 case Custom:
153 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
154 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
155 : UnableToLegalize;
156 default:
157 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
158 return UnableToLegalize;
159 }
160}
161
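/// Recombine the pieces produced by a narrowing split: merge the \p PartTy
/// registers in \p PartRegs, plus an optional leftover piece of type
/// \p LeftoverTy in \p LeftoverRegs, back into \p DstReg of type \p ResultTy.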
162void LegalizerHelper::insertParts(Register DstReg,
163 LLT ResultTy, LLT PartTy,
164 ArrayRef<Register> PartRegs,
165 LLT LeftoverTy,
166 ArrayRef<Register> LeftoverRegs) {
167 if (!LeftoverTy.isValid()) {
168 assert(LeftoverRegs.empty());
169
170 if (!ResultTy.isVector()) {
171 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
172 return;
173 }
174
175 if (PartTy.isVector())
176 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
177 else
178 MIRBuilder.buildBuildVector(DstReg, PartRegs);
179 return;
180 }
181
182 // Merge sub-vectors with different numbers of elements and insert into DstReg.
183 if (ResultTy.isVector()) {
184 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
185 SmallVector<Register, 8> AllRegs;
186 for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
187 AllRegs.push_back(Reg);
188 return mergeMixedSubvectors(DstReg, AllRegs);
189 }
190
191 SmallVector<Register> GCDRegs;
192 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
193 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
194 extractGCDType(GCDRegs, GCDTy, PartReg);
195 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
196 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
197}
198
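/// Unpack the scalar elements of the vector register \p Reg and append them to
/// \p Elts.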
199void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
200 Register Reg) {
201 LLT Ty = MRI.getType(Reg);
202 SmallVector<Register, 8> RegElts;
203 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
204 MIRBuilder, MRI);
205 Elts.append(RegElts);
206}
207
208/// Merge \p PartRegs with different types into \p DstReg.
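/// All but the last register are vectors with the same element type; the last
/// one may be a shorter vector or a single leftover scalar. For example, a
/// <5 x s16> destination can be assembled from {<2 x s16>, <2 x s16>, s16}.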
209void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
210 ArrayRef<Register> PartRegs) {
211 SmallVector<Register, 8> AllElts;
212 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
213 appendVectorElts(AllElts, PartRegs[i]);
214
215 Register Leftover = PartRegs[PartRegs.size() - 1];
216 if (!MRI.getType(Leftover).isVector())
217 AllElts.push_back(Leftover);
218 else
219 appendVectorElts(AllElts, Leftover);
220
221 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
222}
223
224/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
225static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
226 const MachineInstr &MI) {
227 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
228
229 const int StartIdx = Regs.size();
230 const int NumResults = MI.getNumOperands() - 1;
231 Regs.resize(Regs.size() + NumResults);
232 for (int I = 0; I != NumResults; ++I)
233 Regs[StartIdx + I] = MI.getOperand(I).getReg();
234}
235
236void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
237 LLT GCDTy, Register SrcReg) {
238 LLT SrcTy = MRI.getType(SrcReg);
239 if (SrcTy == GCDTy) {
240 // If the source already evenly divides the result type, we don't need to do
241 // anything.
242 Parts.push_back(SrcReg);
243 } else {
244 // Need to split into common type sized pieces.
245 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
246 getUnmergeResults(Parts, *Unmerge);
247 }
248}
249
250LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
251 LLT NarrowTy, Register SrcReg) {
252 LLT SrcTy = MRI.getType(SrcReg);
253 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
254 extractGCDType(Parts, GCDTy, SrcReg);
255 return GCDTy;
256}
257
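/// Build merges of the \p GCDTy pieces in \p VRegs into \p NarrowTy registers
/// covering the least common multiple type of \p DstTy and \p NarrowTy,
/// padding missing high pieces according to \p PadStrategy (G_ANYEXT, G_ZEXT
/// or G_SEXT). On return \p VRegs holds the \p NarrowTy pieces and the LCM
/// type is returned.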
258LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
259 SmallVectorImpl<Register> &VRegs,
260 unsigned PadStrategy) {
261 LLT LCMTy = getLCMType(DstTy, NarrowTy);
262
263 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
264 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
265 int NumOrigSrc = VRegs.size();
266
267 Register PadReg;
268
269 // Get a value we can use to pad the source value if the sources won't evenly
270 // cover the result type.
271 if (NumOrigSrc < NumParts * NumSubParts) {
272 if (PadStrategy == TargetOpcode::G_ZEXT)
273 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
274 else if (PadStrategy == TargetOpcode::G_ANYEXT)
275 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
276 else {
277 assert(PadStrategy == TargetOpcode::G_SEXT);
278
279 // Shift the sign bit of the low register through the high register.
280 auto ShiftAmt =
281 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
282 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
283 }
284 }
285
286 // Registers for the final merge to be produced.
287 SmallVector<Register, 4> Remerge(NumParts);
288
289 // Registers needed for intermediate merges, which will be merged into a
290 // source for Remerge.
291 SmallVector<Register, 4> SubMerge(NumSubParts);
292
293 // Once we've fully read off the end of the original source bits, we can reuse
294 // the same high bits for remaining padding elements.
295 Register AllPadReg;
296
297 // Build merges to the LCM type to cover the original result type.
298 for (int I = 0; I != NumParts; ++I) {
299 bool AllMergePartsArePadding = true;
300
301 // Build the requested merges to the requested type.
302 for (int J = 0; J != NumSubParts; ++J) {
303 int Idx = I * NumSubParts + J;
304 if (Idx >= NumOrigSrc) {
305 SubMerge[J] = PadReg;
306 continue;
307 }
308
309 SubMerge[J] = VRegs[Idx];
310
311 // There are meaningful bits here we can't reuse later.
312 AllMergePartsArePadding = false;
313 }
314
315 // If we've filled up a complete piece with padding bits, we can directly
316 // emit the natural sized constant if applicable, rather than a merge of
317 // smaller constants.
318 if (AllMergePartsArePadding && !AllPadReg) {
319 if (PadStrategy == TargetOpcode::G_ANYEXT)
320 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
321 else if (PadStrategy == TargetOpcode::G_ZEXT)
322 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
323
324 // If this is a sign extension, we can't materialize a trivial constant
325 // with the right type and have to produce a merge.
326 }
327
328 if (AllPadReg) {
329 // Avoid creating additional instructions if we're just adding additional
330 // copies of padding bits.
331 Remerge[I] = AllPadReg;
332 continue;
333 }
334
335 if (NumSubParts == 1)
336 Remerge[I] = SubMerge[0];
337 else
338 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
339
340 // In the sign extend padding case, re-use the first all-signbit merge.
341 if (AllMergePartsArePadding && !AllPadReg)
342 AllPadReg = Remerge[I];
343 }
344
345 VRegs = std::move(Remerge);
346 return LCMTy;
347}
348
349void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
350 ArrayRef<Register> RemergeRegs) {
351 LLT DstTy = MRI.getType(DstReg);
352
353 // Create the merge to the widened source, and extract the relevant bits into
354 // the result.
355
356 if (DstTy == LCMTy) {
357 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
358 return;
359 }
360
361 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
362 if (DstTy.isScalar() && LCMTy.isScalar()) {
363 MIRBuilder.buildTrunc(DstReg, Remerge);
364 return;
365 }
366
367 if (LCMTy.isVector()) {
368 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
369 SmallVector<Register, 8> UnmergeDefs(NumDefs);
370 UnmergeDefs[0] = DstReg;
371 for (unsigned I = 1; I != NumDefs; ++I)
372 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
373
374 MIRBuilder.buildUnmerge(UnmergeDefs,
375 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
376 return;
377 }
378
379 llvm_unreachable("unhandled case");
380}
381
382static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
383#define RTLIBCASE_INT(LibcallPrefix) \
384 do { \
385 switch (Size) { \
386 case 32: \
387 return RTLIB::LibcallPrefix##32; \
388 case 64: \
389 return RTLIB::LibcallPrefix##64; \
390 case 128: \
391 return RTLIB::LibcallPrefix##128; \
392 default: \
393 llvm_unreachable("unexpected size"); \
394 } \
395 } while (0)
396
397#define RTLIBCASE(LibcallPrefix) \
398 do { \
399 switch (Size) { \
400 case 32: \
401 return RTLIB::LibcallPrefix##32; \
402 case 64: \
403 return RTLIB::LibcallPrefix##64; \
404 case 80: \
405 return RTLIB::LibcallPrefix##80; \
406 case 128: \
407 return RTLIB::LibcallPrefix##128; \
408 default: \
409 llvm_unreachable("unexpected size"); \
410 } \
411 } while (0)
412
413 switch (Opcode) {
414 case TargetOpcode::G_MUL:
415 RTLIBCASE_INT(MUL_I);
416 case TargetOpcode::G_SDIV:
417 RTLIBCASE_INT(SDIV_I);
418 case TargetOpcode::G_UDIV:
419 RTLIBCASE_INT(UDIV_I);
420 case TargetOpcode::G_SREM:
421 RTLIBCASE_INT(SREM_I);
422 case TargetOpcode::G_UREM:
423 RTLIBCASE_INT(UREM_I);
424 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
425 RTLIBCASE_INT(CTLZ_I);
426 case TargetOpcode::G_FADD:
427 RTLIBCASE(ADD_F);
428 case TargetOpcode::G_FSUB:
429 RTLIBCASE(SUB_F);
430 case TargetOpcode::G_FMUL:
431 RTLIBCASE(MUL_F);
432 case TargetOpcode::G_FDIV:
433 RTLIBCASE(DIV_F);
434 case TargetOpcode::G_FEXP:
435 RTLIBCASE(EXP_F);
436 case TargetOpcode::G_FEXP2:
437 RTLIBCASE(EXP2_F);
438 case TargetOpcode::G_FEXP10:
439 RTLIBCASE(EXP10_F);
440 case TargetOpcode::G_FREM:
441 RTLIBCASE(REM_F);
442 case TargetOpcode::G_FPOW:
443 RTLIBCASE(POW_F);
444 case TargetOpcode::G_FPOWI:
445 RTLIBCASE(POWI_F);
446 case TargetOpcode::G_FMA:
447 RTLIBCASE(FMA_F);
448 case TargetOpcode::G_FSIN:
449 RTLIBCASE(SIN_F);
450 case TargetOpcode::G_FCOS:
451 RTLIBCASE(COS_F);
452 case TargetOpcode::G_FTAN:
453 RTLIBCASE(TAN_F);
454 case TargetOpcode::G_FLOG10:
455 RTLIBCASE(LOG10_F);
456 case TargetOpcode::G_FLOG:
457 RTLIBCASE(LOG_F);
458 case TargetOpcode::G_FLOG2:
459 RTLIBCASE(LOG2_F);
460 case TargetOpcode::G_FLDEXP:
461 RTLIBCASE(LDEXP_F);
462 case TargetOpcode::G_FCEIL:
463 RTLIBCASE(CEIL_F);
464 case TargetOpcode::G_FFLOOR:
465 RTLIBCASE(FLOOR_F);
466 case TargetOpcode::G_FMINNUM:
467 RTLIBCASE(FMIN_F);
468 case TargetOpcode::G_FMAXNUM:
469 RTLIBCASE(FMAX_F);
470 case TargetOpcode::G_FSQRT:
471 RTLIBCASE(SQRT_F);
472 case TargetOpcode::G_FRINT:
473 RTLIBCASE(RINT_F);
474 case TargetOpcode::G_FNEARBYINT:
475 RTLIBCASE(NEARBYINT_F);
476 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
477 RTLIBCASE(ROUNDEVEN_F);
478 case TargetOpcode::G_INTRINSIC_LRINT:
479 RTLIBCASE(LRINT_F);
480 case TargetOpcode::G_INTRINSIC_LLRINT:
481 RTLIBCASE(LLRINT_F);
482 }
483 llvm_unreachable("Unknown libcall function");
484}
485
486/// True if an instruction is in tail position in its caller. Intended for
487/// legalizing libcalls as tail calls when possible.
488static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
489 MachineInstr &MI,
490 const TargetInstrInfo &TII,
491 MachineRegisterInfo &MRI) {
492 MachineBasicBlock &MBB = *MI.getParent();
493 const Function &F = MBB.getParent()->getFunction();
494
495 // Conservatively require the attributes of the call to match those of
496 // the return. Ignore NoAlias and NonNull because they don't affect the
497 // call sequence.
498 AttributeList CallerAttrs = F.getAttributes();
499 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
500 .removeAttribute(Attribute::NoAlias)
501 .removeAttribute(Attribute::NonNull)
502 .hasAttributes())
503 return false;
504
505 // It's not safe to eliminate the sign / zero extension of the return value.
506 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
507 CallerAttrs.hasRetAttr(Attribute::SExt))
508 return false;
509
510 // Only tail call if the following instruction is a standard return or if we
511 // have a `thisreturn` callee, and a sequence like:
512 //
513 // G_MEMCPY %0, %1, %2
514 // $x0 = COPY %0
515 // RET_ReallyLR implicit $x0
516 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
517 if (Next != MBB.instr_end() && Next->isCopy()) {
518 if (MI.getOpcode() == TargetOpcode::G_BZERO)
519 return false;
520
521 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
522 // memcpy/etc routines return the same parameter. For others it will be the
523 // returned value.
524 Register VReg = MI.getOperand(0).getReg();
525 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
526 return false;
527
528 Register PReg = Next->getOperand(0).getReg();
529 if (!PReg.isPhysical())
530 return false;
531
532 auto Ret = next_nodbg(Next, MBB.instr_end());
533 if (Ret == MBB.instr_end() || !Ret->isReturn())
534 return false;
535
536 if (Ret->getNumImplicitOperands() != 1)
537 return false;
538
539 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
540 return false;
541
542 // Skip over the COPY that we just validated.
543 Next = Ret;
544 }
545
546 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
547 return false;
548
549 return true;
550}
551
552LegalizerHelper::LegalizeResult
553llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
554 const CallLowering::ArgInfo &Result,
555 ArrayRef<CallLowering::ArgInfo> Args,
556 const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
557 MachineInstr *MI) {
558 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
559
560 CallLowering::CallLoweringInfo Info;
561 Info.CallConv = CC;
562 Info.Callee = MachineOperand::CreateES(Name);
563 Info.OrigRet = Result;
564 if (MI)
565 Info.IsTailCall =
566 (Result.Ty->isVoidTy() ||
567 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
568 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
569 *MIRBuilder.getMRI());
570
571 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
572 if (!CLI.lowerCall(MIRBuilder, Info))
573 return LegalizerHelper::UnableToLegalize;
574
575 if (MI && Info.LoweredTailCall) {
576 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
577
578 // Check debug locations before removing the return.
579 LocObserver.checkpoint(true);
580
581 // We must have a return following the call (or debug insts) to get past
582 // isLibCallInTailPosition.
583 do {
584 MachineInstr *Next = MI->getNextNode();
585 assert(Next &&
586 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
587 "Expected instr following MI to be return or debug inst?");
588 // We lowered a tail call, so the call is now the return from the block.
589 // Delete the old return.
590 Next->eraseFromParent();
591 } while (MI->getNextNode());
592
593 // We expect to lose the debug location from the return.
594 LocObserver.checkpoint(false);
595 }
596 return LegalizerHelper::Legalized;
597}
598
599LegalizerHelper::LegalizeResult
600llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
601 const CallLowering::ArgInfo &Result,
602 ArrayRef<CallLowering::ArgInfo> Args,
603 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
604 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
605 const char *Name = TLI.getLibcallName(Libcall);
606 if (!Name)
607 return LegalizerHelper::UnableToLegalize;
608 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
609 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
610}
611
612// Useful for libcalls where all operands have the same type.
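// For example, an s64 G_FREM typically lowers to a call to fmod with both
// operands and the result passed as 64-bit values.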
613static LegalizerHelper::LegalizeResult
614simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
615 Type *OpType, LostDebugLocObserver &LocObserver) {
616 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
617
618 // FIXME: What does the original arg index mean here?
619 SmallVector<CallLowering::ArgInfo, 3> Args;
620 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
621 Args.push_back({MO.getReg(), OpType, 0});
622 return createLibcall(MIRBuilder, Libcall,
623 {MI.getOperand(0).getReg(), OpType, 0}, Args,
624 LocObserver, &MI);
625}
626
627LegalizerHelper::LegalizeResult
628llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
629 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
630 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
631
632 SmallVector<CallLowering::ArgInfo, 3> Args;
633 // Add all the args, except for the last which is an imm denoting 'tail'.
634 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
635 Register Reg = MI.getOperand(i).getReg();
636
637 // Need to derive an IR type for call lowering.
638 LLT OpLLT = MRI.getType(Reg);
639 Type *OpTy = nullptr;
640 if (OpLLT.isPointer())
641 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
642 else
643 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
644 Args.push_back({Reg, OpTy, 0});
645 }
646
647 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
648 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
649 RTLIB::Libcall RTLibcall;
650 unsigned Opc = MI.getOpcode();
651 switch (Opc) {
652 case TargetOpcode::G_BZERO:
653 RTLibcall = RTLIB::BZERO;
654 break;
655 case TargetOpcode::G_MEMCPY:
656 RTLibcall = RTLIB::MEMCPY;
657 Args[0].Flags[0].setReturned();
658 break;
659 case TargetOpcode::G_MEMMOVE:
660 RTLibcall = RTLIB::MEMMOVE;
661 Args[0].Flags[0].setReturned();
662 break;
663 case TargetOpcode::G_MEMSET:
664 RTLibcall = RTLIB::MEMSET;
665 Args[0].Flags[0].setReturned();
666 break;
667 default:
668 llvm_unreachable("unsupported opcode");
669 }
670 const char *Name = TLI.getLibcallName(RTLibcall);
671
672 // Unsupported libcall on the target.
673 if (!Name) {
674 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
675 << MIRBuilder.getTII().getName(Opc) << "\n");
676 return LegalizerHelper::UnableToLegalize;
677 }
678
679 CallLowering::CallLoweringInfo Info;
680 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
681 Info.Callee = MachineOperand::CreateES(Name);
682 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
683 Info.IsTailCall =
684 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
685 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
686
687 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
688 if (!CLI.lowerCall(MIRBuilder, Info))
689 return LegalizerHelper::UnableToLegalize;
690
691 if (Info.LoweredTailCall) {
692 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
693
694 // Check debug locations before removing the return.
695 LocObserver.checkpoint(true);
696
697 // We must have a return following the call (or debug insts) to get past
698 // isLibCallInTailPosition.
699 do {
700 MachineInstr *Next = MI.getNextNode();
701 assert(Next &&
702 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
703 "Expected instr following MI to be return or debug inst?");
704 // We lowered a tail call, so the call is now the return from the block.
705 // Delete the old return.
706 Next->eraseFromParent();
707 } while (MI.getNextNode());
708
709 // We expect to lose the debug location from the return.
710 LocObserver.checkpoint(false);
711 }
712
713 return LegalizerHelper::Legalized;
714}
715
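// Map an atomic cmpxchg/RMW instruction to the outline-atomics helper libcall
// matching its memory ordering and access size, or return UNKNOWN_LIBCALL if
// no such helper exists (e.g. for vector memory types).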
716static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
717 unsigned Opc = MI.getOpcode();
718 auto &AtomicMI = cast<GMemOperation>(MI);
719 auto &MMO = AtomicMI.getMMO();
720 auto Ordering = MMO.getMergedOrdering();
721 LLT MemType = MMO.getMemoryType();
722 uint64_t MemSize = MemType.getSizeInBytes();
723 if (MemType.isVector())
724 return RTLIB::UNKNOWN_LIBCALL;
725
726#define LCALLS(A, B) \
727 { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
728#define LCALL5(A) \
729 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
730 switch (Opc) {
731 case TargetOpcode::G_ATOMIC_CMPXCHG:
732 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
733 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
734 return getOutlineAtomicHelper(LC, Ordering, MemSize);
735 }
736 case TargetOpcode::G_ATOMICRMW_XCHG: {
737 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
738 return getOutlineAtomicHelper(LC, Ordering, MemSize);
739 }
740 case TargetOpcode::G_ATOMICRMW_ADD:
741 case TargetOpcode::G_ATOMICRMW_SUB: {
742 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
743 return getOutlineAtomicHelper(LC, Ordering, MemSize);
744 }
745 case TargetOpcode::G_ATOMICRMW_AND: {
746 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
747 return getOutlineAtomicHelper(LC, Ordering, MemSize);
748 }
749 case TargetOpcode::G_ATOMICRMW_OR: {
750 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
751 return getOutlineAtomicHelper(LC, Ordering, MemSize);
752 }
753 case TargetOpcode::G_ATOMICRMW_XOR: {
754 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
755 return getOutlineAtomicHelper(LC, Ordering, MemSize);
756 }
757 default:
758 return RTLIB::UNKNOWN_LIBCALL;
759 }
760#undef LCALLS
761#undef LCALL5
762}
763
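// Lower an atomic cmpxchg/RMW instruction to a call to the outline-atomics
// helper selected above; value operands are passed as integers and the
// address as a pointer. G_ATOMICRMW_AND and G_ATOMICRMW_SUB are rewritten in
// terms of the LDCLR (and-not) and LDADD (of the negated value) helpers.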
764static LegalizerHelper::LegalizeResult
765createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
766 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
767
768 Type *RetTy;
769 SmallVector<Register> RetRegs;
770 SmallVector<CallLowering::ArgInfo, 3> Args;
771 unsigned Opc = MI.getOpcode();
772 switch (Opc) {
773 case TargetOpcode::G_ATOMIC_CMPXCHG:
774 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
775 Register Success;
776 LLT SuccessLLT;
777 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
778 MI.getFirst4RegLLTs();
779 RetRegs.push_back(Ret);
780 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
781 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
782 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
783 NewLLT) = MI.getFirst5RegLLTs();
784 RetRegs.push_back(Success);
785 RetTy = StructType::get(
786 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
787 }
788 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
789 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
790 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
791 break;
792 }
793 case TargetOpcode::G_ATOMICRMW_XCHG:
794 case TargetOpcode::G_ATOMICRMW_ADD:
795 case TargetOpcode::G_ATOMICRMW_SUB:
796 case TargetOpcode::G_ATOMICRMW_AND:
797 case TargetOpcode::G_ATOMICRMW_OR:
798 case TargetOpcode::G_ATOMICRMW_XOR: {
799 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
800 RetRegs.push_back(Ret);
801 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
802 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
803 Val =
804 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
805 .getReg(0);
806 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
807 Val =
808 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
809 .getReg(0);
810 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
811 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
812 break;
813 }
814 default:
815 llvm_unreachable("unsupported opcode");
816 }
817
818 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
819 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
820 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
821 const char *Name = TLI.getLibcallName(RTLibcall);
822
823 // Unsupported libcall on the target.
824 if (!Name) {
825 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
826 << MIRBuilder.getTII().getName(Opc) << "\n");
827 return LegalizerHelper::UnableToLegalize;
828 }
829
830 CallLowering::CallLoweringInfo Info;
831 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
832 Info.Callee = MachineOperand::CreateES(Name);
833 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
834
835 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
836 if (!CLI.lowerCall(MIRBuilder, Info))
837 return LegalizerHelper::UnableToLegalize;
838
839 return LegalizerHelper::Legalized;
840}
841
842static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
843 Type *FromType) {
844 auto ToMVT = MVT::getVT(ToType);
845 auto FromMVT = MVT::getVT(FromType);
846
847 switch (Opcode) {
848 case TargetOpcode::G_FPEXT:
849 return RTLIB::getFPEXT(FromMVT, ToMVT);
850 case TargetOpcode::G_FPTRUNC:
851 return RTLIB::getFPROUND(FromMVT, ToMVT);
852 case TargetOpcode::G_FPTOSI:
853 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
854 case TargetOpcode::G_FPTOUI:
855 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
856 case TargetOpcode::G_SITOFP:
857 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
858 case TargetOpcode::G_UITOFP:
859 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
860 }
861 llvm_unreachable("Unsupported libcall function");
862}
863
864static LegalizerHelper::LegalizeResult
865conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
866 Type *FromType, LostDebugLocObserver &LocObserver) {
867 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
868 return createLibcall(
869 MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType, 0},
870 {{MI.getOperand(1).getReg(), FromType, 0}}, LocObserver, &MI);
871}
872
873static RTLIB::Libcall
874getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
875 RTLIB::Libcall RTLibcall;
876 switch (MI.getOpcode()) {
877 case TargetOpcode::G_GET_FPENV:
878 RTLibcall = RTLIB::FEGETENV;
879 break;
880 case TargetOpcode::G_SET_FPENV:
881 case TargetOpcode::G_RESET_FPENV:
882 RTLibcall = RTLIB::FESETENV;
883 break;
884 case TargetOpcode::G_GET_FPMODE:
885 RTLibcall = RTLIB::FEGETMODE;
886 break;
887 case TargetOpcode::G_SET_FPMODE:
888 case TargetOpcode::G_RESET_FPMODE:
889 RTLibcall = RTLIB::FESETMODE;
890 break;
891 default:
892 llvm_unreachable("Unexpected opcode");
893 }
894 return RTLibcall;
895}
896
897// Some library functions that read FP state (fegetmode, fegetenv) write the
898// state into a region in memory. IR intrinsics that do the same operations
899// (get_fpmode, get_fpenv) return the state as an integer value. To implement
900// these intrinsics via the library functions, we need to use a temporary
901// variable, for example:
902//
903// %0:_(s32) = G_GET_FPMODE
904//
905// is transformed to:
906//
907// %1:_(p0) = G_FRAME_INDEX %stack.0
908// BL &fegetmode
909// %0:_(s32) = G_LOAD % 1
910//
911LegalizerHelper::LegalizeResult
912LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
913 MachineInstr &MI,
914 LostDebugLocObserver &LocObserver) {
915 const DataLayout &DL = MIRBuilder.getDataLayout();
916 auto &MF = MIRBuilder.getMF();
917 auto &MRI = *MIRBuilder.getMRI();
918 auto &Ctx = MF.getFunction().getContext();
919
920 // Create temporary, where library function will put the read state.
921 Register Dst = MI.getOperand(0).getReg();
922 LLT StateTy = MRI.getType(Dst);
923 TypeSize StateSize = StateTy.getSizeInBytes();
924 Align TempAlign = getStackTemporaryAlignment(StateTy);
925 MachinePointerInfo TempPtrInfo;
926 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
927
928 // Create a call to library function, with the temporary as an argument.
929 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
930 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
931 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
932 auto Res =
933 createLibcall(MIRBuilder, RTLibcall,
934 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
935 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
936 LocObserver, nullptr);
937 if (Res != LegalizerHelper::Legalized)
938 return Res;
939
940 // Create a load from the temporary.
941 MachineMemOperand *MMO = MF.getMachineMemOperand(
942 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
943 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
944
945 return LegalizerHelper::Legalized;
946}
947
948// Similar to `createGetStateLibcall`, this function calls a library function
949// using transient space on the stack. In this case the library function reads
950// the content of the memory region.
951LegalizerHelper::LegalizeResult
952LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
953 MachineInstr &MI,
954 LostDebugLocObserver &LocObserver) {
955 const DataLayout &DL = MIRBuilder.getDataLayout();
956 auto &MF = MIRBuilder.getMF();
957 auto &MRI = *MIRBuilder.getMRI();
958 auto &Ctx = MF.getFunction().getContext();
959
960 // Create temporary, where library function will get the new state.
961 Register Src = MI.getOperand(0).getReg();
962 LLT StateTy = MRI.getType(Src);
963 TypeSize StateSize = StateTy.getSizeInBytes();
964 Align TempAlign = getStackTemporaryAlignment(StateTy);
965 MachinePointerInfo TempPtrInfo;
966 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
967
968 // Put the new state into the temporary.
969 MachineMemOperand *MMO = MF.getMachineMemOperand(
970 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
971 MIRBuilder.buildStore(Src, Temp, *MMO);
972
973 // Create a call to library function, with the temporary as an argument.
974 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
975 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
976 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
977 return createLibcall(MIRBuilder, RTLibcall,
978 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
979 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
980 LocObserver, nullptr);
981}
982
983// The function is used to legalize operations that set the default
984// environment state. In the C library, a call like `fesetmode(FE_DFL_MODE)` is
985// used for that. On most targets supported by glibc, FE_DFL_MODE is defined as
986// `((const femode_t *) -1)`. That assumption is used here. If it does not hold
987// for some target, the target must provide custom lowering.
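//
// For example, assuming 64-bit pointers, a G_RESET_FPMODE is emitted roughly
// as:
//
//   %0:_(s64) = G_CONSTANT i64 -1
//   %1:_(p0) = G_INTTOPTR %0
//   BL &fesetmode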
988LegalizerHelper::LegalizeResult
989LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
990 MachineInstr &MI,
991 LostDebugLocObserver &LocObserver) {
992 const DataLayout &DL = MIRBuilder.getDataLayout();
993 auto &MF = MIRBuilder.getMF();
994 auto &Ctx = MF.getFunction().getContext();
995
996 // Create an argument for the library function.
997 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
998 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
999 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1000 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1001 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1002 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1003 MIRBuilder.buildIntToPtr(Dest, DefValue);
1004
1005 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1006 return createLibcall(MIRBuilder, RTLibcall,
1007 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1008 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1009 LocObserver, &MI);
1010}
1011
1012LegalizerHelper::LegalizeResult
1013LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1014 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1015
1016 switch (MI.getOpcode()) {
1017 default:
1018 return UnableToLegalize;
1019 case TargetOpcode::G_MUL:
1020 case TargetOpcode::G_SDIV:
1021 case TargetOpcode::G_UDIV:
1022 case TargetOpcode::G_SREM:
1023 case TargetOpcode::G_UREM:
1024 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1025 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1026 unsigned Size = LLTy.getSizeInBits();
1027 Type *HLTy = IntegerType::get(Ctx, Size);
1028 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1029 if (Status != Legalized)
1030 return Status;
1031 break;
1032 }
1033 case TargetOpcode::G_FADD:
1034 case TargetOpcode::G_FSUB:
1035 case TargetOpcode::G_FMUL:
1036 case TargetOpcode::G_FDIV:
1037 case TargetOpcode::G_FMA:
1038 case TargetOpcode::G_FPOW:
1039 case TargetOpcode::G_FREM:
1040 case TargetOpcode::G_FCOS:
1041 case TargetOpcode::G_FSIN:
1042 case TargetOpcode::G_FTAN:
1043 case TargetOpcode::G_FLOG10:
1044 case TargetOpcode::G_FLOG:
1045 case TargetOpcode::G_FLOG2:
1046 case TargetOpcode::G_FLDEXP:
1047 case TargetOpcode::G_FEXP:
1048 case TargetOpcode::G_FEXP2:
1049 case TargetOpcode::G_FEXP10:
1050 case TargetOpcode::G_FCEIL:
1051 case TargetOpcode::G_FFLOOR:
1052 case TargetOpcode::G_FMINNUM:
1053 case TargetOpcode::G_FMAXNUM:
1054 case TargetOpcode::G_FSQRT:
1055 case TargetOpcode::G_FRINT:
1056 case TargetOpcode::G_FNEARBYINT:
1057 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1058 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1059 unsigned Size = LLTy.getSizeInBits();
1060 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1061 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1062 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1063 return UnableToLegalize;
1064 }
1065 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1066 if (Status != Legalized)
1067 return Status;
1068 break;
1069 }
1070 case TargetOpcode::G_INTRINSIC_LRINT:
1071 case TargetOpcode::G_INTRINSIC_LLRINT: {
1072 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1073 unsigned Size = LLTy.getSizeInBits();
1074 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1075 Type *ITy = IntegerType::get(
1076 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1077 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1078 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1079 return UnableToLegalize;
1080 }
1081 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1082 LegalizeResult Status =
1083 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1084 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1085 if (Status != Legalized)
1086 return Status;
1087 MI.eraseFromParent();
1088 return Legalized;
1089 }
1090 case TargetOpcode::G_FPOWI: {
1091 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1092 unsigned Size = LLTy.getSizeInBits();
1093 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1094 Type *ITy = IntegerType::get(
1095 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1096 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1097 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1098 return UnableToLegalize;
1099 }
1100 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1101 std::initializer_list<CallLowering::ArgInfo> Args = {
1102 {MI.getOperand(1).getReg(), HLTy, 0},
1103 {MI.getOperand(2).getReg(), ITy, 1}};
1104 LegalizeResult Status =
1105 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1106 Args, LocObserver, &MI);
1107 if (Status != Legalized)
1108 return Status;
1109 break;
1110 }
1111 case TargetOpcode::G_FPEXT:
1112 case TargetOpcode::G_FPTRUNC: {
1113 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1114 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1115 if (!FromTy || !ToTy)
1116 return UnableToLegalize;
1117 LegalizeResult Status =
1118 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver);
1119 if (Status != Legalized)
1120 return Status;
1121 break;
1122 }
1123 case TargetOpcode::G_FPTOSI:
1124 case TargetOpcode::G_FPTOUI: {
1125 // FIXME: Support other types
1126 Type *FromTy =
1127 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1128 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1129 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1130 return UnableToLegalize;
1131 LegalizeResult Status = conversionLibcall(
1132 MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver);
1133 if (Status != Legalized)
1134 return Status;
1135 break;
1136 }
1137 case TargetOpcode::G_SITOFP:
1138 case TargetOpcode::G_UITOFP: {
1139 // FIXME: Support other types
1140 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1141 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1142 if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
1143 return UnableToLegalize;
1144 LegalizeResult Status = conversionLibcall(
1145 MI, MIRBuilder,
1146 ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
1147 FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
1148 LocObserver);
1149 if (Status != Legalized)
1150 return Status;
1151 break;
1152 }
1153 case TargetOpcode::G_ATOMICRMW_XCHG:
1154 case TargetOpcode::G_ATOMICRMW_ADD:
1155 case TargetOpcode::G_ATOMICRMW_SUB:
1156 case TargetOpcode::G_ATOMICRMW_AND:
1157 case TargetOpcode::G_ATOMICRMW_OR:
1158 case TargetOpcode::G_ATOMICRMW_XOR:
1159 case TargetOpcode::G_ATOMIC_CMPXCHG:
1160 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1161 auto Status = createAtomicLibcall(MIRBuilder, MI);
1162 if (Status != Legalized)
1163 return Status;
1164 break;
1165 }
1166 case TargetOpcode::G_BZERO:
1167 case TargetOpcode::G_MEMCPY:
1168 case TargetOpcode::G_MEMMOVE:
1169 case TargetOpcode::G_MEMSET: {
1170 LegalizeResult Result =
1171 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1172 if (Result != Legalized)
1173 return Result;
1174 MI.eraseFromParent();
1175 return Result;
1176 }
1177 case TargetOpcode::G_GET_FPENV:
1178 case TargetOpcode::G_GET_FPMODE: {
1179 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1180 if (Result != Legalized)
1181 return Result;
1182 break;
1183 }
1184 case TargetOpcode::G_SET_FPENV:
1185 case TargetOpcode::G_SET_FPMODE: {
1186 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1187 if (Result != Legalized)
1188 return Result;
1189 break;
1190 }
1191 case TargetOpcode::G_RESET_FPENV:
1192 case TargetOpcode::G_RESET_FPMODE: {
1193 LegalizeResult Result =
1194 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1195 if (Result != Legalized)
1196 return Result;
1197 break;
1198 }
1199 }
1200
1201 MI.eraseFromParent();
1202 return Legalized;
1203}
1204
1205LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1206 unsigned TypeIdx,
1207 LLT NarrowTy) {
1208 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1209 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1210
1211 switch (MI.getOpcode()) {
1212 default:
1213 return UnableToLegalize;
1214 case TargetOpcode::G_IMPLICIT_DEF: {
1215 Register DstReg = MI.getOperand(0).getReg();
1216 LLT DstTy = MRI.getType(DstReg);
1217
1218 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1219 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1220 // FIXME: Although this would also be legal for the general case, it causes
1221 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1222 // combines not being hit). This seems to be a problem related to the
1223 // artifact combiner.
1224 if (SizeOp0 % NarrowSize != 0) {
1225 LLT ImplicitTy = NarrowTy;
1226 if (DstTy.isVector())
1227 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
1228
1229 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1230 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1231
1232 MI.eraseFromParent();
1233 return Legalized;
1234 }
1235
1236 int NumParts = SizeOp0 / NarrowSize;
1237
1238 SmallVector<Register, 2> DstRegs;
1239 for (int i = 0; i < NumParts; ++i)
1240 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1241
1242 if (DstTy.isVector())
1243 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1244 else
1245 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1246 MI.eraseFromParent();
1247 return Legalized;
1248 }
1249 case TargetOpcode::G_CONSTANT: {
1250 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1251 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1252 unsigned TotalSize = Ty.getSizeInBits();
1253 unsigned NarrowSize = NarrowTy.getSizeInBits();
1254 int NumParts = TotalSize / NarrowSize;
1255
1256 SmallVector<Register, 4> PartRegs;
1257 for (int I = 0; I != NumParts; ++I) {
1258 unsigned Offset = I * NarrowSize;
1259 auto K = MIRBuilder.buildConstant(NarrowTy,
1260 Val.lshr(Offset).trunc(NarrowSize));
1261 PartRegs.push_back(K.getReg(0));
1262 }
1263
1264 LLT LeftoverTy;
1265 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1266 SmallVector<Register, 1> LeftoverRegs;
1267 if (LeftoverBits != 0) {
1268 LeftoverTy = LLT::scalar(LeftoverBits);
1269 auto K = MIRBuilder.buildConstant(
1270 LeftoverTy,
1271 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1272 LeftoverRegs.push_back(K.getReg(0));
1273 }
1274
1275 insertParts(MI.getOperand(0).getReg(),
1276 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1277
1278 MI.eraseFromParent();
1279 return Legalized;
1280 }
1281 case TargetOpcode::G_SEXT:
1282 case TargetOpcode::G_ZEXT:
1283 case TargetOpcode::G_ANYEXT:
1284 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1285 case TargetOpcode::G_TRUNC: {
1286 if (TypeIdx != 1)
1287 return UnableToLegalize;
1288
1289 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1290 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1291 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1292 return UnableToLegalize;
1293 }
1294
1295 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1296 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1297 MI.eraseFromParent();
1298 return Legalized;
1299 }
1300 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1301 case TargetOpcode::G_FREEZE: {
1302 if (TypeIdx != 0)
1303 return UnableToLegalize;
1304
1305 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1306 // Should widen scalar first
1307 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1308 return UnableToLegalize;
1309
1310 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1311 SmallVector<Register, 8> Parts;
1312 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1313 Parts.push_back(
1314 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1315 .getReg(0));
1316 }
1317
1318 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1319 MI.eraseFromParent();
1320 return Legalized;
1321 }
1322 case TargetOpcode::G_ADD:
1323 case TargetOpcode::G_SUB:
1324 case TargetOpcode::G_SADDO:
1325 case TargetOpcode::G_SSUBO:
1326 case TargetOpcode::G_SADDE:
1327 case TargetOpcode::G_SSUBE:
1328 case TargetOpcode::G_UADDO:
1329 case TargetOpcode::G_USUBO:
1330 case TargetOpcode::G_UADDE:
1331 case TargetOpcode::G_USUBE:
1332 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1333 case TargetOpcode::G_MUL:
1334 case TargetOpcode::G_UMULH:
1335 return narrowScalarMul(MI, NarrowTy);
1336 case TargetOpcode::G_EXTRACT:
1337 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1338 case TargetOpcode::G_INSERT:
1339 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1340 case TargetOpcode::G_LOAD: {
1341 auto &LoadMI = cast<GLoad>(MI);
1342 Register DstReg = LoadMI.getDstReg();
1343 LLT DstTy = MRI.getType(DstReg);
1344 if (DstTy.isVector())
1345 return UnableToLegalize;
1346
1347 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1348 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1349 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1350 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1351 LoadMI.eraseFromParent();
1352 return Legalized;
1353 }
1354
1355 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1356 }
1357 case TargetOpcode::G_ZEXTLOAD:
1358 case TargetOpcode::G_SEXTLOAD: {
1359 auto &LoadMI = cast<GExtLoad>(MI);
1360 Register DstReg = LoadMI.getDstReg();
1361 Register PtrReg = LoadMI.getPointerReg();
1362
1363 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1364 auto &MMO = LoadMI.getMMO();
1365 unsigned MemSize = MMO.getSizeInBits().getValue();
1366
1367 if (MemSize == NarrowSize) {
1368 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1369 } else if (MemSize < NarrowSize) {
1370 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1371 } else if (MemSize > NarrowSize) {
1372 // FIXME: Need to split the load.
1373 return UnableToLegalize;
1374 }
1375
1376 if (isa<GZExtLoad>(LoadMI))
1377 MIRBuilder.buildZExt(DstReg, TmpReg);
1378 else
1379 MIRBuilder.buildSExt(DstReg, TmpReg);
1380
1381 LoadMI.eraseFromParent();
1382 return Legalized;
1383 }
1384 case TargetOpcode::G_STORE: {
1385 auto &StoreMI = cast<GStore>(MI);
1386
1387 Register SrcReg = StoreMI.getValueReg();
1388 LLT SrcTy = MRI.getType(SrcReg);
1389 if (SrcTy.isVector())
1390 return UnableToLegalize;
1391
1392 int NumParts = SizeOp0 / NarrowSize;
1393 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1394 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1395 if (SrcTy.isVector() && LeftoverBits != 0)
1396 return UnableToLegalize;
1397
1398 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1399 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1400 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1401 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1402 StoreMI.eraseFromParent();
1403 return Legalized;
1404 }
1405
1406 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1407 }
1408 case TargetOpcode::G_SELECT:
1409 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1410 case TargetOpcode::G_AND:
1411 case TargetOpcode::G_OR:
1412 case TargetOpcode::G_XOR: {
1413 // Legalize bitwise operation:
1414 // A = BinOp<Ty> B, C
1415 // into:
1416 // B1, ..., BN = G_UNMERGE_VALUES B
1417 // C1, ..., CN = G_UNMERGE_VALUES C
1418 // A1 = BinOp<Ty/N> B1, C1
1419 // ...
1420 // AN = BinOp<Ty/N> BN, CN
1421 // A = G_MERGE_VALUES A1, ..., AN
1422 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1423 }
1424 case TargetOpcode::G_SHL:
1425 case TargetOpcode::G_LSHR:
1426 case TargetOpcode::G_ASHR:
1427 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1428 case TargetOpcode::G_CTLZ:
1429 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1430 case TargetOpcode::G_CTTZ:
1431 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1432 case TargetOpcode::G_CTPOP:
1433 if (TypeIdx == 1)
1434 switch (MI.getOpcode()) {
1435 case TargetOpcode::G_CTLZ:
1436 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1437 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1438 case TargetOpcode::G_CTTZ:
1439 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1440 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1441 case TargetOpcode::G_CTPOP:
1442 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1443 default:
1444 return UnableToLegalize;
1445 }
1446
1447 Observer.changingInstr(MI);
1448 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1449 Observer.changedInstr(MI);
1450 return Legalized;
1451 case TargetOpcode::G_INTTOPTR:
1452 if (TypeIdx != 1)
1453 return UnableToLegalize;
1454
1455 Observer.changingInstr(MI);
1456 narrowScalarSrc(MI, NarrowTy, 1);
1457 Observer.changedInstr(MI);
1458 return Legalized;
1459 case TargetOpcode::G_PTRTOINT:
1460 if (TypeIdx != 0)
1461 return UnableToLegalize;
1462
1463 Observer.changingInstr(MI);
1464 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1465 Observer.changedInstr(MI);
1466 return Legalized;
1467 case TargetOpcode::G_PHI: {
1468 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1469 // NarrowSize.
1470 if (SizeOp0 % NarrowSize != 0)
1471 return UnableToLegalize;
1472
1473 unsigned NumParts = SizeOp0 / NarrowSize;
1474 SmallVector<Register, 2> DstRegs(NumParts);
1475 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1476 Observer.changingInstr(MI);
1477 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1478 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1479 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1480 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1481 SrcRegs[i / 2], MIRBuilder, MRI);
1482 }
1483 MachineBasicBlock &MBB = *MI.getParent();
1484 MIRBuilder.setInsertPt(MBB, MI);
1485 for (unsigned i = 0; i < NumParts; ++i) {
1486 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1487 MachineInstrBuilder MIB =
1488 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1489 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1490 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1491 }
1492 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1493 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1494 Observer.changedInstr(MI);
1495 MI.eraseFromParent();
1496 return Legalized;
1497 }
1498 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1499 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1500 if (TypeIdx != 2)
1501 return UnableToLegalize;
1502
1503 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1504 Observer.changingInstr(MI);
1505 narrowScalarSrc(MI, NarrowTy, OpIdx);
1506 Observer.changedInstr(MI);
1507 return Legalized;
1508 }
1509 case TargetOpcode::G_ICMP: {
1510 Register LHS = MI.getOperand(2).getReg();
1511 LLT SrcTy = MRI.getType(LHS);
1512 uint64_t SrcSize = SrcTy.getSizeInBits();
1513 CmpInst::Predicate Pred =
1514 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1515
1516 // TODO: Handle the non-equality case for weird sizes.
1517 if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
1518 return UnableToLegalize;
1519
1520 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1521 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1522 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1523 LHSLeftoverRegs, MIRBuilder, MRI))
1524 return UnableToLegalize;
1525
1526 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1527 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1528 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1529 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1530 return UnableToLegalize;
1531
1532 // We now have the LHS and RHS of the compare split into narrow-type
1533 // registers, plus potentially some leftover type.
1534 Register Dst = MI.getOperand(0).getReg();
1535 LLT ResTy = MRI.getType(Dst);
1536 if (ICmpInst::isEquality(Pred)) {
1537 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1538 // them together. For each equal part, the result should be all 0s. For
1539 // each non-equal part, we'll get at least one 1.
1540 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1541 SmallVector<Register, 4> Xors;
1542 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1543 auto LHS = std::get<0>(LHSAndRHS);
1544 auto RHS = std::get<1>(LHSAndRHS);
1545 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1546 Xors.push_back(Xor);
1547 }
1548
1549 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1550 // to the desired narrow type so that we can OR them together later.
1551 SmallVector<Register, 4> WidenedXors;
1552 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1553 auto LHS = std::get<0>(LHSAndRHS);
1554 auto RHS = std::get<1>(LHSAndRHS);
1555 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1556 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1557 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1558 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1559 Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
1560 }
1561
1562 // Now, for each part we broke up, we know if they are equal/not equal
1563 // based off the G_XOR. We can OR these all together and compare against
1564 // 0 to get the result.
1565 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1566 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1567 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1568 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1569 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1570 } else {
1571 // TODO: Handle non-power-of-two types.
1572 assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
1573 assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
1574 Register LHSL = LHSPartRegs[0];
1575 Register LHSH = LHSPartRegs[1];
1576 Register RHSL = RHSPartRegs[0];
1577 Register RHSH = RHSPartRegs[1];
1578 MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1579 MachineInstrBuilder CmpHEQ =
1580 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
1581 MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
1582 ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1583 MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
1584 }
1585 MI.eraseFromParent();
1586 return Legalized;
1587 }
1588 case TargetOpcode::G_FCMP:
1589 if (TypeIdx != 0)
1590 return UnableToLegalize;
1591
1592 Observer.changingInstr(MI);
1593 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1594 Observer.changedInstr(MI);
1595 return Legalized;
1596
1597 case TargetOpcode::G_SEXT_INREG: {
1598 if (TypeIdx != 0)
1599 return UnableToLegalize;
1600
1601 int64_t SizeInBits = MI.getOperand(2).getImm();
1602
1603 // So long as the new type has more bits than the bits we're extending we
1604 // don't need to break it apart.
1605 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1606 Observer.changingInstr(MI);
1607 // We don't lose any non-extension bits by truncating the src and
1608 // sign-extending the dst.
1609 MachineOperand &MO1 = MI.getOperand(1);
1610 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1611 MO1.setReg(TruncMIB.getReg(0));
1612
1613 MachineOperand &MO2 = MI.getOperand(0);
1614 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1615 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1616 MIRBuilder.buildSExt(MO2, DstExt);
1617 MO2.setReg(DstExt);
1618 Observer.changedInstr(MI);
1619 return Legalized;
1620 }
1621
1622 // Break it apart. Components below the extension point are unmodified. The
1623 // component containing the extension point becomes a narrower SEXT_INREG.
1624 // Components above it are ashr'd from the component containing the
1625 // extension point.
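    // For example, narrowing a 16-bit G_SEXT_INREG of an s64 value into s32
    // parts keeps the low part as a 16-bit G_SEXT_INREG on s32 and derives the
    // high part by arithmetic-shifting that partial result right by 31.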
1626 if (SizeOp0 % NarrowSize != 0)
1627 return UnableToLegalize;
1628 int NumParts = SizeOp0 / NarrowSize;
1629
1630 // List the registers where the destination will be scattered.
1631 SmallVector<Register, 2> DstRegs;
1632 // List the registers where the source will be split.
1633 SmallVector<Register, 2> SrcRegs;
1634
1635 // Create all the temporary registers.
1636 for (int i = 0; i < NumParts; ++i) {
1637 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1638
1639 SrcRegs.push_back(SrcReg);
1640 }
1641
1642 // Explode the big arguments into smaller chunks.
1643 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1644
1645 Register AshrCstReg =
1646 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1647 .getReg(0);
1648 Register FullExtensionReg;
1649 Register PartialExtensionReg;
1650
1651 // Do the operation on each small part.
1652 for (int i = 0; i < NumParts; ++i) {
1653 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
1654 DstRegs.push_back(SrcRegs[i]);
1655 PartialExtensionReg = DstRegs.back();
1656 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1657 assert(PartialExtensionReg &&
1658 "Expected to visit partial extension before full");
1659 if (FullExtensionReg) {
1660 DstRegs.push_back(FullExtensionReg);
1661 continue;
1662 }
1663 DstRegs.push_back(
1664 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1665 .getReg(0));
1666 FullExtensionReg = DstRegs.back();
1667 } else {
1668 DstRegs.push_back(
1669 MIRBuilder
1670 .buildInstr(
1671 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1672 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1673 .getReg(0));
1674 PartialExtensionReg = DstRegs.back();
1675 }
1676 }
1677
1678 // Gather the destination registers into the final destination.
1679 Register DstReg = MI.getOperand(0).getReg();
1680 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1681 MI.eraseFromParent();
1682 return Legalized;
1683 }
1684 case TargetOpcode::G_BSWAP:
1685 case TargetOpcode::G_BITREVERSE: {
1686 if (SizeOp0 % NarrowSize != 0)
1687 return UnableToLegalize;
1688
1689 Observer.changingInstr(MI);
1690 SmallVector<Register, 2> SrcRegs, DstRegs;
1691 unsigned NumParts = SizeOp0 / NarrowSize;
1692 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
1693 MIRBuilder, MRI);
1694
1695 for (unsigned i = 0; i < NumParts; ++i) {
1696 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1697 {SrcRegs[NumParts - 1 - i]});
1698 DstRegs.push_back(DstPart.getReg(0));
1699 }
1700
1701 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1702
1703 Observer.changedInstr(MI);
1704 MI.eraseFromParent();
1705 return Legalized;
1706 }
1707 case TargetOpcode::G_PTR_ADD:
1708 case TargetOpcode::G_PTRMASK: {
1709 if (TypeIdx != 1)
1710 return UnableToLegalize;
1711 Observer.changingInstr(MI);
1712 narrowScalarSrc(MI, NarrowTy, 2);
1713 Observer.changedInstr(MI);
1714 return Legalized;
1715 }
1716 case TargetOpcode::G_FPTOUI:
1717 case TargetOpcode::G_FPTOSI:
1718 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1719 case TargetOpcode::G_FPEXT:
1720 if (TypeIdx != 0)
1721 return UnableToLegalize;
1722 Observer.changingInstr(MI);
1723 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1724 Observer.changedInstr(MI);
1725 return Legalized;
1726 case TargetOpcode::G_FLDEXP:
1727 case TargetOpcode::G_STRICT_FLDEXP:
1728 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
1729 case TargetOpcode::G_VSCALE: {
1730 Register Dst = MI.getOperand(0).getReg();
1731 LLT Ty = MRI.getType(Dst);
1732
1733 // Assume VSCALE(1) fits into a legal integer
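    // For example, with NarrowTy = s32, an s64 G_VSCALE 3 becomes a G_ZEXT of
    // an s32 G_VSCALE 1 multiplied by 3.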
1734 const APInt One(NarrowTy.getSizeInBits(), 1);
1735 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
1736 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
1737 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
1738 MIRBuilder.buildMul(Dst, ZExt, C);
1739
1740 MI.eraseFromParent();
1741 return Legalized;
1742 }
1743 }
1744}
1745
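/// Coerce \p Val to a plain scalar of the same total size: pointers go through
/// G_PTRTOINT, vectors (after converting any pointer elements) through
/// G_BITCAST. Returns an empty Register for non-integral address-space
/// pointers, which cannot be reinterpreted this way.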
1746Register LegalizerHelper::coerceToScalar(Register Val) {
1747 LLT Ty = MRI.getType(Val);
1748 if (Ty.isScalar())
1749 return Val;
1750
1751 const DataLayout &DL = MIRBuilder.getDataLayout();
1752 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1753 if (Ty.isPointer()) {
1754 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1755 return Register();
1756 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1757 }
1758
1759 Register NewVal = Val;
1760
1761 assert(Ty.isVector());
1762 if (Ty.isPointerVector())
1763 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1764 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1765}
1766
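// Operand-rewriting helpers: the *Src variants replace a use operand of MI
// with a converted copy built before MI, while the *Dst variants redirect a
// def to a temporary of the new type and convert it back to the original
// register immediately after MI.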
1767void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1768 unsigned OpIdx, unsigned ExtOpcode) {
1769 MachineOperand &MO = MI.getOperand(OpIdx);
1770 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1771 MO.setReg(ExtB.getReg(0));
1772}
1773
1774void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1775 unsigned OpIdx) {
1776 MachineOperand &MO = MI.getOperand(OpIdx);
1777 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1778 MO.setReg(ExtB.getReg(0));
1779}
1780
1781void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1782 unsigned OpIdx, unsigned TruncOpcode) {
1783 MachineOperand &MO = MI.getOperand(OpIdx);
1784 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1785 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1786 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1787 MO.setReg(DstExt);
1788}
1789
1790void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1791 unsigned OpIdx, unsigned ExtOpcode) {
1792 MachineOperand &MO = MI.getOperand(OpIdx);
1793 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1794 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1795 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1796 MO.setReg(DstTrunc);
1797}
1798
1799void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1800 unsigned OpIdx) {
1801 MachineOperand &MO = MI.getOperand(OpIdx);
1802 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1803 Register Dst = MO.getReg();
1804 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1805 MO.setReg(DstExt);
1806 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
1807}
1808
1809void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1810 unsigned OpIdx) {
1811 MachineOperand &MO = MI.getOperand(OpIdx);
1812 SmallVector<Register, 8> Regs;
1813 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
1814}
1815
1816void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1817 MachineOperand &Op = MI.getOperand(OpIdx);
1818 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1819}
1820
1821void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1822 MachineOperand &MO = MI.getOperand(OpIdx);
1823 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1825 MIRBuilder.buildBitcast(MO, CastDst);
1826 MO.setReg(CastDst);
1827}
1828
1829LegalizerHelper::LegalizeResult
1830LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1831 LLT WideTy) {
1832 if (TypeIdx != 1)
1833 return UnableToLegalize;
1834
1835 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
1836 if (DstTy.isVector())
1837 return UnableToLegalize;
1838
1839 LLT SrcTy = MRI.getType(Src1Reg);
1840 const int DstSize = DstTy.getSizeInBits();
1841 const int SrcSize = SrcTy.getSizeInBits();
1842 const int WideSize = WideTy.getSizeInBits();
1843 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1844
1845 unsigned NumOps = MI.getNumOperands();
1846 unsigned NumSrc = MI.getNumOperands() - 1;
1847 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1848
1849 if (WideSize >= DstSize) {
1850 // Directly pack the bits in the target type.
1851 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
1852
1853 for (unsigned I = 2; I != NumOps; ++I) {
1854 const unsigned Offset = (I - 1) * PartSize;
1855
1856 Register SrcReg = MI.getOperand(I).getReg();
1857 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1858
1859 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1860
1861 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1862 MRI.createGenericVirtualRegister(WideTy);
1863
1864 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1865 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1866 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1867 ResultReg = NextResult;
1868 }
1869
1870 if (WideSize > DstSize)
1871 MIRBuilder.buildTrunc(DstReg, ResultReg);
1872 else if (DstTy.isPointer())
1873 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1874
1875 MI.eraseFromParent();
1876 return Legalized;
1877 }
1878
1879 // Unmerge the original values to the GCD type, and recombine to the next
1880 // multiple greater than the original type.
1881 //
1882 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1883 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1884 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1885 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1886 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1887 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1888 // %12:_(s12) = G_MERGE_VALUES %10, %11
1889 //
1890 // Padding with undef if necessary:
1891 //
1892 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1893 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1894 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1895 // %7:_(s2) = G_IMPLICIT_DEF
1896 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1897 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1898 // %10:_(s12) = G_MERGE_VALUES %8, %9
1899
1900 const int GCD = std::gcd(SrcSize, WideSize);
1901 LLT GCDTy = LLT::scalar(GCD);
1902
1904 SmallVector<Register, 8> NewMergeRegs;
1905 SmallVector<Register, 8> Unmerges;
1906 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1907
1908 // Decompose the original operands if they don't evenly divide.
1909 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
1910 Register SrcReg = MO.getReg();
1911 if (GCD == SrcSize) {
1912 Unmerges.push_back(SrcReg);
1913 } else {
1914 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1915 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1916 Unmerges.push_back(Unmerge.getReg(J));
1917 }
1918 }
1919
1920 // Pad with undef to the next size that is a multiple of the requested size.
1921 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1922 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1923 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1924 Unmerges.push_back(UndefReg);
1925 }
1926
1927 const int PartsPerGCD = WideSize / GCD;
1928
1929 // Build merges of each piece.
1930 ArrayRef<Register> Slicer(Unmerges);
1931 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1932 auto Merge =
1933 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
1934 NewMergeRegs.push_back(Merge.getReg(0));
1935 }
1936
1937 // A truncate may be necessary if the requested type doesn't evenly divide the
1938 // original result type.
1939 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1940 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
1941 } else {
1942 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
1943 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1944 }
1945
1946 MI.eraseFromParent();
1947 return Legalized;
1948}
1949
1950LegalizerHelper::LegalizeResult
1951LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1952 LLT WideTy) {
1953 if (TypeIdx != 0)
1954 return UnableToLegalize;
1955
1956 int NumDst = MI.getNumOperands() - 1;
1957 Register SrcReg = MI.getOperand(NumDst).getReg();
1958 LLT SrcTy = MRI.getType(SrcReg);
1959 if (SrcTy.isVector())
1960 return UnableToLegalize;
1961
1962 Register Dst0Reg = MI.getOperand(0).getReg();
1963 LLT DstTy = MRI.getType(Dst0Reg);
1964 if (!DstTy.isScalar())
1965 return UnableToLegalize;
1966
1967 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
1968 if (SrcTy.isPointer()) {
1970 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
1971 LLVM_DEBUG(
1972 dbgs() << "Not casting non-integral address space integer\n");
1973 return UnableToLegalize;
1974 }
1975
1976 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
1977 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
1978 }
1979
1980 // Widen SrcTy to WideTy. This does not affect the result, but since the
1981 // user requested this size, it is probably better handled than SrcTy and
1982 // should reduce the total number of legalization artifacts.
1983 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1984 SrcTy = WideTy;
1985 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
1986 }
1987
1988 // There's no unmerge type to target. Directly extract the bits from the
1989 // source type.
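    // e.g. %a:_(s16), %b:_(s16) = G_UNMERGE_VALUES %x:_(s32) widened to s64:
    //   %w:_(s64) = G_ANYEXT %x
    //   %a = G_TRUNC %w
    //   %b = G_TRUNC (G_LSHR %w, 16)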
1990 unsigned DstSize = DstTy.getSizeInBits();
1991
1992 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
1993 for (int I = 1; I != NumDst; ++I) {
1994 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
1995 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
1996 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
1997 }
1998
1999 MI.eraseFromParent();
2000 return Legalized;
2001 }
2002
2003 // Extend the source to a wider type.
2004 LLT LCMTy = getLCMType(SrcTy, WideTy);
2005
2006 Register WideSrc = SrcReg;
2007 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2008 // TODO: If this is an integral address space, cast to integer and anyext.
2009 if (SrcTy.isPointer()) {
2010 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2011 return UnableToLegalize;
2012 }
2013
2014 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2015 }
2016
2017 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2018
2019 // Create a sequence of unmerges and merges to the original results. Since we
2020 // may have widened the source, we will need to pad the results with dead defs
2021 // to cover the source register.
2022 // e.g. widen s48 to s64:
2023 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2024 //
2025 // =>
2026 // %4:_(s192) = G_ANYEXT %0:_(s96)
2027 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2028 // ; unpack to GCD type, with extra dead defs
2029 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2030 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2031 // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
2032 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2033 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2034 const LLT GCDTy = getGCDType(WideTy, DstTy);
2035 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2036 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2037
2038 // Directly unmerge to the destination without going through a GCD type
2039 // if possible
2040 if (PartsPerRemerge == 1) {
2041 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2042
2043 for (int I = 0; I != NumUnmerge; ++I) {
2044 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2045
2046 for (int J = 0; J != PartsPerUnmerge; ++J) {
2047 int Idx = I * PartsPerUnmerge + J;
2048 if (Idx < NumDst)
2049 MIB.addDef(MI.getOperand(Idx).getReg());
2050 else {
2051 // Create dead def for excess components.
2052 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2053 }
2054 }
2055
2056 MIB.addUse(Unmerge.getReg(I));
2057 }
2058 } else {
2059 SmallVector<Register, 16> Parts;
2060 for (int J = 0; J != NumUnmerge; ++J)
2061 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2062
2063 SmallVector<Register, 8> RemergeParts;
2064 for (int I = 0; I != NumDst; ++I) {
2065 for (int J = 0; J < PartsPerRemerge; ++J) {
2066 const int Idx = I * PartsPerRemerge + J;
2067 RemergeParts.emplace_back(Parts[Idx]);
2068 }
2069
2070 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2071 RemergeParts.clear();
2072 }
2073 }
2074
2075 MI.eraseFromParent();
2076 return Legalized;
2077}
2078
2079LegalizerHelper::LegalizeResult
2080LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2081 LLT WideTy) {
2082 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2083 unsigned Offset = MI.getOperand(2).getImm();
2084
2085 if (TypeIdx == 0) {
2086 if (SrcTy.isVector() || DstTy.isVector())
2087 return UnableToLegalize;
2088
2089 SrcOp Src(SrcReg);
2090 if (SrcTy.isPointer()) {
2091 // Extracts from pointers can be handled only if they are really just
2092 // simple integers.
2094 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2095 return UnableToLegalize;
2096
2097 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2098 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2099 SrcTy = SrcAsIntTy;
2100 }
2101
2102 if (DstTy.isPointer())
2103 return UnableToLegalize;
2104
2105 if (Offset == 0) {
2106 // Avoid a shift in the degenerate case.
2107 MIRBuilder.buildTrunc(DstReg,
2108 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2109 MI.eraseFromParent();
2110 return Legalized;
2111 }
2112
2113 // Do a shift in the source type.
2114 LLT ShiftTy = SrcTy;
2115 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2116 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2117 ShiftTy = WideTy;
2118 }
2119
2120 auto LShr = MIRBuilder.buildLShr(
2121 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2122 MIRBuilder.buildTrunc(DstReg, LShr);
2123 MI.eraseFromParent();
2124 return Legalized;
2125 }
2126
2127 if (SrcTy.isScalar()) {
2129 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2131 return Legalized;
2132 }
2133
2134 if (!SrcTy.isVector())
2135 return UnableToLegalize;
2136
2137 if (DstTy != SrcTy.getElementType())
2138 return UnableToLegalize;
2139
2140 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2141 return UnableToLegalize;
2142
2144 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2145
2146 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2147 Offset);
2148 widenScalarDst(MI, WideTy.getScalarType(), 0);
2150 return Legalized;
2151}
2152
2153LegalizerHelper::LegalizeResult
2154LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2155 LLT WideTy) {
2156 if (TypeIdx != 0 || WideTy.isVector())
2157 return UnableToLegalize;
2159 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2160 widenScalarDst(MI, WideTy);
2162 return Legalized;
2163}
2164
2165LegalizerHelper::LegalizeResult
2166LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2167 LLT WideTy) {
2168 unsigned Opcode;
2169 unsigned ExtOpcode;
2170 std::optional<Register> CarryIn;
2171 switch (MI.getOpcode()) {
2172 default:
2173 llvm_unreachable("Unexpected opcode!");
2174 case TargetOpcode::G_SADDO:
2175 Opcode = TargetOpcode::G_ADD;
2176 ExtOpcode = TargetOpcode::G_SEXT;
2177 break;
2178 case TargetOpcode::G_SSUBO:
2179 Opcode = TargetOpcode::G_SUB;
2180 ExtOpcode = TargetOpcode::G_SEXT;
2181 break;
2182 case TargetOpcode::G_UADDO:
2183 Opcode = TargetOpcode::G_ADD;
2184 ExtOpcode = TargetOpcode::G_ZEXT;
2185 break;
2186 case TargetOpcode::G_USUBO:
2187 Opcode = TargetOpcode::G_SUB;
2188 ExtOpcode = TargetOpcode::G_ZEXT;
2189 break;
2190 case TargetOpcode::G_SADDE:
2191 Opcode = TargetOpcode::G_UADDE;
2192 ExtOpcode = TargetOpcode::G_SEXT;
2193 CarryIn = MI.getOperand(4).getReg();
2194 break;
2195 case TargetOpcode::G_SSUBE:
2196 Opcode = TargetOpcode::G_USUBE;
2197 ExtOpcode = TargetOpcode::G_SEXT;
2198 CarryIn = MI.getOperand(4).getReg();
2199 break;
2200 case TargetOpcode::G_UADDE:
2201 Opcode = TargetOpcode::G_UADDE;
2202 ExtOpcode = TargetOpcode::G_ZEXT;
2203 CarryIn = MI.getOperand(4).getReg();
2204 break;
2205 case TargetOpcode::G_USUBE:
2206 Opcode = TargetOpcode::G_USUBE;
2207 ExtOpcode = TargetOpcode::G_ZEXT;
2208 CarryIn = MI.getOperand(4).getReg();
2209 break;
2210 }
2211
2212 if (TypeIdx == 1) {
2213 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2214
2216 if (CarryIn)
2217 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2218 widenScalarDst(MI, WideTy, 1);
2219
2221 return Legalized;
2222 }
2223
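  // Illustrative widening of %res:_(s8), %ovf:_(s1) = G_UADDO %a, %b to s32:
  //   %ax:_(s32) = G_ZEXT %a ; %bx:_(s32) = G_ZEXT %b
  //   %sum:_(s32) = G_ADD %ax, %bx
  //   %ovf = G_ICMP ne, %sum, G_ZEXT(G_TRUNC %sum to s8)
  //   %res = G_TRUNC %sum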
2224 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2225 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2226 // Do the arithmetic in the larger type.
2227 Register NewOp;
2228 if (CarryIn) {
2229 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2230 NewOp = MIRBuilder
2231 .buildInstr(Opcode, {WideTy, CarryOutTy},
2232 {LHSExt, RHSExt, *CarryIn})
2233 .getReg(0);
2234 } else {
2235 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2236 }
2237 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2238 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2239 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2240 // There is no overflow if the ExtOp is the same as NewOp.
2241 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2242 // Now trunc the NewOp to the original result.
2243 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2244 MI.eraseFromParent();
2245 return Legalized;
2246}
2247
2248LegalizerHelper::LegalizeResult
2249LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2250 LLT WideTy) {
2251 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2252 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2253 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2254 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2255 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2256 // We can convert this to:
2257 // 1. Any extend iN to iM
2258 // 2. SHL by M-N
2259 // 3. [US][ADD|SUB|SHL]SAT
2260 // 4. L/ASHR by M-N
2261 //
2262 // It may be more efficient to lower this to a min and a max operation in
2263 // the higher precision arithmetic if the promoted operation isn't legal,
2264 // but this decision is up to the target's lowering request.
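  // e.g. widening G_SADDSAT on s8 to s32: both operands are shifted left by
  // 24, the saturating add runs at s32, and an arithmetic shift right by 24
  // realigns the saturation boundaries with the original 8-bit range.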
2265 Register DstReg = MI.getOperand(0).getReg();
2266
2267 unsigned NewBits = WideTy.getScalarSizeInBits();
2268 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2269
2270 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2271 // must not left shift the RHS to preserve the shift amount.
2272 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2273 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2274 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2275 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2276 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2277 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2278
2279 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2280 {ShiftL, ShiftR}, MI.getFlags());
2281
2282 // Use a shift that will preserve the number of sign bits when the trunc is
2283 // folded away.
2284 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2285 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2286
2287 MIRBuilder.buildTrunc(DstReg, Result);
2288 MI.eraseFromParent();
2289 return Legalized;
2290}
2291
2292LegalizerHelper::LegalizeResult
2293LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2294 LLT WideTy) {
2295 if (TypeIdx == 1) {
2297 widenScalarDst(MI, WideTy, 1);
2299 return Legalized;
2300 }
2301
2302 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2303 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2304 LLT SrcTy = MRI.getType(LHS);
2305 LLT OverflowTy = MRI.getType(OriginalOverflow);
2306 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2307
2308 // To determine if the result overflowed in the larger type, we extend the
2309 // input to the larger type, do the multiply (checking if it overflows),
2310 // then also check the high bits of the result to see if overflow happened
2311 // there.
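  // e.g. a s16 G_UMULO widened to s32 can never overflow the wide multiply,
  // so only the high-half check below is needed; widening to s24 would also
  // have to consult the wide G_UMULO's own overflow bit.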
2312 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2313 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2314 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2315
2316 // Multiplication cannot overflow if WideTy is >= 2 * the original width,
2317 // so we don't need to check the overflow result of the wider-type Mulo.
2318 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2319
2320 unsigned MulOpc =
2321 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2322
2323 MachineInstrBuilder Mulo;
2324 if (WideMulCanOverflow)
2325 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2326 {LeftOperand, RightOperand});
2327 else
2328 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2329
2330 auto Mul = Mulo->getOperand(0);
2331 MIRBuilder.buildTrunc(Result, Mul);
2332
2333 MachineInstrBuilder ExtResult;
2334 // Overflow occurred if it occurred in the larger type, or if the high part
2335 // of the result does not zero/sign-extend the low part. Check this second
2336 // possibility first.
2337 if (IsSigned) {
2338 // For signed, overflow occurred when the high part does not sign-extend
2339 // the low part.
2340 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2341 } else {
2342 // Unsigned overflow occurred when the high part does not zero-extend the
2343 // low part.
2344 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2345 }
2346
2347 if (WideMulCanOverflow) {
2348 auto Overflow =
2349 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2350 // Finally check if the multiplication in the larger type itself overflowed.
2351 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2352 } else {
2353 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2354 }
2355 MI.eraseFromParent();
2356 return Legalized;
2357}
2358
2359LegalizerHelper::LegalizeResult
2360LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2361 switch (MI.getOpcode()) {
2362 default:
2363 return UnableToLegalize;
2364 case TargetOpcode::G_ATOMICRMW_XCHG:
2365 case TargetOpcode::G_ATOMICRMW_ADD:
2366 case TargetOpcode::G_ATOMICRMW_SUB:
2367 case TargetOpcode::G_ATOMICRMW_AND:
2368 case TargetOpcode::G_ATOMICRMW_OR:
2369 case TargetOpcode::G_ATOMICRMW_XOR:
2370 case TargetOpcode::G_ATOMICRMW_MIN:
2371 case TargetOpcode::G_ATOMICRMW_MAX:
2372 case TargetOpcode::G_ATOMICRMW_UMIN:
2373 case TargetOpcode::G_ATOMICRMW_UMAX:
2374 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2376 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2377 widenScalarDst(MI, WideTy, 0);
2379 return Legalized;
2380 case TargetOpcode::G_ATOMIC_CMPXCHG:
2381 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2383 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2384 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2385 widenScalarDst(MI, WideTy, 0);
2387 return Legalized;
2388 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2389 if (TypeIdx == 0) {
2391 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2392 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2393 widenScalarDst(MI, WideTy, 0);
2395 return Legalized;
2396 }
2397 assert(TypeIdx == 1 &&
2398 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2400 widenScalarDst(MI, WideTy, 1);
2402 return Legalized;
2403 case TargetOpcode::G_EXTRACT:
2404 return widenScalarExtract(MI, TypeIdx, WideTy);
2405 case TargetOpcode::G_INSERT:
2406 return widenScalarInsert(MI, TypeIdx, WideTy);
2407 case TargetOpcode::G_MERGE_VALUES:
2408 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2409 case TargetOpcode::G_UNMERGE_VALUES:
2410 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2411 case TargetOpcode::G_SADDO:
2412 case TargetOpcode::G_SSUBO:
2413 case TargetOpcode::G_UADDO:
2414 case TargetOpcode::G_USUBO:
2415 case TargetOpcode::G_SADDE:
2416 case TargetOpcode::G_SSUBE:
2417 case TargetOpcode::G_UADDE:
2418 case TargetOpcode::G_USUBE:
2419 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2420 case TargetOpcode::G_UMULO:
2421 case TargetOpcode::G_SMULO:
2422 return widenScalarMulo(MI, TypeIdx, WideTy);
2423 case TargetOpcode::G_SADDSAT:
2424 case TargetOpcode::G_SSUBSAT:
2425 case TargetOpcode::G_SSHLSAT:
2426 case TargetOpcode::G_UADDSAT:
2427 case TargetOpcode::G_USUBSAT:
2428 case TargetOpcode::G_USHLSAT:
2429 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2430 case TargetOpcode::G_CTTZ:
2431 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2432 case TargetOpcode::G_CTLZ:
2433 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2434 case TargetOpcode::G_CTPOP: {
2435 if (TypeIdx == 0) {
2437 widenScalarDst(MI, WideTy, 0);
2439 return Legalized;
2440 }
2441
2442 Register SrcReg = MI.getOperand(1).getReg();
2443
2444 // First extend the input.
2445 unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
2446 MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
2447 ? TargetOpcode::G_ANYEXT
2448 : TargetOpcode::G_ZEXT;
2449 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2450 LLT CurTy = MRI.getType(SrcReg);
2451 unsigned NewOpc = MI.getOpcode();
2452 if (NewOpc == TargetOpcode::G_CTTZ) {
2453 // The count is the same in the larger type except if the original
2454 // value was zero. This can be handled by setting the bit just off
2455 // the top of the original type.
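      // e.g. for s8 widened to s32, OR in 0x100 so a zero input still yields a
      // count of 8 instead of 32.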
2456 auto TopBit =
2457 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2458 MIBSrc = MIRBuilder.buildOr(
2459 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2460 // Now we know the operand is non-zero, use the more relaxed opcode.
2461 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2462 }
2463
2464 // Perform the operation at the larger size.
2465 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2466 // This is already the correct result for CTPOP and CTTZs
2467 if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
2468 MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2469 // The correct result is NewOp - (Difference in widety and current ty).
2470 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2471 MIBNewOp = MIRBuilder.buildSub(
2472 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2473 }
2474
2475 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2476 MI.eraseFromParent();
2477 return Legalized;
2478 }
2479 case TargetOpcode::G_BSWAP: {
2481 Register DstReg = MI.getOperand(0).getReg();
2482
2483 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2484 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2485 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2486 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2487
2488 MI.getOperand(0).setReg(DstExt);
2489
2491
2492 LLT Ty = MRI.getType(DstReg);
2493 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2494 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2495 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2496
2497 MIRBuilder.buildTrunc(DstReg, ShrReg);
2499 return Legalized;
2500 }
2501 case TargetOpcode::G_BITREVERSE: {
2503
2504 Register DstReg = MI.getOperand(0).getReg();
2505 LLT Ty = MRI.getType(DstReg);
2506 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2507
2508 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2509 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2510 MI.getOperand(0).setReg(DstExt);
2512
2513 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2514 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2515 MIRBuilder.buildTrunc(DstReg, Shift);
2517 return Legalized;
2518 }
2519 case TargetOpcode::G_FREEZE:
2520 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2522 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2523 widenScalarDst(MI, WideTy);
2525 return Legalized;
2526
2527 case TargetOpcode::G_ABS:
2529 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2530 widenScalarDst(MI, WideTy);
2532 return Legalized;
2533
2534 case TargetOpcode::G_ADD:
2535 case TargetOpcode::G_AND:
2536 case TargetOpcode::G_MUL:
2537 case TargetOpcode::G_OR:
2538 case TargetOpcode::G_XOR:
2539 case TargetOpcode::G_SUB:
2540 case TargetOpcode::G_SHUFFLE_VECTOR:
2541 // Perform operation at larger width (any extension is fine here, high bits
2542 // don't affect the result) and then truncate the result back to the
2543 // original type.
2545 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2546 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2547 widenScalarDst(MI, WideTy);
2549 return Legalized;
2550
2551 case TargetOpcode::G_SBFX:
2552 case TargetOpcode::G_UBFX:
2554
2555 if (TypeIdx == 0) {
2556 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2557 widenScalarDst(MI, WideTy);
2558 } else {
2559 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2560 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2561 }
2562
2564 return Legalized;
2565
2566 case TargetOpcode::G_SHL:
2568
2569 if (TypeIdx == 0) {
2570 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2571 widenScalarDst(MI, WideTy);
2572 } else {
2573 assert(TypeIdx == 1);
2574 // The "number of bits to shift" operand must preserve its value as an
2575 // unsigned integer:
2576 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2577 }
2578
2580 return Legalized;
2581
2582 case TargetOpcode::G_ROTR:
2583 case TargetOpcode::G_ROTL:
2584 if (TypeIdx != 1)
2585 return UnableToLegalize;
2586
2588 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2590 return Legalized;
2591
2592 case TargetOpcode::G_SDIV:
2593 case TargetOpcode::G_SREM:
2594 case TargetOpcode::G_SMIN:
2595 case TargetOpcode::G_SMAX:
2597 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2598 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2599 widenScalarDst(MI, WideTy);
2601 return Legalized;
2602
2603 case TargetOpcode::G_SDIVREM:
2605 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2606 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2607 widenScalarDst(MI, WideTy);
2608 widenScalarDst(MI, WideTy, 1);
2610 return Legalized;
2611
2612 case TargetOpcode::G_ASHR:
2613 case TargetOpcode::G_LSHR:
2615
2616 if (TypeIdx == 0) {
2617 unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
2618 TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2619
2620 widenScalarSrc(MI, WideTy, 1, CvtOp);
2621 widenScalarDst(MI, WideTy);
2622 } else {
2623 assert(TypeIdx == 1);
2624 // The "number of bits to shift" operand must preserve its value as an
2625 // unsigned integer:
2626 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2627 }
2628
2630 return Legalized;
2631 case TargetOpcode::G_UDIV:
2632 case TargetOpcode::G_UREM:
2633 case TargetOpcode::G_UMIN:
2634 case TargetOpcode::G_UMAX:
2636 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2637 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2638 widenScalarDst(MI, WideTy);
2640 return Legalized;
2641
2642 case TargetOpcode::G_UDIVREM:
2644 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2645 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2646 widenScalarDst(MI, WideTy);
2647 widenScalarDst(MI, WideTy, 1);
2649 return Legalized;
2650
2651 case TargetOpcode::G_SELECT:
2653 if (TypeIdx == 0) {
2654 // Perform operation at larger width (any extension is fine here, high
2655 // bits don't affect the result) and then truncate the result back to the
2656 // original type.
2657 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2658 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2659 widenScalarDst(MI, WideTy);
2660 } else {
2661 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
2662 // Explicit extension is required here since high bits affect the result.
2663 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
2664 }
2666 return Legalized;
2667
2668 case TargetOpcode::G_FPTOSI:
2669 case TargetOpcode::G_FPTOUI:
2670 case TargetOpcode::G_INTRINSIC_LRINT:
2671 case TargetOpcode::G_INTRINSIC_LLRINT:
2672 case TargetOpcode::G_IS_FPCLASS:
2674
2675 if (TypeIdx == 0)
2676 widenScalarDst(MI, WideTy);
2677 else
2678 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2679
2681 return Legalized;
2682 case TargetOpcode::G_SITOFP:
2684
2685 if (TypeIdx == 0)
2686 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2687 else
2688 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2689
2691 return Legalized;
2692 case TargetOpcode::G_UITOFP:
2694
2695 if (TypeIdx == 0)
2696 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2697 else
2698 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2699
2701 return Legalized;
2702 case TargetOpcode::G_LOAD:
2703 case TargetOpcode::G_SEXTLOAD:
2704 case TargetOpcode::G_ZEXTLOAD:
2706 widenScalarDst(MI, WideTy);
2708 return Legalized;
2709
2710 case TargetOpcode::G_STORE: {
2711 if (TypeIdx != 0)
2712 return UnableToLegalize;
2713
2714 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2715 if (!Ty.isScalar())
2716 return UnableToLegalize;
2717
2719
2720 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
2721 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2722 widenScalarSrc(MI, WideTy, 0, ExtType);
2723
2725 return Legalized;
2726 }
2727 case TargetOpcode::G_CONSTANT: {
2728 MachineOperand &SrcMO = MI.getOperand(1);
2730 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
2731 MRI.getType(MI.getOperand(0).getReg()));
2732 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2733 ExtOpc == TargetOpcode::G_ANYEXT) &&
2734 "Illegal Extend");
2735 const APInt &SrcVal = SrcMO.getCImm()->getValue();
2736 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2737 ? SrcVal.sext(WideTy.getSizeInBits())
2738 : SrcVal.zext(WideTy.getSizeInBits());
2740 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
2741
2742 widenScalarDst(MI, WideTy);
2744 return Legalized;
2745 }
2746 case TargetOpcode::G_FCONSTANT: {
2747 // To avoid changing the bits of the constant due to extension to a larger
2748 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
2749 MachineOperand &SrcMO = MI.getOperand(1);
2750 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
2752 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
2753 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
2754 MI.eraseFromParent();
2755 return Legalized;
2756 }
2757 case TargetOpcode::G_IMPLICIT_DEF: {
2759 widenScalarDst(MI, WideTy);
2761 return Legalized;
2762 }
2763 case TargetOpcode::G_BRCOND:
2765 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
2767 return Legalized;
2768
2769 case TargetOpcode::G_FCMP:
2771 if (TypeIdx == 0)
2772 widenScalarDst(MI, WideTy);
2773 else {
2774 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2775 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
2776 }
2778 return Legalized;
2779
2780 case TargetOpcode::G_ICMP:
2782 if (TypeIdx == 0)
2783 widenScalarDst(MI, WideTy);
2784 else {
2785 unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
2786 MI.getOperand(1).getPredicate()))
2787 ? TargetOpcode::G_SEXT
2788 : TargetOpcode::G_ZEXT;
2789 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
2790 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
2791 }
2793 return Legalized;
2794
2795 case TargetOpcode::G_PTR_ADD:
2796 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
2798 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2800 return Legalized;
2801
2802 case TargetOpcode::G_PHI: {
2803 assert(TypeIdx == 0 && "Expecting only Idx 0");
2804
2806 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
2807 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2809 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
2810 }
2811
2812 MachineBasicBlock &MBB = *MI.getParent();
2814 widenScalarDst(MI, WideTy);
2816 return Legalized;
2817 }
2818 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2819 if (TypeIdx == 0) {
2820 Register VecReg = MI.getOperand(1).getReg();
2821 LLT VecTy = MRI.getType(VecReg);
2823
2824 widenScalarSrc(
2825 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
2826 TargetOpcode::G_ANYEXT);
2827
2828 widenScalarDst(MI, WideTy, 0);
2830 return Legalized;
2831 }
2832
2833 if (TypeIdx != 2)
2834 return UnableToLegalize;
2836 // TODO: Probably should be zext
2837 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2839 return Legalized;
2840 }
2841 case TargetOpcode::G_INSERT_VECTOR_ELT: {
2842 if (TypeIdx == 0) {
2844 const LLT WideEltTy = WideTy.getElementType();
2845
2846 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2847 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
2848 widenScalarDst(MI, WideTy, 0);
2850 return Legalized;
2851 }
2852
2853 if (TypeIdx == 1) {
2855
2856 Register VecReg = MI.getOperand(1).getReg();
2857 LLT VecTy = MRI.getType(VecReg);
2858 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
2859
2860 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
2861 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2862 widenScalarDst(MI, WideVecTy, 0);
2864 return Legalized;
2865 }
2866
2867 if (TypeIdx == 2) {
2869 // TODO: Probably should be zext
2870 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2872 return Legalized;
2873 }
2874
2875 return UnableToLegalize;
2876 }
2877 case TargetOpcode::G_FADD:
2878 case TargetOpcode::G_FMUL:
2879 case TargetOpcode::G_FSUB:
2880 case TargetOpcode::G_FMA:
2881 case TargetOpcode::G_FMAD:
2882 case TargetOpcode::G_FNEG:
2883 case TargetOpcode::G_FABS:
2884 case TargetOpcode::G_FCANONICALIZE:
2885 case TargetOpcode::G_FMINNUM:
2886 case TargetOpcode::G_FMAXNUM:
2887 case TargetOpcode::G_FMINNUM_IEEE:
2888 case TargetOpcode::G_FMAXNUM_IEEE:
2889 case TargetOpcode::G_FMINIMUM:
2890 case TargetOpcode::G_FMAXIMUM:
2891 case TargetOpcode::G_FDIV:
2892 case TargetOpcode::G_FREM:
2893 case TargetOpcode::G_FCEIL:
2894 case TargetOpcode::G_FFLOOR:
2895 case TargetOpcode::G_FCOS:
2896 case TargetOpcode::G_FSIN:
2897 case TargetOpcode::G_FTAN:
2898 case TargetOpcode::G_FLOG10:
2899 case TargetOpcode::G_FLOG:
2900 case TargetOpcode::G_FLOG2:
2901 case TargetOpcode::G_FRINT:
2902 case TargetOpcode::G_FNEARBYINT:
2903 case TargetOpcode::G_FSQRT:
2904 case TargetOpcode::G_FEXP:
2905 case TargetOpcode::G_FEXP2:
2906 case TargetOpcode::G_FEXP10:
2907 case TargetOpcode::G_FPOW:
2908 case TargetOpcode::G_INTRINSIC_TRUNC:
2909 case TargetOpcode::G_INTRINSIC_ROUND:
2910 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2911 assert(TypeIdx == 0);
2913
2914 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
2915 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
2916
2917 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2919 return Legalized;
2920 case TargetOpcode::G_FPOWI:
2921 case TargetOpcode::G_FLDEXP:
2922 case TargetOpcode::G_STRICT_FLDEXP: {
2923 if (TypeIdx == 0) {
2924 if (MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP)
2925 return UnableToLegalize;
2926
2928 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2929 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2931 return Legalized;
2932 }
2933
2934 if (TypeIdx == 1) {
2935 // For some reason SelectionDAG tries to promote to a libcall without
2936 // actually changing the integer type for promotion.
2938 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2940 return Legalized;
2941 }
2942
2943 return UnableToLegalize;
2944 }
2945 case TargetOpcode::G_FFREXP: {
2947
2948 if (TypeIdx == 0) {
2949 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2950 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2951 } else {
2952 widenScalarDst(MI, WideTy, 1);
2953 }
2954
2956 return Legalized;
2957 }
2958 case TargetOpcode::G_INTTOPTR:
2959 if (TypeIdx != 1)
2960 return UnableToLegalize;
2961
2963 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2965 return Legalized;
2966 case TargetOpcode::G_PTRTOINT:
2967 if (TypeIdx != 0)
2968 return UnableToLegalize;
2969
2971 widenScalarDst(MI, WideTy, 0);
2973 return Legalized;
2974 case TargetOpcode::G_BUILD_VECTOR: {
2976
2977 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
2978 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
2979 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
2980
2981 // Avoid changing the result vector type if the source element type was
2982 // requested.
2983 if (TypeIdx == 1) {
2984 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
2985 } else {
2986 widenScalarDst(MI, WideTy, 0);
2987 }
2988
2990 return Legalized;
2991 }
2992 case TargetOpcode::G_SEXT_INREG:
2993 if (TypeIdx != 0)
2994 return UnableToLegalize;
2995
2997 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2998 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3000 return Legalized;
3001 case TargetOpcode::G_PTRMASK: {
3002 if (TypeIdx != 1)
3003 return UnableToLegalize;
3005 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3007 return Legalized;
3008 }
3009 case TargetOpcode::G_VECREDUCE_FADD:
3010 case TargetOpcode::G_VECREDUCE_FMUL:
3011 case TargetOpcode::G_VECREDUCE_FMIN:
3012 case TargetOpcode::G_VECREDUCE_FMAX:
3013 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3014 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3015 if (TypeIdx != 0)
3016 return UnableToLegalize;
3018 Register VecReg = MI.getOperand(1).getReg();
3019 LLT VecTy = MRI.getType(VecReg);
3020 LLT WideVecTy = VecTy.isVector()
3021 ? LLT::vector(VecTy.getElementCount(), WideTy)
3022 : WideTy;
3023 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3024 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3026 return Legalized;
3027 }
3028 case TargetOpcode::G_VSCALE: {
3029 MachineOperand &SrcMO = MI.getOperand(1);
3031 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3032 // The CImm is always a signed value
3033 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3035 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3036 widenScalarDst(MI, WideTy);
3038 return Legalized;
3039 }
3040 case TargetOpcode::G_SPLAT_VECTOR: {
3041 if (TypeIdx != 1)
3042 return UnableToLegalize;
3043
3045 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3047 return Legalized;
3048 }
3049 }
3050}
3051
3052static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3053 MachineIRBuilder &B, Register Src, LLT Ty) {
3054 auto Unmerge = B.buildUnmerge(Ty, Src);
3055 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3056 Pieces.push_back(Unmerge.getReg(I));
3057}
3058
3059static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3060 MachineIRBuilder &MIRBuilder) {
3061 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3062 MachineFunction &MF = MIRBuilder.getMF();
3063 const DataLayout &DL = MIRBuilder.getDataLayout();
3064 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3065 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3066 LLT DstLLT = MRI.getType(DstReg);
3067
3068 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3069
3070 auto Addr = MIRBuilder.buildConstantPool(
3071 AddrPtrTy,
3072 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3073
3074 MachineMemOperand *MMO =
3075 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3076 MachineMemOperand::MOLoad, DstLLT, Alignment);
3077
3078 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3079}
3080
3081LegalizerHelper::LegalizeResult
3082LegalizerHelper::lowerConstant(MachineInstr &MI) {
3083 const MachineOperand &ConstOperand = MI.getOperand(1);
3084 const Constant *ConstantVal = ConstOperand.getCImm();
3085
3086 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3087 MI.eraseFromParent();
3088
3089 return Legalized;
3090}
3091
3092LegalizerHelper::LegalizeResult
3093LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3094 const MachineOperand &ConstOperand = MI.getOperand(1);
3095 const Constant *ConstantVal = ConstOperand.getFPImm();
3096
3097 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3098 MI.eraseFromParent();
3099
3100 return Legalized;
3101}
3102
3103LegalizerHelper::LegalizeResult
3104LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3105 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3106 if (SrcTy.isVector()) {
3107 LLT SrcEltTy = SrcTy.getElementType();
3108 SmallVector<Register, 8> SrcRegs;
3109
3110 if (DstTy.isVector()) {
3111 int NumDstElt = DstTy.getNumElements();
3112 int NumSrcElt = SrcTy.getNumElements();
3113
3114 LLT DstEltTy = DstTy.getElementType();
3115 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3116 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3117
3118 // If there's an element size mismatch, insert intermediate casts to match
3119 // the result element type.
3120 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3121 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3122 //
3123 // =>
3124 //
3125 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3126 // %4:_(<2 x s8>) = G_BITCAST %2
3127 // %5:_(<2 x s8>) = G_BITCAST %3
3128 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3129 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
3130 SrcPartTy = SrcEltTy;
3131 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3132 //
3133 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3134 //
3135 // =>
3136 //
3137 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3138 // %4:_(s16) = G_BITCAST %2
3139 // %5:_(s16) = G_BITCAST %3
3140 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3141 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
3142 DstCastTy = DstEltTy;
3143 }
3144
3145 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3146 for (Register &SrcReg : SrcRegs)
3147 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3148 } else
3149 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3150
3151 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3152 MI.eraseFromParent();
3153 return Legalized;
3154 }
3155
3156 if (DstTy.isVector()) {
3157 SmallVector<Register, 8> SrcRegs;
3158 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3159 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3160 MI.eraseFromParent();
3161 return Legalized;
3162 }
3163
3164 return UnableToLegalize;
3165}
3166
3167/// Figure out the bit offset into a register when coercing a vector index for
3168/// the wide element type. This is only for the case when promoting vector to
3169/// one with larger elements.
3170///
3171///
3172/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3173/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
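/// e.g. viewing <8 x s8> as <2 x s32> (NewEltSize = 32, OldEltSize = 8),
/// index 5 lands in wide element 1 at bit offset (5 & 3) << 3 = 8.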
3174static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3175 Register Idx,
3176 unsigned NewEltSize,
3177 unsigned OldEltSize) {
3178 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3179 LLT IdxTy = B.getMRI()->getType(Idx);
3180
3181 // Now figure out the amount we need to shift to get the target bits.
3182 auto OffsetMask = B.buildConstant(
3183 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3184 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3185 return B.buildShl(IdxTy, OffsetIdx,
3186 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3187}
3188
3189/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3190/// is casting to a vector with a smaller element size, perform multiple element
3191/// extracts and merge the results. If this is coercing to a vector with larger
3192/// elements, index the bitcasted vector and extract the target element with bit
3193/// operations. This is intended to force the indexing in the native register
3194/// size for architectures that can dynamically index the register file.
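/// e.g. extracting an s8 element from <16 x s8> through a <4 x s32> cast pulls
/// out the containing s32 element and shifts/truncates it, while casting the
/// other way rebuilds the wide element from several narrow extracts.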
3195LegalizerHelper::LegalizeResult
3196LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3197 LLT CastTy) {
3198 if (TypeIdx != 1)
3199 return UnableToLegalize;
3200
3201 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3202
3203 LLT SrcEltTy = SrcVecTy.getElementType();
3204 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3205 unsigned OldNumElts = SrcVecTy.getNumElements();
3206
3207 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3208 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3209
3210 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3211 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3212 if (NewNumElts > OldNumElts) {
3213 // Decreasing the vector element size
3214 //
3215 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3216 // =>
3217 // v4i32:castx = bitcast x:v2i64
3218 //
3219 // i64 = bitcast
3220 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3221 // (i32 (extract_vector_elt castx, (2 * y + 1)))
3222 //
3223 if (NewNumElts % OldNumElts != 0)
3224 return UnableToLegalize;
3225
3226 // Type of the intermediate result vector.
3227 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3228 LLT MidTy =
3229 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3230
3231 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3232
3233 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3234 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3235
3236 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3237 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3238 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3239 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3240 NewOps[I] = Elt.getReg(0);
3241 }
3242
3243 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3244 MIRBuilder.buildBitcast(Dst, NewVec);
3245 MI.eraseFromParent();
3246 return Legalized;
3247 }
3248
3249 if (NewNumElts < OldNumElts) {
3250 if (NewEltSize % OldEltSize != 0)
3251 return UnableToLegalize;
3252
3253 // This only depends on powers of 2 because we use bit tricks to figure out
3254 // the bit offset we need to shift to get the target element. A general
3255 // expansion could emit division/multiply.
3256 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3257 return UnableToLegalize;
3258
3259 // Increasing the vector element size.
3260 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3261 //
3262 // =>
3263 //
3264 // %cast = G_BITCAST %vec
3265 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3266 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3267 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3268 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3269 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3270 // %elt = G_TRUNC %elt_bits
3271
3272 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3273 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3274
3275 // Divide to get the index in the wider element type.
3276 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3277
3278 Register WideElt = CastVec;
3279 if (CastTy.isVector()) {
3280 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3281 ScaledIdx).getReg(0);
3282 }
3283
3284 // Compute the bit offset into the register of the target element.
3285 Register OffsetBits = getBitcastWiderVectorElementOffset(
3286 MIRBuilder, Idx, NewEltSize, OldEltSize);
3287
3288 // Shift the wide element to get the target element.
3289 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3290 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3291 MI.eraseFromParent();
3292 return Legalized;
3293 }
3294
3295 return UnableToLegalize;
3296}
3297
3298/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits,
3299/// while preserving the other bits in \p TargetReg.
3300///
3301/// (ZExt(InsertReg) << Offset) | (TargetReg & ~(LowBitsSet(InsertReg.size()) << Offset))
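/// e.g. inserting an s8 value at bit offset 16 of an s32 target keeps bits
/// [15:0] and [31:24] and replaces bits [23:16] with the new value.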
3302static Register buildBitFieldInsert(MachineIRBuilder &B,
3303 Register TargetReg, Register InsertReg,
3304 Register OffsetBits) {
3305 LLT TargetTy = B.getMRI()->getType(TargetReg);
3306 LLT InsertTy = B.getMRI()->getType(InsertReg);
3307 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3308 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3309
3310 // Produce a bitmask of the value to insert
3311 auto EltMask = B.buildConstant(
3312 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3313 InsertTy.getSizeInBits()));
3314 // Shift it into position
3315 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3316 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3317
3318 // Clear out the bits in the wide element
3319 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3320
3321 // The zero-extended value to insert already has zeros outside its low
3322 // bits, so OR it into the masked wide element.
3323 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3324}
3325
3326/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3327/// is increasing the element size, perform the indexing in the target element
3328/// type, and use bit operations to insert at the element position. This is
3329/// intended for architectures that can dynamically index the register file and
3330/// want to force indexing in the native register size.
3331LegalizerHelper::LegalizeResult
3332LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3333 LLT CastTy) {
3334 if (TypeIdx != 0)
3335 return UnableToLegalize;
3336
3337 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3338 MI.getFirst4RegLLTs();
3339 LLT VecTy = DstTy;
3340
3341 LLT VecEltTy = VecTy.getElementType();
3342 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3343 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3344 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3345
3346 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3347 unsigned OldNumElts = VecTy.getNumElements();
3348
3349 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3350 if (NewNumElts < OldNumElts) {
3351 if (NewEltSize % OldEltSize != 0)
3352 return UnableToLegalize;
3353
3354 // This only depends on powers of 2 because we use bit tricks to figure out
3355 // the bit offset we need to shift to get the target element. A general
3356 // expansion could emit division/multiply.
3357 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3358 return UnableToLegalize;
3359
3360 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3361 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3362
3363 // Divide to get the index in the wider element type.
3364 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3365
3366 Register ExtractedElt = CastVec;
3367 if (CastTy.isVector()) {
3368 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3369 ScaledIdx).getReg(0);
3370 }
3371
3372 // Compute the bit offset into the register of the target element.
3373 Register OffsetBits = getBitcastWiderVectorElementOffset(
3374 MIRBuilder, Idx, NewEltSize, OldEltSize);
3375
3376 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3377 Val, OffsetBits);
3378 if (CastTy.isVector()) {
3379 InsertedElt = MIRBuilder.buildInsertVectorElement(
3380 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3381 }
3382
3383 MIRBuilder.buildBitcast(Dst, InsertedElt);
3384 MI.eraseFromParent();
3385 return Legalized;
3386 }
3387
3388 return UnableToLegalize;
3389}
3390
3391LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
3392 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
3393 Register DstReg = LoadMI.getDstReg();
3394 Register PtrReg = LoadMI.getPointerReg();
3395 LLT DstTy = MRI.getType(DstReg);
3396 MachineMemOperand &MMO = LoadMI.getMMO();
3397 LLT MemTy = MMO.getMemoryType();
3398 MachineFunction &MF = MIRBuilder.getMF();
3399
3400 unsigned MemSizeInBits = MemTy.getSizeInBits();
3401 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
3402
3403 if (MemSizeInBits != MemStoreSizeInBits) {
3404 if (MemTy.isVector())
3405 return UnableToLegalize;
3406
3407 // Promote to a byte-sized load if not loading an integral number of
3408 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
3409 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
3410 MachineMemOperand *NewMMO =
3411 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
3412
3413 Register LoadReg = DstReg;
3414 LLT LoadTy = DstTy;
3415
3416 // If this wasn't already an extending load, we need to widen the result
3417 // register to avoid creating a load with a narrower result than the source.
3418 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
3419 LoadTy = WideMemTy;
3420 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
3421 }
3422
3423 if (isa<GSExtLoad>(LoadMI)) {
3424 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3425 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
3426 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
3427 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3428 // The extra bits are guaranteed to be zero, since we stored them that
3429 // way. A zext load from Wide thus automatically gives zext from MemVT.
3430 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
3431 } else {
3432 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
3433 }
3434
3435 if (DstTy != LoadTy)
3436 MIRBuilder.buildTrunc(DstReg, LoadReg);
3437
3438 LoadMI.eraseFromParent();
3439 return Legalized;
3440 }
3441
3442 // Big endian lowering not implemented.
3443 if (MIRBuilder.getDataLayout().isBigEndian())
3444 return UnableToLegalize;
3445
3446 // This load needs splitting into power of 2 sized loads.
3447 //
3448 // Our strategy here is to generate anyextending loads for the smaller
3449 // types up to the next power-of-2 result type, and then combine the two larger
3450 // result values together, before truncating back down to the non-pow-2
3451 // type.
3452 // E.g. v1 = i24 load =>
3453 // v2 = i32 zextload (2 byte)
3454 // v3 = i32 load (1 byte)
3455 // v4 = i32 shl v3, 16
3456 // v5 = i32 or v4, v2
3457 // v1 = i24 trunc v5
3458 // By doing this we generate the correct truncate which should get
3459 // combined away as an artifact with a matching extend.
3460
3461 uint64_t LargeSplitSize, SmallSplitSize;
3462
3463 if (!isPowerOf2_32(MemSizeInBits)) {
3464 // This load needs splitting into power of 2 sized loads.
3465 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
3466 SmallSplitSize = MemSizeInBits - LargeSplitSize;
3467 } else {
3468 // This is already a power of 2, but we still need to split this in half.
3469 //
3470 // Assume we're being asked to decompose an unaligned load.
3471 // TODO: If this requires multiple splits, handle them all at once.
3472 auto &Ctx = MF.getFunction().getContext();
3473 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3474 return UnableToLegalize;
3475
3476 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3477 }
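// Illustrative expansion (little-endian; register names are hypothetical): an
// unaligned s32 load that the target rejects is split with
// LargeSplitSize = SmallSplitSize = 16, giving roughly:
//   %lo:_(s32) = G_ZEXTLOAD %ptr          ; load 2 bytes
//   %hi:_(s32) = G_LOAD %ptr + 2          ; load 2 bytes, any-extended
//   %sh:_(s32) = G_SHL %hi, 16
//   %dst:_(s32) = G_OR %sh, %lo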
3478
3479 if (MemTy.isVector()) {
3480 // TODO: Handle vector extloads
3481 if (MemTy != DstTy)
3482 return UnableToLegalize;
3483
3484 // TODO: We can do better than scalarizing the vector and at least split it
3485 // in half.
3486 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
3487 }
3488
3489 MachineMemOperand *LargeMMO =
3490 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3491 MachineMemOperand *SmallMMO =
3492 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3493
3494 LLT PtrTy = MRI.getType(PtrReg);
3495 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
3496 LLT AnyExtTy = LLT::scalar(AnyExtSize);
3497 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
3498 PtrReg, *LargeMMO);
3499
3500 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
3501 LargeSplitSize / 8);
3502 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
3503 auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
3504 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
3505 SmallPtr, *SmallMMO);
3506
3507 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
3508 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
3509
3510 if (AnyExtTy == DstTy)
3511 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
3512 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
3513 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3514 MIRBuilder.buildTrunc(DstReg, {Or});
3515 } else {
3516 assert(DstTy.isPointer() && "expected pointer");
3517 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3518
3519 // FIXME: We currently consider this to be illegal for non-integral address
3520 // spaces, but we still need a way to reinterpret the bits.
3521 MIRBuilder.buildIntToPtr(DstReg, Or);
3522 }
3523
3524 LoadMI.eraseFromParent();
3525 return Legalized;
3526}
3527
3528 LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
3529 // Lower a non-power of 2 store into multiple pow-2 stores.
3530 // E.g. split an i24 store into an i16 store + i8 store.
3531 // We do this by first extending the stored value to the next largest power
3532 // of 2 type, and then using truncating stores to store the components.
3533 // By doing this, as with G_LOAD, we generate an extend that can be
3534 // artifact-combined away instead of leaving behind extracts.
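// Illustrative expansion for an s24 store on a little-endian target, assuming
// the resulting pieces are acceptable (register names are hypothetical):
//   %ext:_(s32) = G_ANYEXT %val(s24)
//   %hi:_(s32)  = G_LSHR %ext, 16
//   G_STORE %ext, %ptr                    ; truncating store of 2 bytes
//   G_STORE %hi, %ptr + 2                 ; store of the remaining byte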
3535 Register SrcReg = StoreMI.getValueReg();
3536 Register PtrReg = StoreMI.getPointerReg();
3537 LLT SrcTy = MRI.getType(SrcReg);
3538 MachineFunction &MF = MIRBuilder.getMF();
3539 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
3540 LLT MemTy = MMO.getMemoryType();
3541
3542 unsigned StoreWidth = MemTy.getSizeInBits();
3543 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
3544
3545 if (StoreWidth != StoreSizeInBits) {
3546 if (SrcTy.isVector())
3547 return UnableToLegalize;
3548
3549 // Promote to a byte-sized store with upper bits zero if not
3550 // storing an integral number of bytes. For example, promote
3551 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
3552 LLT WideTy = LLT::scalar(StoreSizeInBits);
3553
3554 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
3555 // Avoid creating a store with a narrower source than result.
3556 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
3557 SrcTy = WideTy;
3558 }
3559
3560 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
3561
3562 MachineMemOperand *NewMMO =
3563 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
3564 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
3565 StoreMI.eraseFromParent();
3566 return Legalized;
3567 }
3568
3569 if (MemTy.isVector()) {
3570 // TODO: Handle vector trunc stores
3571 if (MemTy != SrcTy)
3572 return UnableToLegalize;
3573
3574 // TODO: We can do better than scalarizing the vector and at least split it
3575 // in half.
3576 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
3577 }
3578
3579 unsigned MemSizeInBits = MemTy.getSizeInBits();
3580 uint64_t LargeSplitSize, SmallSplitSize;
3581
3582 if (!isPowerOf2_32(MemSizeInBits)) {
3583 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
3584 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
3585 } else {
3586 auto &Ctx = MF.getFunction().getContext();
3587 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3588 return UnableToLegalize; // Don't know what we're being asked to do.
3589
3590 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3591 }
3592
3593 // Extend to the next pow-2. If this store was itself the result of lowering,
3594 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
3595 // that's wider than the stored size.
3596 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
3597 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
3598
3599 if (SrcTy.isPointer()) {
3600 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
3601 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
3602 }
3603
3604 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
3605
3606 // Obtain the smaller value by shifting away the larger value.
3607 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
3608 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
3609
3610 // Generate the PtrAdd and truncating stores.
3611 LLT PtrTy = MRI.getType(PtrReg);
3612 auto OffsetCst = MIRBuilder.buildConstant(
3613 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
3614 auto SmallPtr =
3615 MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
3616
3617 MachineMemOperand *LargeMMO =
3618 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3619 MachineMemOperand *SmallMMO =
3620 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3621 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
3622 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
3623 StoreMI.eraseFromParent();
3624 return Legalized;
3625}
3626
3627 LegalizerHelper::LegalizeResult
3628 LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
3629 switch (MI.getOpcode()) {
3630 case TargetOpcode::G_LOAD: {
3631 if (TypeIdx != 0)
3632 return UnableToLegalize;
3633 MachineMemOperand &MMO = **MI.memoperands_begin();
3634
3635 // Not sure how to interpret a bitcast of an extending load.
3636 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3637 return UnableToLegalize;
3638
3640 bitcastDst(MI, CastTy, 0);
3641 MMO.setType(CastTy);
3643 return Legalized;
3644 }
3645 case TargetOpcode::G_STORE: {
3646 if (TypeIdx != 0)
3647 return UnableToLegalize;
3648
3649 MachineMemOperand &MMO = **MI.memoperands_begin();
3650
3651 // Not sure how to interpret a bitcast of a truncating store.
3652 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3653 return UnableToLegalize;
3654
3656 bitcastSrc(MI, CastTy, 0);
3657 MMO.setType(CastTy);
3659 return Legalized;
3660 }
3661 case TargetOpcode::G_SELECT: {
3662 if (TypeIdx != 0)
3663 return UnableToLegalize;
3664
3665 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
3666 LLVM_DEBUG(
3667 dbgs() << "bitcast action not implemented for vector select\n");
3668 return UnableToLegalize;
3669 }
3670
3672 bitcastSrc(MI, CastTy, 2);
3673 bitcastSrc(MI, CastTy, 3);
3674 bitcastDst(MI, CastTy, 0);
3676 return Legalized;
3677 }
3678 case TargetOpcode::G_AND:
3679 case TargetOpcode::G_OR:
3680 case TargetOpcode::G_XOR: {
3682 bitcastSrc(MI, CastTy, 1);
3683 bitcastSrc(MI, CastTy, 2);
3684 bitcastDst(MI, CastTy, 0);
3686 return Legalized;
3687 }
3688 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3689 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
3690 case TargetOpcode::G_INSERT_VECTOR_ELT:
3691 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
3692 default:
3693 return UnableToLegalize;
3694 }
3695}
3696
3697// Legalize an instruction by changing the opcode in place.
3698void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
3700 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
3702}
3703
3704 LegalizerHelper::LegalizeResult
3705 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
3706 using namespace TargetOpcode;
3707
3708 switch(MI.getOpcode()) {
3709 default:
3710 return UnableToLegalize;
3711 case TargetOpcode::G_FCONSTANT:
3712 return lowerFConstant(MI);
3713 case TargetOpcode::G_BITCAST:
3714 return lowerBitcast(MI);
3715 case TargetOpcode::G_SREM:
3716 case TargetOpcode::G_UREM: {
3717 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3718 auto Quot =
3719 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
3720 {MI.getOperand(1), MI.getOperand(2)});
3721
3722 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
3723 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
3724 MI.eraseFromParent();
3725 return Legalized;
3726 }
3727 case TargetOpcode::G_SADDO:
3728 case TargetOpcode::G_SSUBO:
3729 return lowerSADDO_SSUBO(MI);
3730 case TargetOpcode::G_UMULH:
3731 case TargetOpcode::G_SMULH:
3732 return lowerSMULH_UMULH(MI);
3733 case TargetOpcode::G_SMULO:
3734 case TargetOpcode::G_UMULO: {
3735 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
3736 // result.
3737 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
3738 LLT Ty = MRI.getType(Res);
3739
3740 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
3741 ? TargetOpcode::G_SMULH
3742 : TargetOpcode::G_UMULH;
3743
3745 const auto &TII = MIRBuilder.getTII();
3746 MI.setDesc(TII.get(TargetOpcode::G_MUL));
3747 MI.removeOperand(1);
3749
3750 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
3751 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3752
3753 // Move insert point forward so we can use the Res register if needed.
3754 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3755
3756 // For *signed* multiply, overflow is detected by checking:
3757 // (hi != (lo >> bitwidth-1))
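// e.g. for s8: 100 * 3 = 300 gives lo = 44 and hi = 1; lo >> 7 is 0, which
// differs from hi, so overflow is reported (300 is outside [-128, 127]).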
3758 if (Opcode == TargetOpcode::G_SMULH) {
3759 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
3760 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
3761 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
3762 } else {
3763 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
3764 }
3765 return Legalized;
3766 }
3767 case TargetOpcode::G_FNEG: {
3768 auto [Res, SubByReg] = MI.getFirst2Regs();
3769 LLT Ty = MRI.getType(Res);
3770
3771 // TODO: Handle vector types once we are able to
3772 // represent them.
3773 if (Ty.isVector())
3774 return UnableToLegalize;
3775 auto SignMask =
3776 MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
3777 MIRBuilder.buildXor(Res, SubByReg, SignMask);
3778 MI.eraseFromParent();
3779 return Legalized;
3780 }
3781 case TargetOpcode::G_FSUB:
3782 case TargetOpcode::G_STRICT_FSUB: {
3783 auto [Res, LHS, RHS] = MI.getFirst3Regs();
3784 LLT Ty = MRI.getType(Res);
3785
3786 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
3787 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
3788
3789 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
3790 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
3791 else
3792 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3793
3794 MI.eraseFromParent();
3795 return Legalized;
3796 }
3797 case TargetOpcode::G_FMAD:
3798 return lowerFMad(MI);
3799 case TargetOpcode::G_FFLOOR:
3800 return lowerFFloor(MI);
3801 case TargetOpcode::G_INTRINSIC_ROUND:
3802 return lowerIntrinsicRound(MI);
3803 case TargetOpcode::G_FRINT: {
3804 // Since round even is the assumed rounding mode for unconstrained FP
3805 // operations, rint and roundeven are the same operation.
3806 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
3807 return Legalized;
3808 }
3809 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
3810 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
3811 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
3812 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
3813 **MI.memoperands_begin());
3814 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
3815 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
3816 MI.eraseFromParent();
3817 return Legalized;
3818 }
3819 case TargetOpcode::G_LOAD:
3820 case TargetOpcode::G_SEXTLOAD:
3821 case TargetOpcode::G_ZEXTLOAD:
3822 return lowerLoad(cast<GAnyLoad>(MI));
3823 case TargetOpcode::G_STORE:
3824 return lowerStore(cast<GStore>(MI));
3825 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3826 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3827 case TargetOpcode::G_CTLZ:
3828 case TargetOpcode::G_CTTZ:
3829 case TargetOpcode::G_CTPOP:
3830 return lowerBitCount(MI);
3831 case G_UADDO: {
3832 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
3833
3834 Register NewRes = MRI.cloneVirtualRegister(Res);
3835
3836 MIRBuilder.buildAdd(NewRes, LHS, RHS);
3837 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
3838
3839 MIRBuilder.buildCopy(Res, NewRes);
3840
3841 MI.eraseFromParent();
3842 return Legalized;
3843 }
3844 case G_UADDE: {
3845 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
3846 const LLT CondTy = MRI.getType(CarryOut);
3847 const LLT Ty = MRI.getType(Res);
3848
3849 Register NewRes = MRI.cloneVirtualRegister(Res);
3850
3851 // Initial add of the two operands.
3852 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
3853
3854 // Initial check for carry.
3855 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
3856
3857 // Add the sum and the carry.
3858 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
3859 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
3860
3861 // Second check for carry. We can only carry if the initial sum is all 1s
3862 // and the carry is set, resulting in a new sum of 0.
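// e.g. for s8 with LHS = 0xFF, RHS = 0x00, CarryIn = 1: the initial add gives
// 0xFF with no carry, adding CarryIn wraps to 0x00, and ResEqZero && CarryIn
// supplies the missing carry so CarryOut = 1.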
3863 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3864 auto ResEqZero =
3865 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
3866 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
3867 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
3868
3869 MIRBuilder.buildCopy(Res, NewRes);
3870
3871 MI.eraseFromParent();
3872 return Legalized;
3873 }
3874 case G_USUBO: {
3875 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
3876
3877 MIRBuilder.buildSub(Res, LHS, RHS);
3878 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
3879
3880 MI.eraseFromParent();
3881 return Legalized;
3882 }
3883 case G_USUBE: {
3884 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
3885 const LLT CondTy = MRI.getType(BorrowOut);
3886 const LLT Ty = MRI.getType(Res);
3887
3888 // Initial subtract of the two operands.
3889 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3890
3891 // Initial check for borrow.
3892 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
3893
3894 // Subtract the borrow from the first subtract.
3895 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
3896 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
3897
3898 // Second check for borrow. We can only borrow if the initial difference is
3899 // 0 and the borrow is set, resulting in a new difference of all 1s.
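// e.g. for s8 with LHS = 0x00, RHS = 0x00, BorrowIn = 1: the initial subtract
// gives 0x00 with no borrow, subtracting BorrowIn wraps to 0xFF, and
// TmpResEqZero && BorrowIn supplies the missing borrow so BorrowOut = 1.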
3900 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3901 auto TmpResEqZero =
3902 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
3903 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
3904 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
3905
3906 MI.eraseFromParent();
3907 return Legalized;
3908 }
3909 case G_UITOFP:
3910 return lowerUITOFP(MI);
3911 case G_SITOFP:
3912 return lowerSITOFP(MI);
3913 case G_FPTOUI:
3914 return lowerFPTOUI(MI);
3915 case G_FPTOSI:
3916 return lowerFPTOSI(MI);
3917 case G_FPTRUNC:
3918 return lowerFPTRUNC(MI);
3919 case G_FPOWI:
3920 return lowerFPOWI(MI);
3921 case G_SMIN:
3922 case G_SMAX:
3923 case G_UMIN:
3924 case G_UMAX:
3925 return lowerMinMax(MI);
3926 case G_FCOPYSIGN:
3927 return lowerFCopySign(MI);
3928 case G_FMINNUM:
3929 case G_FMAXNUM:
3930 return lowerFMinNumMaxNum(MI);
3931 case G_MERGE_VALUES:
3932 return lowerMergeValues(MI);
3933 case G_UNMERGE_VALUES:
3934 return lowerUnmergeValues(MI);
3935 case TargetOpcode::G_SEXT_INREG: {
3936 assert(MI.getOperand(2).isImm() && "Expected immediate");
3937 int64_t SizeInBits = MI.getOperand(2).getImm();
3938
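// Lower as a left shift followed by an arithmetic right shift, e.g. for an
// s32 destination with SizeInBits = 8: shift left by 24, then G_ASHR by 24.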
3939 auto [DstReg, SrcReg] = MI.getFirst2Regs();
3940 LLT DstTy = MRI.getType(DstReg);
3941 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
3942
3943 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
3944 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
3945 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
3946 MI.eraseFromParent();
3947 return Legalized;
3948 }
3949 case G_EXTRACT_VECTOR_ELT:
3950 case G_INSERT_VECTOR_ELT:
3951 return lowerExtractInsertVectorElt(MI);
3952 case G_SHUFFLE_VECTOR:
3953 return lowerShuffleVector(MI);
3954 case G_DYN_STACKALLOC:
3955 return lowerDynStackAlloc(MI);
3956 case G_STACKSAVE:
3957 return lowerStackSave(MI);
3958 case G_STACKRESTORE:
3959 return lowerStackRestore(MI);
3960 case G_EXTRACT:
3961 return lowerExtract(MI);
3962 case G_INSERT:
3963 return lowerInsert(MI);
3964 case G_BSWAP:
3965 return lowerBswap(MI);
3966 case G_BITREVERSE:
3967 return lowerBitreverse(MI);
3968 case G_READ_REGISTER:
3969 case G_WRITE_REGISTER:
3970 return lowerReadWriteRegister(MI);
3971 case G_UADDSAT:
3972 case G_USUBSAT: {
3973 // Try to make a reasonable guess about which lowering strategy to use. The
3974 // target can override this with custom lowering and calling the
3975 // implementation functions.
3976 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3977 if (LI.isLegalOrCustom({G_UMIN, Ty}))
3978 return lowerAddSubSatToMinMax(MI);
3979 return lowerAddSubSatToAddoSubo(MI);
3980 }
3981 case G_SADDSAT:
3982 case G_SSUBSAT: {
3983 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3984
3985 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3986 // since it's a shorter expansion. However, we would need to figure out the
3987 // preferred boolean type for the carry out for the query.
3988 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3989 return lowerAddSubSatToMinMax(MI);
3990 return lowerAddSubSatToAddoSubo(MI);
3991 }
3992 case G_SSHLSAT:
3993 case G_USHLSAT:
3994 return lowerShlSat(MI);
3995 case G_ABS:
3996 return lowerAbsToAddXor(MI);
3997 case G_SELECT:
3998 return lowerSelect(MI);
3999 case G_IS_FPCLASS:
4000 return lowerISFPCLASS(MI);
4001 case G_SDIVREM:
4002 case G_UDIVREM:
4003 return lowerDIVREM(MI);
4004 case G_FSHL:
4005 case G_FSHR:
4006 return lowerFunnelShift(MI);
4007 case G_ROTL:
4008 case G_ROTR:
4009 return lowerRotate(MI);
4010 case G_MEMSET:
4011 case G_MEMCPY:
4012 case G_MEMMOVE:
4013 return lowerMemCpyFamily(MI);
4014 case G_MEMCPY_INLINE:
4015 return lowerMemcpyInline(MI);
4016 case G_ZEXT:
4017 case G_SEXT:
4018 case G_ANYEXT:
4019 return lowerEXT(MI);
4020 case G_TRUNC:
4021 return lowerTRUNC(MI);
4022 GISEL_VECREDUCE_CASES_NONSEQ
4023 return lowerVectorReduction(MI);
4024 case G_VAARG:
4025 return lowerVAArg(MI);
4026 }
4027}
4028
4029 Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4030 Align MinAlign) const {
4031 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4032 // datalayout for the preferred alignment. Also there should be a target hook
4033 // for this to allow targets to reduce the alignment and ignore the
4034 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4035 // the type.
4036 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4037}
4038
4039 MachineInstrBuilder
4040 LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4041 MachinePointerInfo &PtrInfo) {
4042 MachineFunction &MF = MIRBuilder.getMF();
4043 const DataLayout &DL = MIRBuilder.getDataLayout();
4044 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4045
4046 unsigned AddrSpace = DL.getAllocaAddrSpace();
4047 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4048
4049 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4050 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4051}
4052
4053 static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
4054 LLT VecTy) {
4055 LLT IdxTy = B.getMRI()->getType(IdxReg);
4056 unsigned NElts = VecTy.getNumElements();
4057
4058 int64_t IdxVal;
4059 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4060 if (IdxVal < VecTy.getNumElements())
4061 return IdxReg;
4062 // If a constant index would be out of bounds, clamp it as well.
4063 }
4064
4065 if (isPowerOf2_32(NElts)) {
4066 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4067 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4068 }
4069
4070 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4071 .getReg(0);
4072}
4073
4074 Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4075 Register Index) {
4076 LLT EltTy = VecTy.getElementType();
4077
4078 // Calculate the element offset and add it to the pointer.
4079 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4080 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4081 "Converting bits to bytes lost precision");
4082
4084
4085 // Convert index to the correct size for the address space.
4086 const DataLayout &DL = MIRBuilder.getDataLayout();
4087 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4088 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4089 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4090 if (IdxTy != MRI.getType(Index))
4091 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4092
4093 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4094 MIRBuilder.buildConstant(IdxTy, EltSize));
4095
4096 LLT PtrTy = MRI.getType(VecPtr);
4097 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4098}
4099
4100#ifndef NDEBUG
4101 /// Check that all vector operands have the same number of elements. Other
4102 /// operands should be listed in \p NonVecOpIndices.
4103 static bool hasSameNumEltsOnAllVectorOperands(
4104 GenericMachineInstr &MI, MachineRegisterInfo &MRI,
4105 std::initializer_list<unsigned> NonVecOpIndices) {
4106 if (MI.getNumMemOperands() != 0)
4107 return false;
4108
4109 LLT VecTy = MRI.getType(MI.getReg(0));
4110 if (!VecTy.isVector())
4111 return false;
4112 unsigned NumElts = VecTy.getNumElements();
4113
4114 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4115 MachineOperand &Op = MI.getOperand(OpIdx);
4116 if (!Op.isReg()) {
4117 if (!is_contained(NonVecOpIndices, OpIdx))
4118 return false;
4119 continue;
4120 }
4121
4122 LLT Ty = MRI.getType(Op.getReg());
4123 if (!Ty.isVector()) {
4124 if (!is_contained(NonVecOpIndices, OpIdx))
4125 return false;
4126 continue;
4127 }
4128
4129 if (Ty.getNumElements() != NumElts)
4130 return false;
4131 }
4132
4133 return true;
4134}
4135#endif
4136
4137 /// Fill \p DstOps with DstOps that, combined, have the same number of elements
4138 /// as \p Ty. These DstOps have either scalar type when \p NumElts = 1 or are
4139 /// vectors with \p NumElts elements. When Ty.getNumElements() is not a multiple
4140 /// of \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
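/// e.g. for Ty = <7 x s16> and NumElts = 4 this produces one <4 x s16> DstOp
/// followed by a <3 x s16> leftover DstOp.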
4141static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
4142 unsigned NumElts) {
4143 LLT LeftoverTy;
4144 assert(Ty.isVector() && "Expected vector type");
4145 LLT EltTy = Ty.getElementType();
4146 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
4147 int NumParts, NumLeftover;
4148 std::tie(NumParts, NumLeftover) =
4149 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
4150
4151 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
4152 for (int i = 0; i < NumParts; ++i) {
4153 DstOps.push_back(NarrowTy);
4154 }
4155
4156 if (LeftoverTy.isValid()) {
4157 assert(NumLeftover == 1 && "expected exactly one leftover");
4158 DstOps.push_back(LeftoverTy);
4159 }
4160}
4161
4162/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
4163/// made from \p Op depending on operand type.
4164static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
4165 MachineOperand &Op) {
4166 for (unsigned i = 0; i < N; ++i) {
4167 if (Op.isReg())
4168 Ops.push_back(Op.getReg());
4169 else if (Op.isImm())
4170 Ops.push_back(Op.getImm());
4171 else if (Op.isPredicate())
4172 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
4173 else
4174 llvm_unreachable("Unsupported type");
4175 }
4176}
4177
4178// Handle splitting vector operations which need to have the same number of
4179// elements in each type index, but each type index may have a different element
4180// type.
4181//
4182// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
4183// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4184// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4185//
4186// Also handles some irregular breakdown cases, e.g.
4187// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
4188// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4189// s64 = G_SHL s64, s32
4190 LegalizerHelper::LegalizeResult
4191 LegalizerHelper::fewerElementsVectorMultiEltType(
4192 GenericMachineInstr &MI, unsigned NumElts,
4193 std::initializer_list<unsigned> NonVecOpIndices) {
4194 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
4195 "Non-compatible opcode or not specified non-vector operands");
4196 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4197
4198 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4199 unsigned NumDefs = MI.getNumDefs();
4200
4201 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
4202 // Build instructions with DstOps to use instruction found by CSE directly.
4203 // CSE copies found instruction into given vreg when building with vreg dest.
4204 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
4205 // Output registers will be taken from created instructions.
4206 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
4207 for (unsigned i = 0; i < NumDefs; ++i) {
4208 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
4209 }
4210
4211 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
4212 // Operands listed in NonVecOpIndices will be used as is without splitting;
4213 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
4214 // scalar condition (op 1), immediate in sext_inreg (op 2).
4215 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
4216 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4217 ++UseIdx, ++UseNo) {
4218 if (is_contained(NonVecOpIndices, UseIdx)) {
4219 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
4220 MI.getOperand(UseIdx));
4221 } else {
4222 SmallVector<Register, 8> SplitPieces;
4223 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
4224 MRI);
4225 for (auto Reg : SplitPieces)
4226 InputOpsPieces[UseNo].push_back(Reg);
4227 }
4228 }
4229
4230 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4231
4232 // Take i-th piece of each input operand split and build sub-vector/scalar
4233 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
4234 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4235 SmallVector<DstOp, 8> Defs;
4236 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4237 Defs.push_back(OutputOpsPieces[DstNo][i]);
4238
4239 SmallVector<SrcOp, 8> Uses;
4240 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
4241 Uses.push_back(InputOpsPieces[InputNo][i]);
4242
4243 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
4244 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4245 OutputRegs[DstNo].push_back(I.getReg(DstNo));
4246 }
4247
4248 // Merge small outputs into MI's output for each def operand.
4249 if (NumLeftovers) {
4250 for (unsigned i = 0; i < NumDefs; ++i)
4251 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
4252 } else {
4253 for (unsigned i = 0; i < NumDefs; ++i)
4254 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
4255 }
4256
4257 MI.eraseFromParent();
4258 return Legalized;
4259}
4260
4261 LegalizerHelper::LegalizeResult
4262 LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
4263 unsigned NumElts) {
4264 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4265
4266 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4267 unsigned NumDefs = MI.getNumDefs();
4268
4269 SmallVector<DstOp, 8> OutputOpsPieces;
4270 SmallVector<Register, 8> OutputRegs;
4271 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
4272
4273 // Instructions that perform the register split will be inserted in the basic
4274 // block where the register is defined (the basic block is in the next operand).
4275 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
4276 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4277 UseIdx += 2, ++UseNo) {
4278 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
4280 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
4281 MIRBuilder, MRI);
4282 }
4283
4284 // Build PHIs with fewer elements.
4285 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4286 MIRBuilder.setInsertPt(*MI.getParent(), MI);
4287 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4288 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
4289 Phi.addDef(
4290 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
4291 OutputRegs.push_back(Phi.getReg(0));
4292
4293 for (unsigned j = 0; j < NumInputs / 2; ++j) {
4294 Phi.addUse(InputOpsPieces[j][i]);
4295 Phi.add(MI.getOperand(1 + j * 2 + 1));
4296 }
4297 }
4298
4299 // Set the insert point after the existing PHIs
4300 MachineBasicBlock &MBB = *MI.getParent();
4301 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
4302
4303 // Merge small outputs into MI's def.
4304 if (NumLeftovers) {
4305 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
4306 } else {
4307 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
4308 }
4309
4310 MI.eraseFromParent();
4311 return Legalized;
4312}
4313
4314 LegalizerHelper::LegalizeResult
4315 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
4316 unsigned TypeIdx,
4317 LLT NarrowTy) {
4318 const int NumDst = MI.getNumOperands() - 1;
4319 const Register SrcReg = MI.getOperand(NumDst).getReg();
4320 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4321 LLT SrcTy = MRI.getType(SrcReg);
4322
4323 if (TypeIdx != 1 || NarrowTy == DstTy)
4324 return UnableToLegalize;
4325
4326 // Requires compatible types. Otherwise SrcReg should have been defined by a
4327 // merge-like instruction that would get artifact-combined. Most likely the
4328 // instruction that defines SrcReg has to perform more/fewer elements
4329 // legalization compatible with NarrowTy.
4330 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
4331 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4332
4333 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
4334 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
4335 return UnableToLegalize;
4336
4337 // This is most likely DstTy (smaller than register size) packed in SrcTy
4338 // (larger than register size), and since the unmerge was not combined it will be
4339 // lowered to bit sequence extracts from the register. Unpack SrcTy to NarrowTy
4340 // (register size) pieces first. Then unpack each of the NarrowTy pieces to DstTy.
4341
4342 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
4343 //
4344 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
4345 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
4346 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
4347 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
4348 const int NumUnmerge = Unmerge->getNumOperands() - 1;
4349 const int PartsPerUnmerge = NumDst / NumUnmerge;
4350
4351 for (int I = 0; I != NumUnmerge; ++I) {
4352 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
4353
4354 for (int J = 0; J != PartsPerUnmerge; ++J)
4355 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
4356 MIB.addUse(Unmerge.getReg(I));
4357 }
4358
4359 MI.eraseFromParent();
4360 return Legalized;
4361}
4362
4363 LegalizerHelper::LegalizeResult
4364 LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
4365 LLT NarrowTy) {
4366 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
4367 // Requires compatible types. Otherwise the user of DstReg did not perform the
4368 // unmerge that should have been artifact-combined. Most likely the instruction
4369 // that uses DstReg has to do more/fewer elements legalization compatible with NarrowTy.
4370 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
4371 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4372 if (NarrowTy == SrcTy)
4373 return UnableToLegalize;
4374
4375 // This attempts to lower part of an LCMTy merge/unmerge sequence. The intended
4376 // use is for old MIR tests. Since the changes to more/fewer elements legalization,
4377 // it should no longer be possible to generate MIR like this when starting from
4378 // LLVM IR, because the LCMTy approach was replaced with merge/unmerge to vector elements.
4379 if (TypeIdx == 1) {
4380 assert(SrcTy.isVector() && "Expected vector types");
4381 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4382 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
4383 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
4384 return UnableToLegalize;
4385 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
4386 //
4387 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
4388 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
4389 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
4390 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
4391 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
4392 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
4393
4394 SmallVector<Register, 8> Elts;
4395 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
4396 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
4397 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
4398 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
4399 Elts.push_back(Unmerge.getReg(j));
4400 }
4401
4402 SmallVector<Register, 8> NarrowTyElts;
4403 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
4404 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
4405 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
4406 ++i, Offset += NumNarrowTyElts) {
4407 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
4408 NarrowTyElts.push_back(
4409 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
4410 }
4411
4412 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
4413 MI.eraseFromParent();
4414 return Legalized;
4415 }
4416
4417 assert(TypeIdx == 0 && "Bad type index");
4418 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
4419 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
4420 return UnableToLegalize;
4421
4422 // This is most likely SrcTy (smaller than register size) packed in DstTy
4423 // (larger than register size), and since the merge was not combined it will be
4424 // lowered to bit sequence packing into the register. Merge SrcTy to NarrowTy
4425 // (register size) pieces first. Then merge each of the NarrowTy pieces to DstTy.
4426
4427 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
4428 //
4429 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
4430 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
4431 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
4432 SmallVector<Register, 8> NarrowTyElts;
4433 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
4434 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
4435 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
4436 for (unsigned i = 0; i < NumParts; ++i) {
4438 for (unsigned j = 0; j < NumElts; ++j)
4439 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
4440 NarrowTyElts.push_back(
4441 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
4442 }
4443
4444 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
4445 MI.eraseFromParent();
4446 return Legalized;
4447}
4448
4449 LegalizerHelper::LegalizeResult
4450 LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
4451 unsigned TypeIdx,
4452 LLT NarrowVecTy) {
4453 auto [DstReg, SrcVec] = MI.getFirst2Regs();
4454 Register InsertVal;
4455 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
4456
4457 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
4458 if (IsInsert)
4459 InsertVal = MI.getOperand(2).getReg();
4460
4461 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
4462
4463 // TODO: Handle total scalarization case.
4464 if (!NarrowVecTy.isVector())
4465 return UnableToLegalize;
4466
4467 LLT VecTy = MRI.getType(SrcVec);
4468
4469 // If the index is a constant, we can really break this down as you would
4470 // expect, and index into the target size pieces.
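// e.g. extracting element 5 from <8 x s32> split into <4 x s32> pieces uses
// PartIdx = 5 / 4 = 1 and NewIdx = 5 - 4 = 1, i.e. element 1 of the second piece.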
4471 int64_t IdxVal;
4472 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
4473 if (MaybeCst) {
4474 IdxVal = MaybeCst->Value.getSExtValue();
4475 // Avoid out of bounds indexing the pieces.
4476 if (IdxVal >= VecTy.getNumElements()) {
4477 MIRBuilder.buildUndef(DstReg);
4478 MI.eraseFromParent();
4479 return Legalized;
4480 }
4481
4482 SmallVector<Register, 8> VecParts;
4483 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
4484
4485 // Build a sequence of NarrowTy pieces in VecParts for this operand.
4486 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
4487 TargetOpcode::G_ANYEXT);
4488
4489 unsigned NewNumElts = NarrowVecTy.getNumElements();
4490
4491 LLT IdxTy = MRI.getType(Idx);
4492 int64_t PartIdx = IdxVal / NewNumElts;
4493 auto NewIdx =
4494 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
4495
4496 if (IsInsert) {
4497 LLT PartTy = MRI.getType(VecParts[PartIdx]);
4498
4499 // Use the adjusted index to insert into one of the subvectors.
4500 auto InsertPart = MIRBuilder.buildInsertVectorElement(
4501 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
4502 VecParts[PartIdx] = InsertPart.getReg(0);
4503
4504 // Recombine the inserted subvector with the others to reform the result
4505 // vector.
4506 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
4507 } else {
4508 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
4509 }
4510
4511 MI.eraseFromParent();
4512 return Legalized;
4513 }
4514
4515 // With a variable index, we can't perform the operation in a smaller type, so
4516 // we're forced to expand this.
4517 //
4518 // TODO: We could emit a chain of compare/select to figure out which piece to
4519 // index.
4521}
4522
4523 LegalizerHelper::LegalizeResult
4524 LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
4525 LLT NarrowTy) {
4526 // FIXME: Don't know how to handle secondary types yet.
4527 if (TypeIdx != 0)
4528 return UnableToLegalize;
4529
4530 // This implementation doesn't work for atomics. Give up instead of doing
4531 // something invalid.
4532 if (LdStMI.isAtomic())
4533 return UnableToLegalize;
4534
4535 bool IsLoad = isa<GLoad>(LdStMI);
4536 Register ValReg = LdStMI.getReg(0);
4537 Register AddrReg = LdStMI.getPointerReg();
4538 LLT ValTy = MRI.getType(ValReg);
4539
4540 // FIXME: Do we need a distinct NarrowMemory legalize action?
4541 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
4542 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
4543 return UnableToLegalize;
4544 }
4545
4546 int NumParts = -1;
4547 int NumLeftover = -1;
4548 LLT LeftoverTy;
4549 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
4550 if (IsLoad) {
4551 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
4552 } else {
4553 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
4554 NarrowLeftoverRegs, MIRBuilder, MRI)) {
4555 NumParts = NarrowRegs.size();
4556 NumLeftover = NarrowLeftoverRegs.size();
4557 }
4558 }
4559
4560 if (NumParts == -1)
4561 return UnableToLegalize;
4562
4563 LLT PtrTy = MRI.getType(AddrReg);
4564 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
4565
4566 unsigned TotalSize = ValTy.getSizeInBits();
4567
4568 // Split the load/store into PartTy sized pieces starting at Offset. If this
4569 // is a load, return the new registers in ValRegs. For a store, each element
4570 // of ValRegs should be PartTy. Returns the next offset that needs to be
4571 // handled.
4572 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
4573 auto MMO = LdStMI.getMMO();
4574 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
4575 unsigned NumParts, unsigned Offset) -> unsigned {
4576 MachineFunction &MF = MIRBuilder.getMF();
4577 unsigned PartSize = PartTy.getSizeInBits();
4578 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
4579 ++Idx) {
4580 unsigned ByteOffset = Offset / 8;
4581 Register NewAddrReg;
4582
4583 MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
4584
4585 MachineMemOperand *NewMMO =
4586 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
4587
4588 if (IsLoad) {
4589 Register Dst = MRI.createGenericVirtualRegister(PartTy);
4590 ValRegs.push_back(Dst);
4591 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
4592 } else {
4593 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
4594 }
4595 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
4596 }
4597
4598 return Offset;
4599 };
4600
4601 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
4602 unsigned HandledOffset =
4603 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
4604
4605 // Handle the rest of the register if this isn't an even type breakdown.
4606 if (LeftoverTy.isValid())
4607 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
4608
4609 if (IsLoad) {
4610 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
4611 LeftoverTy, NarrowLeftoverRegs);
4612 }
4613
4614 LdStMI.eraseFromParent();
4615 return Legalized;
4616}
4617
4618 LegalizerHelper::LegalizeResult
4619 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
4620 LLT NarrowTy) {
4621 using namespace TargetOpcode;
4622 GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
4623 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
4624
4625 switch (MI.getOpcode()) {
4626 case G_IMPLICIT_DEF:
4627 case G_TRUNC:
4628 case G_AND:
4629 case G_OR:
4630 case G_XOR:
4631 case G_ADD:
4632 case G_SUB:
4633 case G_MUL:
4634 case G_PTR_ADD:
4635 case G_SMULH:
4636 case G_UMULH:
4637 case G_FADD:
4638 case G_FMUL:
4639 case G_FSUB:
4640 case G_FNEG:
4641 case G_FABS:
4642 case G_FCANONICALIZE:
4643 case G_FDIV:
4644 case G_FREM:
4645 case G_FMA:
4646 case G_FMAD:
4647 case G_FPOW:
4648 case G_FEXP:
4649 case G_FEXP2:
4650 case G_FEXP10:
4651 case G_FLOG:
4652 case G_FLOG2:
4653 case G_FLOG10:
4654 case G_FLDEXP:
4655 case G_FNEARBYINT:
4656 case G_FCEIL:
4657 case G_FFLOOR:
4658 case G_FRINT:
4659 case G_INTRINSIC_ROUND:
4660 case G_INTRINSIC_ROUNDEVEN:
4661 case G_INTRINSIC_TRUNC:
4662 case G_FCOS:
4663 case G_FSIN:
4664 case G_FTAN:
4665 case G_FSQRT:
4666 case G_BSWAP:
4667 case G_BITREVERSE:
4668 case G_SDIV:
4669 case G_UDIV:
4670 case G_SREM:
4671 case G_UREM:
4672 case G_SDIVREM:
4673 case G_UDIVREM:
4674 case G_SMIN:
4675 case G_SMAX:
4676 case G_UMIN:
4677 case G_UMAX:
4678 case G_ABS:
4679 case G_FMINNUM:
4680 case G_FMAXNUM:
4681 case G_FMINNUM_IEEE:
4682 case G_FMAXNUM_IEEE:
4683 case G_FMINIMUM:
4684 case G_FMAXIMUM:
4685 case G_FSHL:
4686 case G_FSHR:
4687 case G_ROTL:
4688 case G_ROTR:
4689 case G_FREEZE:
4690 case G_SADDSAT:
4691 case G_SSUBSAT:
4692 case G_UADDSAT:
4693 case G_USUBSAT:
4694 case G_UMULO:
4695 case G_SMULO:
4696 case G_SHL:
4697 case G_LSHR:
4698 case G_ASHR:
4699 case G_SSHLSAT:
4700 case G_USHLSAT:
4701 case G_CTLZ:
4702 case G_CTLZ_ZERO_UNDEF:
4703 case G_CTTZ:
4704 case G_CTTZ_ZERO_UNDEF:
4705 case G_CTPOP:
4706 case G_FCOPYSIGN:
4707 case G_ZEXT:
4708 case G_SEXT:
4709 case G_ANYEXT:
4710 case G_FPEXT:
4711 case G_FPTRUNC:
4712 case G_SITOFP:
4713 case G_UITOFP:
4714 case G_FPTOSI:
4715 case G_FPTOUI:
4716 case G_INTTOPTR:
4717 case G_PTRTOINT:
4718 case G_ADDRSPACE_CAST:
4719 case G_UADDO:
4720 case G_USUBO:
4721 case G_UADDE:
4722 case G_USUBE:
4723 case G_SADDO:
4724 case G_SSUBO:
4725 case G_SADDE:
4726 case G_SSUBE:
4727 case G_STRICT_FADD:
4728 case G_STRICT_FSUB:
4729 case G_STRICT_FMUL:
4730 case G_STRICT_FMA:
4731 case G_STRICT_FLDEXP:
4732 case G_FFREXP:
4733 return fewerElementsVectorMultiEltType(GMI, NumElts);
4734 case G_ICMP:
4735 case G_FCMP:
4736 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
4737 case G_IS_FPCLASS:
4738 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
4739 case G_SELECT:
4740 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
4741 return fewerElementsVectorMultiEltType(GMI, NumElts);
4742 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
4743 case G_PHI:
4744 return fewerElementsVectorPhi(GMI, NumElts);
4745 case G_UNMERGE_VALUES:
4746 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
4747 case G_BUILD_VECTOR:
4748 assert(TypeIdx == 0 && "not a vector type index");
4749 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4750 case G_CONCAT_VECTORS:
4751 if (TypeIdx != 1) // TODO: This probably does work as expected already.
4752 return UnableToLegalize;
4753 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4754 case G_EXTRACT_VECTOR_ELT:
4755 case G_INSERT_VECTOR_ELT:
4756 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
4757 case G_LOAD:
4758 case G_STORE:
4759 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
4760 case G_SEXT_INREG:
4761 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
4762 GISEL_VECREDUCE_CASES_NONSEQ
4763 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
4764 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
4765 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
4766 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
4767 case G_SHUFFLE_VECTOR:
4768 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
4769 case G_FPOWI:
4770 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
4771 case G_BITCAST:
4772 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
4773 case G_INTRINSIC_FPTRUNC_ROUND:
4774 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
4775 default:
4776 return UnableToLegalize;
4777 }
4778}
4779
4780 LegalizerHelper::LegalizeResult
4781 LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx,
4782 LLT NarrowTy) {
4783 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
4784 "Not a bitcast operation");
4785
4786 if (TypeIdx != 0)
4787 return UnableToLegalize;
4788
4789 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
4790
4791 unsigned SrcScalSize = SrcTy.getScalarSizeInBits();
4792 LLT SrcNarrowTy =
4793 LLT::fixed_vector(NarrowTy.getSizeInBits() / SrcScalSize, SrcScalSize);
4794
4795 // Split the Src and Dst Reg into smaller registers
4796 SmallVector<Register> SrcVRegs, BitcastVRegs;
4797 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
4798 return UnableToLegalize;
4799
4800 // Build new smaller bitcast instructions
4801 // Not supporting leftover types for now, but this will have to be handled eventually.
4802 for (unsigned i = 0; i < SrcVRegs.size(); i++)
4803 BitcastVRegs.push_back(
4804 MIRBuilder.buildBitcast(NarrowTy, SrcVRegs[i]).getReg(0));
4805
4806 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
4807 MI.eraseFromParent();
4808 return Legalized;
4809}
4810
4811 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
4812 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4813 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
4814 if (TypeIdx != 0)
4815 return UnableToLegalize;
4816
4817 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
4818 MI.getFirst3RegLLTs();
4819 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
4820 // The shuffle should be canonicalized by now.
4821 if (DstTy != Src1Ty)
4822 return UnableToLegalize;
4823 if (DstTy != Src2Ty)
4824 return UnableToLegalize;
4825
4826 if (!isPowerOf2_32(DstTy.getNumElements()))
4827 return UnableToLegalize;
4828
4829 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
4830 // Further legalization attempts will be needed to split further.
4831 NarrowTy =
4832 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
4833 unsigned NewElts = NarrowTy.getNumElements();
4834
4835 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
4836 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
4837 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
4838 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
4839 SplitSrc2Regs[1]};
4840
4841 Register Hi, Lo;
4842
4843 // If Lo or Hi uses elements from at most two of the four input vectors, then
4844 // express it as a vector shuffle of those two inputs. Otherwise extract the
4845 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
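// e.g. splitting a <4 x s32> shuffle with mask <0, 4, 1, 5> into two <2 x s32>
// halves: the inputs are {Src1Lo, Src1Hi, Src2Lo, Src2Hi}, both halves only use
// Src1Lo and Src2Lo, so roughly
//   Lo = G_SHUFFLE_VECTOR Src1Lo, Src2Lo, shufflemask(0, 2)
//   Hi = G_SHUFFLE_VECTOR Src1Lo, Src2Lo, shufflemask(1, 3)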
4846 SmallVector<int, 16> Ops;
4847 for (unsigned High = 0; High < 2; ++High) {
4848 Register &Output = High ? Hi : Lo;
4849
4850 // Build a shuffle mask for the output, discovering on the fly which
4851 // input vectors to use as shuffle operands (recorded in InputUsed).
4852 // If building a suitable shuffle vector proves too hard, then bail
4853 // out with useBuildVector set.
4854 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
4855 unsigned FirstMaskIdx = High * NewElts;
4856 bool UseBuildVector = false;
4857 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4858 // The mask element. This indexes into the input.
4859 int Idx = Mask[FirstMaskIdx + MaskOffset];
4860
4861 // The input vector this mask element indexes into.
4862 unsigned Input = (unsigned)Idx / NewElts;
4863
4864 if (Input >= std::size(Inputs)) {
4865 // The mask element does not index into any input vector.
4866 Ops.push_back(-1);
4867 continue;
4868 }
4869
4870 // Turn the index into an offset from the start of the input vector.
4871 Idx -= Input * NewElts;
4872
4873 // Find or create a shuffle vector operand to hold this input.
4874 unsigned OpNo;
4875 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
4876 if (InputUsed[OpNo] == Input) {
4877 // This input vector is already an operand.
4878 break;
4879 } else if (InputUsed[OpNo] == -1U) {
4880 // Create a new operand for this input vector.
4881 InputUsed[OpNo] = Input;
4882 break;
4883 }
4884 }
4885
4886 if (OpNo >= std::size(InputUsed)) {
4887 // More than two input vectors used! Give up on trying to create a
4888 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
4889 UseBuildVector = true;
4890 break;
4891 }
4892
4893 // Add the mask index for the new shuffle vector.
4894 Ops.push_back(Idx + OpNo * NewElts);
4895 }
4896
4897 if (UseBuildVector) {
4898 LLT EltTy = NarrowTy.getElementType();
4899 SmallVector<Register, 16> SVOps;
4900
4901 // Extract the input elements by hand.
4902 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4903 // The mask element. This indexes into the input.
4904 int Idx = Mask[FirstMaskIdx + MaskOffset];
4905
4906 // The input vector this mask element indexes into.
4907 unsigned Input = (unsigned)Idx / NewElts;
4908
4909 if (Input >= std::size(Inputs)) {
4910 // The mask element is "undef" or indexes off the end of the input.
4911 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
4912 continue;
4913 }
4914
4915 // Turn the index into an offset from the start of the input vector.
4916 Idx -= Input * NewElts;
4917
4918 // Extract the vector element by hand.
4919 SVOps.push_back(MIRBuilder
4920 .buildExtractVectorElement(
4921 EltTy, Inputs[Input],
4922 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
4923 .getReg(0));
4924 }
4925
4926 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
4927 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
4928 } else if (InputUsed[0] == -1U) {
4929 // No input vectors were used! The result is undefined.
4930 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
4931 } else {
4932 Register Op0 = Inputs[InputUsed[0]];
4933 // If only one input was used, use an undefined vector for the other.
4934 Register Op1 = InputUsed[1] == -1U
4935 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
4936 : Inputs[InputUsed[1]];
4937 // At least one input vector was used. Create a new shuffle vector.
4938 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
4939 }
4940
4941 Ops.clear();
4942 }
4943
4944 MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
4945 MI.eraseFromParent();
4946 return Legalized;
4947}
4948
4949 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
4950 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4951 auto &RdxMI = cast<GVecReduce>(MI);
4952
4953 if (TypeIdx != 1)
4954 return UnableToLegalize;
4955
4956 // The semantics of the normal non-sequential reductions allow us to freely
4957 // re-associate the operation.
4958 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
4959
4960 if (NarrowTy.isVector() &&
4961 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
4962 return UnableToLegalize;
4963
4964 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
4965 SmallVector<Register> SplitSrcs;
4966 // If NarrowTy is a scalar then we're being asked to scalarize.
4967 const unsigned NumParts =
4968 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
4969 : SrcTy.getNumElements();
4970
4971 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
4972 if (NarrowTy.isScalar()) {
4973 if (DstTy != NarrowTy)
4974 return UnableToLegalize; // FIXME: handle implicit extensions.
4975
4976 if (isPowerOf2_32(NumParts)) {
4977 // Generate a tree of scalar operations to reduce the critical path.
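// e.g. with four scalar pieces a, b, c, d this builds (a op b) op (c op d)
// instead of the sequential ((a op b) op c) op d.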
4978 SmallVector<Register> PartialResults;
4979 unsigned NumPartsLeft = NumParts;
4980 while (NumPartsLeft > 1) {
4981 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
4982 PartialResults.emplace_back(
4983 MIRBuilder
4984 .buildInstr(ScalarOpc, {NarrowTy},
4985 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
4986 .getReg(0));
4987 }
4988 SplitSrcs = PartialResults;
4989 PartialResults.clear();
4990 NumPartsLeft = SplitSrcs.size();
4991 }
4992 assert(SplitSrcs.size() == 1);
4993 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
4994 MI.eraseFromParent();
4995 return Legalized;
4996 }
4997 // If we can't generate a tree, then just do sequential operations.
4998 Register Acc = SplitSrcs[0];
4999 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5000 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5001 .getReg(0);
5002 MIRBuilder.buildCopy(DstReg, Acc);
5003 MI.eraseFromParent();
5004 return Legalized;
5005 }
5006 SmallVector<Register> PartialReductions;
5007 for (unsigned Part = 0; Part < NumParts; ++Part) {
5008 PartialReductions.push_back(
5009 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5010 .getReg(0));
5011 }
5012
5013 // If the types involved are powers of 2, we can generate intermediate vector
5014 // ops, before generating a final reduction operation.
5015 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5016 isPowerOf2_32(NarrowTy.getNumElements())) {
5017 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5018 }
5019
5020 Register Acc = PartialReductions[0];
5021 for (unsigned Part = 1; Part < NumParts; ++Part) {
5022 if (Part == NumParts - 1) {
5023 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5024 {Acc, PartialReductions[Part]});
5025 } else {
5026 Acc = MIRBuilder
5027 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5028 .getReg(0);
5029 }
5030 }
5031 MI.eraseFromParent();
5032 return Legalized;
5033}
5034
5035 LegalizerHelper::LegalizeResult
5036 LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
5037 unsigned int TypeIdx,
5038 LLT NarrowTy) {
5039 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5040 MI.getFirst3RegLLTs();
5041 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5042 DstTy != NarrowTy)
5043 return UnableToLegalize;
5044
5045 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5046 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5047 "Unexpected vecreduce opcode");
5048 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5049 ? TargetOpcode::G_FADD
5050 : TargetOpcode::G_FMUL;
5051
5052 SmallVector<Register> SplitSrcs;
5053 unsigned NumParts = SrcTy.getNumElements();
5054 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5055 Register Acc = ScalarReg;
5056 for (unsigned i = 0; i < NumParts; i++)
5057 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5058 .getReg(0);
5059
5060 MIRBuilder.buildCopy(DstReg, Acc);
5061 MI.eraseFromParent();
5062 return Legalized;
5063}
5064
5065 LegalizerHelper::LegalizeResult
5066 LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5067 LLT SrcTy, LLT NarrowTy,
5068 unsigned ScalarOpc) {
5069 SmallVector<Register> SplitSrcs;
5070 // Split the sources into NarrowTy size pieces.
5071 extractParts(SrcReg, NarrowTy,
5072 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5073 MIRBuilder, MRI);
5074 // We're going to do a tree reduction using vector operations until we have
5075 // one NarrowTy size value left.
5076 while (SplitSrcs.size() > 1) {
5077 SmallVector<Register> PartialRdxs;
5078 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5079 Register LHS = SplitSrcs[Idx];
5080 Register RHS = SplitSrcs[Idx + 1];
5081 // Create the intermediate vector op.
5082 Register Res =
5083 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
5084 PartialRdxs.push_back(Res);
5085 }
5086 SplitSrcs = std::move(PartialRdxs);
5087 }
5088 // Finally generate the requested NarrowTy based reduction.
5090 MI.getOperand(1).setReg(SplitSrcs[0]);
5092 return Legalized;
5093}
5094
5095 LegalizerHelper::LegalizeResult
5096 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
5097 const LLT HalfTy, const LLT AmtTy) {
5098
5099 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5100 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5101 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5102
5103 if (Amt.isZero()) {
5104 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
5105 MI.eraseFromParent();
5106 return Legalized;
5107 }
5108
5109 LLT NVT = HalfTy;
5110 unsigned NVTBits = HalfTy.getSizeInBits();
5111 unsigned VTBits = 2 * NVTBits;
5112
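// Illustrative example: narrowing an s64 G_SHL by 40 into s32 halves gives
// NVTBits = 32, so the Amt > NVTBits case below produces Lo = 0 and
// Hi = InL << 8.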
5113 SrcOp Lo(Register(0)), Hi(Register(0));
5114 if (MI.getOpcode() == TargetOpcode::G_SHL) {
5115 if (Amt.ugt(VTBits)) {
5116 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5117 } else if (Amt.ugt(NVTBits)) {
5118 Lo = MIRBuilder.buildConstant(NVT, 0);
5119 Hi = MIRBuilder.buildShl(NVT, InL,
5120 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5121 } else if (Amt == NVTBits) {
5122 Lo = MIRBuilder.buildConstant(NVT, 0);
5123 Hi = InL;
5124 } else {
5125 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
5126 auto OrLHS =
5127 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
5128 auto OrRHS = MIRBuilder.buildLShr(
5129 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5130 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5131 }
5132 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5133 if (Amt.ugt(VTBits)) {
5134 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5135 } else if (Amt.ugt(NVTBits)) {
5136 Lo = MIRBuilder.buildLShr(NVT, InH,
5137 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5138 Hi = MIRBuilder.buildConstant(NVT, 0);
5139 } else if (Amt == NVTBits) {
5140 Lo = InH;
5141 Hi = MIRBuilder.buildConstant(NVT, 0);
5142 } else {
5143 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5144
5145 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5146 auto OrRHS = MIRBuilder.buildShl(
5147 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5148
5149 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5150 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
5151 }
5152 } else {
5153 if (Amt.ugt(VTBits)) {
5154 Lo = Hi = MIRBuilder.buildAShr(
5155 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5156 } else if (Amt.ugt(NVTBits)) {
5157 Lo = MIRBuilder.buildAShr(NVT, InH,
5158 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5159 Hi = MIRBuilder.buildAShr(NVT, InH,
5160 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5161 } else if (Amt == NVTBits) {
5162 Lo = InH;
5163 Hi = MIRBuilder.buildAShr(NVT, InH,
5164 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5165 } else {
5166 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5167
5168 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5169 auto OrRHS = MIRBuilder.buildShl(
5170 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5171
5172 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5173 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
5174 }
5175 }
5176
5177 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
5178 MI.eraseFromParent();
5179
5180 return Legalized;
5181}
5182
5183// TODO: Optimize if constant shift amount.
5184 LegalizerHelper::LegalizeResult
5185 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
5186 LLT RequestedTy) {
5187 if (TypeIdx == 1) {
5188 Observer.changingInstr(MI);
5189 narrowScalarSrc(MI, RequestedTy, 2);
5190 Observer.changedInstr(MI);
5191 return Legalized;
5192 }
5193
5194 Register DstReg = MI.getOperand(0).getReg();
5195 LLT DstTy = MRI.getType(DstReg);
5196 if (DstTy.isVector())
5197 return UnableToLegalize;
5198
5199 Register Amt = MI.getOperand(2).getReg();
5200 LLT ShiftAmtTy = MRI.getType(Amt);
5201 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
5202 if (DstEltSize % 2 != 0)
5203 return UnableToLegalize;
5204
5205 // Ignore the input type. We can only go to exactly half the size of the
5206 // input. If that isn't small enough, the resulting pieces will be further
5207 // legalized.
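// For example, a request to narrow an s64 shift to s16 still produces two s32
// halves here; the s32 pieces are then narrowed again on a later legalization
// iteration until they reach the requested width.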
5208 const unsigned NewBitSize = DstEltSize / 2;
5209 const LLT HalfTy = LLT::scalar(NewBitSize);
5210 const LLT CondTy = LLT::scalar(1);
5211
5212 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
5213 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
5214 ShiftAmtTy);
5215 }
5216
5217 // TODO: Expand with known bits.
5218
5219 // Handle the fully general expansion by an unknown amount.
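// Shape of the expansion below: split the input into Lo/Hi halves, build both
// the "short shift" (Amt < NewBitSize) and "long shift" (Amt >= NewBitSize)
// results, and pick between them (and the unshifted input for Amt == 0) with
// the IsShort/IsZero selects.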
5220 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
5221
5222 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5223 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5224 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5225
5226 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
5227 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
5228
5229 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
5230 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
5231 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
5232
5233 Register ResultRegs[2];
5234 switch (MI.getOpcode()) {
5235 case TargetOpcode::G_SHL: {
5236 // Short: ShAmt < NewBitSize
5237 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
5238
5239 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
5240 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
5241 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
5242
5243 // Long: ShAmt >= NewBitSize
5244 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
5245 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
5246
5247 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
5248 auto Hi = MIRBuilder.buildSelect(
5249 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
5250
5251 ResultRegs[0] = Lo.getReg(0);
5252 ResultRegs[1] = Hi.getReg(0);
5253 break;
5254 }
5255 case TargetOpcode::G_LSHR:
5256 case TargetOpcode::G_ASHR: {
5257 // Short: ShAmt < NewBitSize
5258 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
5259
5260 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
5261 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
5262 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
5263
5264 // Long: ShAmt >= NewBitSize
5265 MachineInstrBuilder HiL;
5266 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5267 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
5268 } else {
5269 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
5270 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
5271 }
5272 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
5273 {InH, AmtExcess}); // Lo from Hi part.
5274
5275 auto Lo = MIRBuilder.buildSelect(
5276 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
5277
5278 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
5279
5280 ResultRegs[0] = Lo.getReg(0);
5281 ResultRegs[1] = Hi.getReg(0);
5282 break;
5283 }
5284 default:
5285 llvm_unreachable("not a shift");
5286 }
5287
5288 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
5289 MI.eraseFromParent();
5290 return Legalized;
5291}
5292
5293 LegalizerHelper::LegalizeResult
5294 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
5295 LLT MoreTy) {
5296 assert(TypeIdx == 0 && "Expecting only Idx 0");
5297
5298 Observer.changingInstr(MI);
5299 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
5300 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
5301 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5302 moreElementsVectorSrc(MI, MoreTy, I);
5303 }
5304
5305 MachineBasicBlock &MBB = *MI.getParent();
5306 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
5307 moreElementsVectorDst(MI, MoreTy, 0);
5308 Observer.changedInstr(MI);
5309 return Legalized;
5310}
5311
5312MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
5313 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
5314 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
5315
5316 switch (Opcode) {
5317 default:
5318 llvm_unreachable(
5319 "getNeutralElementForVecReduce called with invalid opcode!");
5320 case TargetOpcode::G_VECREDUCE_ADD:
5321 case TargetOpcode::G_VECREDUCE_OR:
5322 case TargetOpcode::G_VECREDUCE_XOR:
5323 case TargetOpcode::G_VECREDUCE_UMAX:
5324 return MIRBuilder.buildConstant(Ty, 0);
5325 case TargetOpcode::G_VECREDUCE_MUL:
5326 return MIRBuilder.buildConstant(Ty, 1);
5327 case TargetOpcode::G_VECREDUCE_AND:
5328 case TargetOpcode::G_VECREDUCE_UMIN:
5329 return MIRBuilder.buildConstant(Ty,
5330 APInt::getAllOnes(Ty.getSizeInBits()));
5331 case TargetOpcode::G_VECREDUCE_SMAX:
5332 return MIRBuilder.buildConstant(Ty,
5333 APInt::getSignedMinValue(Ty.getSizeInBits()));
5334 case TargetOpcode::G_VECREDUCE_SMIN:
5335 return MIRBuilder.buildConstant(Ty,
5336 APInt::getSignedMaxValue(Ty.getSizeInBits()));
5337 case TargetOpcode::G_VECREDUCE_FADD:
5338 return MIRBuilder.buildFConstant(Ty, -0.0);
5339 case TargetOpcode::G_VECREDUCE_FMUL:
5340 return MIRBuilder.buildFConstant(Ty, 1.0);
5341 case TargetOpcode::G_VECREDUCE_FMINIMUM:
5342 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
5343 assert(false && "getNeutralElementForVecReduce unimplemented for "
5344 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
5345 }
5346 llvm_unreachable("switch expected to return!");
5347}
5348
5349 LegalizerHelper::LegalizeResult
5350 LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
5351 LLT MoreTy) {
5352 unsigned Opc = MI.getOpcode();
5353 switch (Opc) {
5354 case TargetOpcode::G_IMPLICIT_DEF:
5355 case TargetOpcode::G_LOAD: {
5356 if (TypeIdx != 0)
5357 return UnableToLegalize;
5358 Observer.changingInstr(MI);
5359 moreElementsVectorDst(MI, MoreTy, 0);
5360 Observer.changedInstr(MI);
5361 return Legalized;
5362 }
5363 case TargetOpcode::G_STORE:
5364 if (TypeIdx != 0)
5365 return UnableToLegalize;
5366 Observer.changingInstr(MI);
5367 moreElementsVectorSrc(MI, MoreTy, 0);
5368 Observer.changedInstr(MI);
5369 return Legalized;
5370 case TargetOpcode::G_AND:
5371 case TargetOpcode::G_OR:
5372 case TargetOpcode::G_XOR:
5373 case TargetOpcode::G_ADD:
5374 case TargetOpcode::G_SUB:
5375 case TargetOpcode::G_MUL:
5376 case TargetOpcode::G_FADD:
5377 case TargetOpcode::G_FSUB:
5378 case TargetOpcode::G_FMUL:
5379 case TargetOpcode::G_FDIV:
5380 case TargetOpcode::G_FCOPYSIGN:
5381 case TargetOpcode::G_UADDSAT:
5382 case TargetOpcode::G_USUBSAT:
5383 case TargetOpcode::G_SADDSAT:
5384 case TargetOpcode::G_SSUBSAT:
5385 case TargetOpcode::G_SMIN:
5386 case TargetOpcode::G_SMAX:
5387 case TargetOpcode::G_UMIN:
5388 case TargetOpcode::G_UMAX:
5389 case TargetOpcode::G_FMINNUM:
5390 case TargetOpcode::G_FMAXNUM:
5391 case TargetOpcode::G_FMINNUM_IEEE:
5392 case TargetOpcode::G_FMAXNUM_IEEE:
5393 case TargetOpcode::G_FMINIMUM:
5394 case TargetOpcode::G_FMAXIMUM:
5395 case TargetOpcode::G_STRICT_FADD:
5396 case TargetOpcode::G_STRICT_FSUB:
5397 case TargetOpcode::G_STRICT_FMUL:
5398 case TargetOpcode::G_SHL:
5399 case TargetOpcode::G_ASHR:
5400 case TargetOpcode::G_LSHR: {
5401 Observer.changingInstr(MI);
5402 moreElementsVectorSrc(MI, MoreTy, 1);
5403 moreElementsVectorSrc(MI, MoreTy, 2);
5404 moreElementsVectorDst(MI, MoreTy, 0);
5405 Observer.changedInstr(MI);
5406 return Legalized;
5407 }
5408 case TargetOpcode::G_FMA:
5409 case TargetOpcode::G_STRICT_FMA:
5410 case TargetOpcode::G_FSHR:
5411 case TargetOpcode::G_FSHL: {
5412 Observer.changingInstr(MI);
5413 moreElementsVectorSrc(MI, MoreTy, 1);
5414 moreElementsVectorSrc(MI, MoreTy, 2);
5415 moreElementsVectorSrc(MI, MoreTy, 3);
5416 moreElementsVectorDst(MI, MoreTy, 0);
5417 Observer.changedInstr(MI);
5418 return Legalized;
5419 }
5420 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
5421 case TargetOpcode::G_EXTRACT:
5422 if (TypeIdx != 1)
5423 return UnableToLegalize;
5424 Observer.changingInstr(MI);
5425 moreElementsVectorSrc(MI, MoreTy, 1);
5426 Observer.changedInstr(MI);
5427 return Legalized;
5428 case TargetOpcode::G_INSERT:
5429 case TargetOpcode::G_INSERT_VECTOR_ELT:
5430 case TargetOpcode::G_FREEZE:
5431 case TargetOpcode::G_FNEG:
5432 case TargetOpcode::G_FABS:
5433 case TargetOpcode::G_FSQRT:
5434 case TargetOpcode::G_FCEIL:
5435 case TargetOpcode::G_FFLOOR:
5436 case TargetOpcode::G_FNEARBYINT:
5437 case TargetOpcode::G_FRINT:
5438 case TargetOpcode::G_INTRINSIC_ROUND:
5439 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
5440 case TargetOpcode::G_INTRINSIC_TRUNC:
5441 case TargetOpcode::G_BSWAP:
5442 case TargetOpcode::G_FCANONICALIZE:
5443 case TargetOpcode::G_SEXT_INREG:
5444 case TargetOpcode::G_ABS:
5445 if (TypeIdx != 0)
5446 return UnableToLegalize;
5447 Observer.changingInstr(MI);
5448 moreElementsVectorSrc(MI, MoreTy, 1);
5449 moreElementsVectorDst(MI, MoreTy, 0);
5450 Observer.changedInstr(MI);
5451 return Legalized;
5452 case TargetOpcode::G_SELECT: {
5453 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
5454 if (TypeIdx == 1) {
5455 if (!CondTy.isScalar() ||
5456 DstTy.getElementCount() != MoreTy.getElementCount())
5457 return UnableToLegalize;
5458
5459 // This is turning a scalar select of vectors into a vector
5460 // select. Broadcast the select condition.
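// E.g. a single s1 condition selecting between two vector values is splatted
// (via the shuffle-splat built below) into a vector of conditions, turning
// the operation into an element-wise vector select.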
5461 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
5462 Observer.changingInstr(MI);
5463 MI.getOperand(1).setReg(ShufSplat.getReg(0));
5464 Observer.changedInstr(MI);
5465 return Legalized;
5466 }
5467
5468 if (CondTy.isVector())
5469 return UnableToLegalize;
5470
5471 Observer.changingInstr(MI);
5472 moreElementsVectorSrc(MI, MoreTy, 2);
5473 moreElementsVectorSrc(MI, MoreTy, 3);
5474 moreElementsVectorDst(MI, MoreTy, 0);
5475 Observer.changedInstr(MI);
5476 return Legalized;
5477 }
5478 case TargetOpcode::G_UNMERGE_VALUES:
5479 return UnableToLegalize;
5480 case TargetOpcode::G_PHI:
5481 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
5482 case TargetOpcode::G_SHUFFLE_VECTOR:
5483 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
5484 case TargetOpcode::G_BUILD_VECTOR: {
5485 SmallVector<Register, 8> Elts;
5486 for (auto Op : MI.uses()) {
5487 Elts.push_back(Op.getReg());
5488 }
5489
5490 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
5491 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()).getReg(0));
5492 }
5493
5494 MIRBuilder.buildDeleteTrailingVectorElements(
5495 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
5496 MI.eraseFromParent();
5497 return Legalized;
5498 }
5499 case TargetOpcode::G_SEXT:
5500 case TargetOpcode::G_ZEXT:
5501 case TargetOpcode::G_ANYEXT:
5502 case TargetOpcode::G_TRUNC:
5503 case TargetOpcode::G_FPTRUNC:
5504 case TargetOpcode::G_FPEXT:
5505 case TargetOpcode::G_FPTOSI:
5506 case TargetOpcode::G_FPTOUI:
5507 case TargetOpcode::G_SITOFP:
5508 case TargetOpcode::G_UITOFP: {
5509 Observer.changingInstr(MI);
5510 LLT SrcExtTy;
5511 LLT DstExtTy;
5512 if (TypeIdx == 0) {
5513 DstExtTy = MoreTy;
5514 SrcExtTy = LLT::fixed_vector(
5515 MoreTy.getNumElements(),
5516 MRI.getType(MI.getOperand(1).getReg()).getElementType());
5517 } else {
5518 DstExtTy = LLT::fixed_vector(
5519 MoreTy.getNumElements(),
5520 MRI.getType(MI.getOperand(0).getReg()).getElementType());
5521 SrcExtTy = MoreTy;
5522 }
5523 moreElementsVectorSrc(MI, SrcExtTy, 1);
5524 moreElementsVectorDst(MI, DstExtTy, 0);
5525 Observer.changedInstr(MI);
5526 return Legalized;
5527 }
5528 case TargetOpcode::G_ICMP:
5529 case TargetOpcode::G_FCMP: {
5530 if (TypeIdx != 1)
5531 return UnableToLegalize;
5532
5533 Observer.changingInstr(MI);
5534 moreElementsVectorSrc(MI, MoreTy, 2);
5535 moreElementsVectorSrc(MI, MoreTy, 3);
5536 LLT CondTy = LLT::fixed_vector(
5537 MoreTy.getNumElements(),
5538 MRI.getType(MI.getOperand(0).getReg()).getElementType());
5539 moreElementsVectorDst(MI, CondTy, 0);
5540 Observer.changedInstr(MI);
5541 return Legalized;
5542 }
5543 case TargetOpcode::G_BITCAST: {
5544 if (TypeIdx != 0)
5545 return UnableToLegalize;
5546
5547 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
5548 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5549
5550 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
5551 if (coefficient % DstTy.getNumElements() != 0)
5552 return UnableToLegalize;
5553
5554 coefficient = coefficient / DstTy.getNumElements();
5555
5556 LLT NewTy = SrcTy.changeElementCount(
5557 ElementCount::get(coefficient, MoreTy.isScalable()));
5558 Observer.changingInstr(MI);
5559 moreElementsVectorSrc(MI, NewTy, 1);
5560 moreElementsVectorDst(MI, MoreTy, 0);
5561 Observer.changedInstr(MI);
5562 return Legalized;
5563 }
5564 case TargetOpcode::G_VECREDUCE_FADD:
5565 case TargetOpcode::G_VECREDUCE_FMUL:
5566 case TargetOpcode::G_VECREDUCE_ADD:
5567 case TargetOpcode::G_VECREDUCE_MUL:
5568 case TargetOpcode::G_VECREDUCE_AND:
5569 case TargetOpcode::G_VECREDUCE_OR:
5570 case TargetOpcode::G_VECREDUCE_XOR:
5571 case TargetOpcode::G_VECREDUCE_SMAX:
5572 case TargetOpcode::G_VECREDUCE_SMIN:
5573 case TargetOpcode::G_VECREDUCE_UMAX:
5574 case TargetOpcode::G_VECREDUCE_UMIN: {
5575 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
5576 MachineOperand &MO = MI.getOperand(1);
5577 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
5578 auto NeutralElement = getNeutralElementForVecReduce(
5579 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
5580
5581 LLT IdxTy(TLI.getVectorIdxTy(MIRBuilder.getDataLayout()));
5582 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
5583 i != e; i++) {
5584 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
5585 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
5586 NeutralElement, Idx);
5587 }
5588
5589 Observer.changingInstr(MI);
5590 MO.setReg(NewVec.getReg(0));
5591 Observer.changedInstr(MI);
5592 return Legalized;
5593 }
5594
5595 default:
5596 return UnableToLegalize;
5597 }
5598}
5599
5600 LegalizerHelper::LegalizeResult
5601 LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
5602 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5603 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5604 unsigned MaskNumElts = Mask.size();
5605 unsigned SrcNumElts = SrcTy.getNumElements();
5606 LLT DestEltTy = DstTy.getElementType();
5607
5608 if (MaskNumElts == SrcNumElts)
5609 return Legalized;
5610
5611 if (MaskNumElts < SrcNumElts) {
5612 // Extend mask to match new destination vector size with
5613 // undef values.
5614 SmallVector<int, 16> NewMask(Mask);
5615 for (unsigned I = MaskNumElts; I < SrcNumElts; ++I)
5616 NewMask.push_back(-1);
5617
5618 moreElementsVectorDst(MI, SrcTy, 0);
5619 MIRBuilder.setInstrAndDebugLoc(MI);
5620 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
5621 MI.getOperand(1).getReg(),
5622 MI.getOperand(2).getReg(), NewMask);
5623 MI.eraseFromParent();
5624
5625 return Legalized;
5626 }
5627
5628 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
5629 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
5630 LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
5631
5632 // Create new source vectors by concatenating the initial
5633 // source vectors with undefined vectors of the same size.
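// For example, with <4 x s32> sources and a 6-element mask, each source is
// padded to <8 x s32> by concatenating one undef <4 x s32> vector, and mask
// indices that referred to the second source are shifted up by the padding
// amount below.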
5634 auto Undef = MIRBuilder.buildUndef(SrcTy);
5635 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
5636 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
5637 MOps1[0] = MI.getOperand(1).getReg();
5638 MOps2[0] = MI.getOperand(2).getReg();
5639
5640 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
5641 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
5642
5643 // Readjust mask for new input vector length.
5644 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
5645 for (unsigned I = 0; I != MaskNumElts; ++I) {
5646 int Idx = Mask[I];
5647 if (Idx >= static_cast<int>(SrcNumElts))
5648 Idx += PaddedMaskNumElts - SrcNumElts;
5649 MappedOps[I] = Idx;
5650 }
5651
5652 // If we got more elements than required, extract subvector.
5653 if (MaskNumElts != PaddedMaskNumElts) {
5654 auto Shuffle =
5655 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
5656
5657 SmallVector<Register, 16> Elts(MaskNumElts);
5658 for (unsigned I = 0; I < MaskNumElts; ++I) {
5659 Elts[I] =
5660 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
5661 .getReg(0);
5662 }
5663 MIRBuilder.buildBuildVector(DstReg, Elts);
5664 } else {
5665 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
5666 }
5667
5668 MI.eraseFromParent();
5669 return Legalized;
5670 }
5671
5672 LegalizerHelper::LegalizeResult
5673 LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
5674 unsigned int TypeIdx, LLT MoreTy) {
5675 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
5676 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5677 unsigned NumElts = DstTy.getNumElements();
5678 unsigned WidenNumElts = MoreTy.getNumElements();
5679
5680 if (DstTy.isVector() && Src1Ty.isVector() &&
5681 DstTy.getNumElements() != Src1Ty.getNumElements()) {
5682 return equalizeVectorShuffleLengths(MI);
5683 }
5684
5685 if (TypeIdx != 0)
5686 return UnableToLegalize;
5687
5688 // Expect a canonicalized shuffle.
5689 if (DstTy != Src1Ty || DstTy != Src2Ty)
5690 return UnableToLegalize;
5691
5692 moreElementsVectorSrc(MI, MoreTy, 1);
5693 moreElementsVectorSrc(MI, MoreTy, 2);
5694
5695 // Adjust mask based on new input vector length.
5696 SmallVector<int, 16> NewMask;
5697 for (unsigned I = 0; I != NumElts; ++I) {
5698 int Idx = Mask[I];
5699 if (Idx < static_cast<int>(NumElts))
5700 NewMask.push_back(Idx);
5701 else
5702 NewMask.push_back(Idx - NumElts + WidenNumElts);
5703 }
5704 for (unsigned I = NumElts; I != WidenNumElts; ++I)
5705 NewMask.push_back(-1);
5706 moreElementsVectorDst(MI, MoreTy, 0);
5707 MIRBuilder.setInstrAndDebugLoc(MI);
5708 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
5709 MI.getOperand(1).getReg(),
5710 MI.getOperand(2).getReg(), NewMask);
5711 MI.eraseFromParent();
5712 return Legalized;
5713}
5714
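// Schoolbook multiplication on NarrowTy-sized limbs: DstRegs[K] accumulates
// the low halves (G_MUL) of Src1[I] * Src2[J] with I + J == K, the high
// halves (G_UMULH) of the products with I + J == K - 1, plus the carries left
// over from summing the previous limb -- long multiplication done digit by
// digit, where each "digit" is one NarrowTy register.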
5715void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
5716 ArrayRef<Register> Src1Regs,
5717 ArrayRef<Register> Src2Regs,
5718 LLT NarrowTy) {
5719 MachineIRBuilder &B = MIRBuilder;
5720 unsigned SrcParts = Src1Regs.size();
5721 unsigned DstParts = DstRegs.size();
5722
5723 unsigned DstIdx = 0; // Low bits of the result.
5724 Register FactorSum =
5725 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
5726 DstRegs[DstIdx] = FactorSum;
5727
5728 unsigned CarrySumPrevDstIdx;
5729 SmallVector<Register, 4> Factors;
5730
5731 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
5732 // Collect low parts of muls for DstIdx.
5733 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
5734 i <= std::min(DstIdx, SrcParts - 1); ++i) {
5735 MachineInstrBuilder Mul =
5736 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
5737 Factors.push_back(Mul.getReg(0));
5738 }
5739 // Collect high parts of muls from previous DstIdx.
5740 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
5741 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
5742 MachineInstrBuilder Umulh =
5743 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
5744 Factors.push_back(Umulh.getReg(0));
5745 }
5746 // Add CarrySum from additions calculated for previous DstIdx.
5747 if (DstIdx != 1) {
5748 Factors.push_back(CarrySumPrevDstIdx);
5749 }
5750
5751 Register CarrySum;
5752 // Add all factors and accumulate all carries into CarrySum.
5753 if (DstIdx != DstParts - 1) {
5754 MachineInstrBuilder Uaddo =
5755 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
5756 FactorSum = Uaddo.getReg(0);
5757 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
5758 for (unsigned i = 2; i < Factors.size(); ++i) {
5759 MachineInstrBuilder Uaddo =
5760 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
5761 FactorSum = Uaddo.getReg(0);
5762 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
5763 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
5764 }
5765 } else {
5766 // Since value for the next index is not calculated, neither is CarrySum.
5767 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
5768 for (unsigned i = 2; i < Factors.size(); ++i)
5769 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
5770 }
5771
5772 CarrySumPrevDstIdx = CarrySum;
5773 DstRegs[DstIdx] = FactorSum;
5774 Factors.clear();
5775 }
5776}
5777
5778 LegalizerHelper::LegalizeResult
5779 LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
5780 LLT NarrowTy) {
5781 if (TypeIdx != 0)
5782 return UnableToLegalize;
5783
5784 Register DstReg = MI.getOperand(0).getReg();
5785 LLT DstType = MRI.getType(DstReg);
5786 // FIXME: add support for vector types
5787 if (DstType.isVector())
5788 return UnableToLegalize;
5789
5790 unsigned Opcode = MI.getOpcode();
5791 unsigned OpO, OpE, OpF;
5792 switch (Opcode) {
5793 case TargetOpcode::G_SADDO:
5794 case TargetOpcode::G_SADDE:
5795 case TargetOpcode::G_UADDO:
5796 case TargetOpcode::G_UADDE:
5797 case TargetOpcode::G_ADD:
5798 OpO = TargetOpcode::G_UADDO;
5799 OpE = TargetOpcode::G_UADDE;
5800 OpF = TargetOpcode::G_UADDE;
5801 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
5802 OpF = TargetOpcode::G_SADDE;
5803 break;
5804 case TargetOpcode::G_SSUBO:
5805 case TargetOpcode::G_SSUBE:
5806 case TargetOpcode::G_USUBO:
5807 case TargetOpcode::G_USUBE:
5808 case TargetOpcode::G_SUB:
5809 OpO = TargetOpcode::G_USUBO;
5810 OpE = TargetOpcode::G_USUBE;
5811 OpF = TargetOpcode::G_USUBE;
5812 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
5813 OpF = TargetOpcode::G_SSUBE;
5814 break;
5815 default:
5816 llvm_unreachable("Unexpected add/sub opcode!");
5817 }
5818
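// The narrowed parts are chained through their carry bits: the lowest part
// uses the plain overflow opcode (OpO), middle parts consume and produce a
// carry with OpE, and only the topmost part uses OpF so that a signed
// overflow flag (for G_SADDO/G_SSUBO and friends) is computed where the sign
// bit lives. E.g. an s128 G_ADD narrowed to s32 becomes one G_UADDO followed
// by three G_UADDE instructions.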
5819 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
5820 unsigned NumDefs = MI.getNumExplicitDefs();
5821 Register Src1 = MI.getOperand(NumDefs).getReg();
5822 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
5823 Register CarryDst, CarryIn;
5824 if (NumDefs == 2)
5825 CarryDst = MI.getOperand(1).getReg();
5826 if (MI.getNumOperands() == NumDefs + 3)
5827 CarryIn = MI.getOperand(NumDefs + 2).getReg();
5828
5829 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
5830 LLT LeftoverTy, DummyTy;
5831 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
5832 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
5833 MIRBuilder, MRI);
5834 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
5835 MRI);
5836
5837 int NarrowParts = Src1Regs.size();
5838 for (int I = 0, E = Src1Left.size(); I != E; ++I) {
5839 Src1Regs.push_back(Src1Left[I]);
5840 Src2Regs.push_back(Src2Left[I]);
5841 }
5842 DstRegs.reserve(Src1Regs.size());
5843
5844 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
5845 Register DstReg =
5846 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
5847 Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
5848 // Forward the final carry-out to the destination register
5849 if (i == e - 1 && CarryDst)
5850 CarryOut = CarryDst;
5851
5852 if (!CarryIn) {
5853 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
5854 {Src1Regs[i], Src2Regs[i]});
5855 } else if (i == e - 1) {
5856 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
5857 {Src1Regs[i], Src2Regs[i], CarryIn});
5858 } else {
5859 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
5860 {Src1Regs[i], Src2Regs[i], CarryIn});
5861 }
5862
5863 DstRegs.push_back(DstReg);
5864 CarryIn = CarryOut;
5865 }
5866 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
5867 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
5868 ArrayRef(DstRegs).drop_front(NarrowParts));
5869
5870 MI.eraseFromParent();
5871 return Legalized;
5872}
5873
5874 LegalizerHelper::LegalizeResult
5875 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
5876 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
5877
5878 LLT Ty = MRI.getType(DstReg);
5879 if (Ty.isVector())
5880 return UnableToLegalize;
5881
5882 unsigned Size = Ty.getSizeInBits();
5883 unsigned NarrowSize = NarrowTy.getSizeInBits();
5884 if (Size % NarrowSize != 0)
5885 return UnableToLegalize;
5886
5887 unsigned NumParts = Size / NarrowSize;
5888 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
5889 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
5890
5891 SmallVector<Register, 2> Src1Parts, Src2Parts;
5892 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
5893 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
5894 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
5895 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
5896
5897 // Take only high half of registers if this is high mul.
5898 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
5899 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
5900 MI.eraseFromParent();
5901 return Legalized;
5902}
5903
5904 LegalizerHelper::LegalizeResult
5905 LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
5906 LLT NarrowTy) {
5907 if (TypeIdx != 0)
5908 return UnableToLegalize;
5909
5910 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
5911
5912 Register Src = MI.getOperand(1).getReg();
5913 LLT SrcTy = MRI.getType(Src);
5914
5915 // If all finite floats fit into the narrowed integer type, we can just swap
5916 // out the result type. This is practically only useful for conversions from
5917 // half to at least 16-bits, so just handle the one case.
5918 if (SrcTy.getScalarType() != LLT::scalar(16) ||
5919 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
5920 return UnableToLegalize;
5921
5922 Observer.changingInstr(MI);
5923 narrowScalarDst(MI, NarrowTy, 0,
5924 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
5925 Observer.changedInstr(MI);
5926 return Legalized;
5927}
5928
5929 LegalizerHelper::LegalizeResult
5930 LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
5931 LLT NarrowTy) {
5932 if (TypeIdx != 1)
5933 return UnableToLegalize;
5934
5935 uint64_t NarrowSize = NarrowTy.getSizeInBits();
5936
5937 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
5938 // FIXME: add support for when SizeOp1 isn't an exact multiple of
5939 // NarrowSize.
5940 if (SizeOp1 % NarrowSize != 0)
5941 return UnableToLegalize;
5942 int NumParts = SizeOp1 / NarrowSize;
5943
5944 SmallVector<Register, 2> SrcRegs, DstRegs;
5945 SmallVector<uint64_t, 2> Indexes;
5946 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
5947 MIRBuilder, MRI);
5948
5949 Register OpReg = MI.getOperand(0).getReg();
5950 uint64_t OpStart = MI.getOperand(2).getImm();
5951 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
5952 for (int i = 0; i < NumParts; ++i) {
5953 unsigned SrcStart = i * NarrowSize;
5954
5955 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
5956 // No part of the extract uses this subregister, ignore it.
5957 continue;
5958 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
5959 // The entire subregister is extracted, forward the value.
5960 DstRegs.push_back(SrcRegs[i]);
5961 continue;
5962 }
5963
5964 // OpSegStart is where this destination segment would start in OpReg if it
5965 // extended infinitely in both directions.
5966 int64_t ExtractOffset;
5967 uint64_t SegSize;
5968 if (OpStart < SrcStart) {
5969 ExtractOffset = 0;
5970 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
5971 } else {
5972 ExtractOffset = OpStart - SrcStart;
5973 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
5974 }
5975
5976 Register SegReg = SrcRegs[i];
5977 if (ExtractOffset != 0 || SegSize != NarrowSize) {
5978 // A genuine extract is needed.
5979 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
5980 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
5981 }
5982
5983 DstRegs.push_back(SegReg);
5984 }
5985
5986 Register DstReg = MI.getOperand(0).getReg();
5987 if (MRI.getType(DstReg).isVector())
5988 MIRBuilder.buildBuildVector(DstReg, DstRegs);
5989 else if (DstRegs.size() > 1)
5990 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
5991 else
5992 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
5993 MI.eraseFromParent();
5994 return Legalized;
5995}
5996
5997 LegalizerHelper::LegalizeResult
5998 LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
5999 LLT NarrowTy) {
6000 // FIXME: Don't know how to handle secondary types yet.
6001 if (TypeIdx != 0)
6002 return UnableToLegalize;
6003
6004 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
6005 SmallVector<uint64_t, 2> Indexes;
6006 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
6007 LLT LeftoverTy;
6008 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
6009 LeftoverRegs, MIRBuilder, MRI);
6010
6011 for (Register Reg : LeftoverRegs)
6012 SrcRegs.push_back(Reg);
6013
6014 uint64_t NarrowSize = NarrowTy.getSizeInBits();
6015 Register OpReg = MI.getOperand(2).getReg();
6016 uint64_t OpStart = MI.getOperand(3).getImm();
6017 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
6018 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
6019 unsigned DstStart = I * NarrowSize;
6020
6021 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6022 // The entire subregister is defined by this insert, forward the new
6023 // value.
6024 DstRegs.push_back(OpReg);
6025 continue;
6026 }
6027
6028 Register SrcReg = SrcRegs[I];
6029 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
6030 // The leftover reg is smaller than NarrowTy, so we need to extend it.
6031 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
6032 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
6033 }
6034
6035 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
6036 // No part of the insert affects this subregister, forward the original.
6037 DstRegs.push_back(SrcReg);
6038 continue;
6039 }
6040
6041 // OpSegStart is where this destination segment would start in OpReg if it
6042 // extended infinitely in both directions.
6043 int64_t ExtractOffset, InsertOffset;
6044 uint64_t SegSize;
6045 if (OpStart < DstStart) {
6046 InsertOffset = 0;
6047 ExtractOffset = DstStart - OpStart;
6048 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
6049 } else {
6050 InsertOffset = OpStart - DstStart;
6051 ExtractOffset = 0;
6052 SegSize =
6053 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
6054 }
6055
6056 Register SegReg = OpReg;
6057 if (ExtractOffset != 0 || SegSize != OpSize) {
6058 // A genuine extract is needed.
6059 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
6060 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
6061 }
6062
6063 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
6064 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
6065 DstRegs.push_back(DstReg);
6066 }
6067
6068 uint64_t WideSize = DstRegs.size() * NarrowSize;
6069 Register DstReg = MI.getOperand(0).getReg();
6070 if (WideSize > RegTy.getSizeInBits()) {
6071 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
6072 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
6073 MIRBuilder.buildTrunc(DstReg, MergeReg);
6074 } else
6075 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6076
6077 MI.eraseFromParent();
6078 return Legalized;
6079}
6080
6081 LegalizerHelper::LegalizeResult
6082 LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
6083 LLT NarrowTy) {
6084 Register DstReg = MI.getOperand(0).getReg();
6085 LLT DstTy = MRI.getType(DstReg);
6086
6087 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
6088
6089 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6090 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
6091 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6092 LLT LeftoverTy;
6093 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
6094 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
6095 return UnableToLegalize;
6096
6097 LLT Unused;
6098 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
6099 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6100 llvm_unreachable("inconsistent extractParts result");
6101
6102 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6103 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
6104 {Src0Regs[I], Src1Regs[I]});
6105 DstRegs.push_back(Inst.getReg(0));
6106 }
6107
6108 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6109 auto Inst = MIRBuilder.buildInstr(
6110 MI.getOpcode(),
6111 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
6112 DstLeftoverRegs.push_back(Inst.getReg(0));
6113 }
6114
6115 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6116 LeftoverTy, DstLeftoverRegs);
6117
6118 MI.eraseFromParent();
6119 return Legalized;
6120}
6121
6122 LegalizerHelper::LegalizeResult
6123 LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
6124 LLT NarrowTy) {
6125 if (TypeIdx != 0)
6126 return UnableToLegalize;
6127
6128 auto [DstReg, SrcReg] = MI.getFirst2Regs();
6129
6130 LLT DstTy = MRI.getType(DstReg);
6131 if (DstTy.isVector())
6132 return UnableToLegalize;
6133
6134 SmallVector<Register, 8> Parts;
6135 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
6136 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
6137 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
6138
6139 MI.eraseFromParent();
6140 return Legalized;
6141}
6142
6143 LegalizerHelper::LegalizeResult
6144 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
6145 LLT NarrowTy) {
6146 if (TypeIdx != 0)
6147 return UnableToLegalize;
6148
6149 Register CondReg = MI.getOperand(1).getReg();
6150 LLT CondTy = MRI.getType(CondReg);
6151 if (CondTy.isVector()) // TODO: Handle vselect
6152 return UnableToLegalize;
6153
6154 Register DstReg = MI.getOperand(0).getReg();
6155 LLT DstTy = MRI.getType(DstReg);
6156
6157 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6158 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6159 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
6160 LLT LeftoverTy;
6161 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
6162 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6163 return UnableToLegalize;
6164
6165 LLT Unused;
6166 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
6167 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
6168 llvm_unreachable("inconsistent extractParts result");
6169
6170 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6171 auto Select = MIRBuilder.buildSelect(NarrowTy,
6172 CondReg, Src1Regs[I], Src2Regs[I]);
6173 DstRegs.push_back(Select.getReg(0));
6174 }
6175
6176 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6177 auto Select = MIRBuilder.buildSelect(
6178 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
6179 DstLeftoverRegs.push_back(Select.getReg(0));
6180 }
6181
6182 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6183 LeftoverTy, DstLeftoverRegs);
6184
6185 MI.eraseFromParent();
6186 return Legalized;
6187}
6188
6189 LegalizerHelper::LegalizeResult
6190 LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
6191 LLT NarrowTy) {
6192 if (TypeIdx != 1)
6193 return UnableToLegalize;
6194
6195 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6196 unsigned NarrowSize = NarrowTy.getSizeInBits();
6197
6198 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6199 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
6200
6201 MachineIRBuilder &B = MIRBuilder;
6202 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6203 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
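// Worked example with NarrowSize = 32 on an s64 source: for Hi:Lo =
// 0x00000000:0x00000001 the Hi == 0 path yields 32 + ctlz(1) = 32 + 31 = 63.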
6204 auto C_0 = B.buildConstant(NarrowTy, 0);
6205 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
6206 UnmergeSrc.getReg(1), C_0);
6207 auto LoCTLZ = IsUndef ?
6208 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
6209 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
6210 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
6211 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
6212 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
6213 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
6214
6215 MI.eraseFromParent();
6216 return Legalized;
6217 }
6218
6219 return UnableToLegalize;
6220}
6221
6222 LegalizerHelper::LegalizeResult
6223 LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
6224 LLT NarrowTy) {
6225 if (TypeIdx != 1)
6226 return UnableToLegalize;
6227
6228 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6229 unsigned NarrowSize = NarrowTy.getSizeInBits();
6230
6231 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6232 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
6233
6234 MachineIRBuilder &B = MIRBuilder;
6235 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6236 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
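// Worked example with NarrowSize = 32 on an s64 source: for Hi:Lo =
// 0x00000001:0x00000000 the Lo == 0 path yields cttz(1) + 32 = 0 + 32 = 32.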
6237 auto C_0 = B.buildConstant(NarrowTy, 0);
6238 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
6239 UnmergeSrc.getReg(0), C_0);
6240 auto HiCTTZ = IsUndef ?
6241 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
6242 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
6243 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
6244 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
6245 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
6246 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
6247
6248 MI.eraseFromParent();
6249 return Legalized;
6250 }
6251
6252 return UnableToLegalize;
6253}
6254
6255 LegalizerHelper::LegalizeResult
6256 LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
6257 LLT NarrowTy) {
6258 if (TypeIdx != 1)
6259 return UnableToLegalize;
6260
6261 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6262 unsigned NarrowSize = NarrowTy.getSizeInBits();
6263
6264 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6265 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
6266
6267 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
6268 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
6269 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
6270
6271 MI.eraseFromParent();
6272 return Legalized;
6273 }
6274
6275 return UnableToLegalize;
6276}
6277
6278 LegalizerHelper::LegalizeResult
6279 LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
6280 LLT NarrowTy) {
6281 if (TypeIdx != 1)
6282 return UnableToLegalize;
6283
6284 MachineIRBuilder &B = MIRBuilder;
6285 Register ExpReg = MI.getOperand(2).getReg();
6286 LLT ExpTy = MRI.getType(ExpReg);
6287
6288 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
6289
6290 // Clamp the exponent to the range of the target type.
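// Note: any exponent outside [minIntN(ClampSize), maxIntN(ClampSize)] already
// overflows or underflows every supported floating-point result type, so in
// practice the clamp below does not change the computed value; it only makes
// the operand fit in NarrowTy.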
6291 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
6292 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
6293 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
6294 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
6295
6296 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
6297 Observer.changingInstr(MI);
6298 MI.getOperand(2).setReg(Trunc.getReg(0));
6299 Observer.changedInstr(MI);
6300 return Legalized;
6301}
6302
6303 LegalizerHelper::LegalizeResult
6304 LegalizerHelper::lowerBitCount(MachineInstr &MI) {
6305 unsigned Opc = MI.getOpcode();
6306 const auto &TII = MIRBuilder.getTII();
6307 auto isSupported = [this](const LegalityQuery &Q) {
6308 auto QAction = LI.getAction(Q).Action;
6309 return QAction == Legal || QAction == Libcall || QAction == Custom;
6310 };
6311 switch (Opc) {
6312 default:
6313 return UnableToLegalize;
6314 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
6315 // This trivially expands to CTLZ.
6316 Observer.changingInstr(MI);
6317 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
6318 Observer.changedInstr(MI);
6319 return Legalized;
6320 }
6321 case TargetOpcode::G_CTLZ: {
6322 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6323 unsigned Len = SrcTy.getSizeInBits();
6324
6325 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6326 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
6327 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
6328 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
6329 auto ICmp = MIRBuilder.buildICmp(
6330 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
6331 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
6332 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
6333 MI.eraseFromParent();
6334 return Legalized;
6335 }
6336 // for now, we do this:
6337 // NewLen = NextPowerOf2(Len);
6338 // x = x | (x >> 1);
6339 // x = x | (x >> 2);
6340 // ...
6341 // x = x | (x >>16);
6342 // x = x | (x >>32); // for 64-bit input
6343 // Up to NewLen/2
6344 // return Len - popcount(x);
6345 //
6346 // Ref: "Hacker's Delight" by Henry Warren
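// Illustrative 16-bit example: x = 0x00F0. The or-shift cascade smears the
// leading one to the right, giving 0x00FF, so popcount = 8 and
// ctlz = Len - popcount = 16 - 8 = 8.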
6347 Register Op = SrcReg;
6348 unsigned NewLen = PowerOf2Ceil(Len);
6349 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
6350 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
6351 auto MIBOp = MIRBuilder.buildOr(
6352 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
6353 Op = MIBOp.getReg(0);
6354 }
6355 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
6356 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
6357 MIBPop);
6358 MI.eraseFromParent();
6359 return Legalized;
6360 }
6361 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
6362 // This trivially expands to CTTZ.
6363 Observer.changingInstr(MI);
6364 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
6365 Observer.changedInstr(MI);
6366 return Legalized;
6367 }
6368 case TargetOpcode::G_CTTZ: {
6369 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6370
6371 unsigned Len = SrcTy.getSizeInBits();
6372 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6373 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
6374 // zero.
6375 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
6376 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
6377 auto ICmp = MIRBuilder.buildICmp(
6378 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
6379 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
6380 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
6381 MI.eraseFromParent();
6382 return Legalized;
6383 }
6384 // for now, we use: { return popcount(~x & (x - 1)); }
6385 // unless the target has ctlz but not ctpop, in which case we use:
6386 // { return 32 - nlz(~x & (x-1)); }
6387 // Ref: "Hacker's Delight" by Henry Warren
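// Illustrative example: x = 0b01001000. Then ~x & (x - 1) = 0b00000111, whose
// popcount is 3, which is exactly cttz(x).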
6388 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
6389 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
6390 auto MIBTmp = MIRBuilder.buildAnd(
6391 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
6392 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
6393 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
6394 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
6395 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
6396 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
6397 MI.eraseFromParent();
6398 return Legalized;
6399 }
6400 Observer.changingInstr(MI);
6401 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
6402 MI.getOperand(1).setReg(MIBTmp.getReg(0));
6403 Observer.changedInstr(MI);
6404 return Legalized;
6405 }
6406 case TargetOpcode::G_CTPOP: {
6407 Register SrcReg = MI.getOperand(1).getReg();
6408 LLT Ty = MRI.getType(SrcReg);
6409 unsigned Size = Ty.getSizeInBits();
6410 MachineIRBuilder &B = MIRBuilder;
6411
6412 // Count set bits in blocks of 2 bits. Default approach would be
6413 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
6414 // We use following formula instead:
6415 // B2Count = val - { (val >> 1) & 0x55555555 }
6416 // since it gives the same result in blocks of 2 with one instruction fewer.
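// Per 2-bit block b this matches the popcount: 0b11 -> 3 - 1 = 2,
// 0b10 -> 2 - 1 = 1, 0b01 -> 1 - 0 = 1, 0b00 -> 0 - 0 = 0.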
6417 auto C_1 = B.buildConstant(Ty, 1);
6418 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
6419 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
6420 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
6421 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
6422 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
6423
6424 // In order to get the count in blocks of 4, add the values from adjacent blocks of 2.
6425 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
6426 auto C_2 = B.buildConstant(Ty, 2);
6427 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
6428 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
6429 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
6430 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
6431 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
6432 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
6433
6434 // For count in blocks of 8 bits we don't have to mask high 4 bits before
6435 // addition since count value sits in range {0,...,8} and 4 bits are enough
6436 // to hold such binary values. After addition high 4 bits still hold count
6437 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
6438 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
6439 auto C_4 = B.buildConstant(Ty, 4);
6440 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
6441 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
6442 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
6443 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
6444 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
6445
6446 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
6447 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
6448 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
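// E.g. for a 32-bit value, multiplying by the 0x01010101 mask adds all four
// byte counts into the most significant byte, and the shift by Size - 8 below
// moves that total down to bit 0.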
6449 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
6450
6451 // Shift count result from 8 high bits to low bits.
6452 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
6453
6454 auto IsMulSupported = [this](const LLT Ty) {
6455 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
6456 return Action == Legal || Action == WidenScalar || Action == Custom;
6457 };
6458 if (IsMulSupported(Ty)) {
6459 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
6460 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
6461 } else {
6462 auto ResTmp = B8Count;
6463 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
6464 auto ShiftC = B.buildConstant(Ty, Shift);
6465 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
6466 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
6467 }
6468 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
6469 }
6470 MI.eraseFromParent();
6471 return Legalized;
6472 }
6473 }
6474}
6475
6476// Check that (every element of) Reg is undef or not an exact multiple of BW.
6477 static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
6478 Register Reg, unsigned BW) {
6479 return matchUnaryPredicate(
6480 MRI, Reg,
6481 [=](const Constant *C) {
6482 // Null constant here means an undef.
6483 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
6484 return !CI || CI->getValue().urem(BW) != 0;
6485 },
6486 /*AllowUndefs*/ true);
6487}
6488
6489 LegalizerHelper::LegalizeResult
6490 LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
6491 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
6492 LLT Ty = MRI.getType(Dst);
6493 LLT ShTy = MRI.getType(Z);
6494
6495 unsigned BW = Ty.getScalarSizeInBits();
6496
6497 if (!isPowerOf2_32(BW))
6498 return UnableToLegalize;
6499
6500 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6501 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6502
6503 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
6504 // fshl X, Y, Z -> fshr X, Y, -Z
6505 // fshr X, Y, Z -> fshl X, Y, -Z
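// Why this is sound: fshl(X, Y, C) == fshr(X, Y, BW - C), and because BW is a
// power of two here, -Z is congruent to BW - Z modulo BW. The guard above
// guarantees every (non-undef) element of Z is non-zero modulo BW, so the
// rewrite never introduces a shift by BW.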
6506 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
6507 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
6508 } else {
6509 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
6510 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
6511 auto One = MIRBuilder.buildConstant(ShTy, 1);
6512 if (IsFSHL) {
6513 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
6514 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
6515 } else {
6516 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
6517 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
6518 }
6519
6520 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
6521 }
6522
6523 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
6524 MI.eraseFromParent();
6525 return Legalized;
6526}
6527
6528 LegalizerHelper::LegalizeResult
6529 LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
6530 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
6531 LLT Ty = MRI.getType(Dst);
6532 LLT ShTy = MRI.getType(Z);
6533
6534 const unsigned BW = Ty.getScalarSizeInBits();
6535 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6536
6537 Register ShX, ShY;
6538 Register ShAmt, InvShAmt;
6539
6540 // FIXME: Emit optimized urem by constant instead of letting it expand later.
6541 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
6542 // fshl: X << C | Y >> (BW - C)
6543 // fshr: X << (BW - C) | Y >> C
6544 // where C = Z % BW is not zero
6545 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
6546 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
6547 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
6548 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
6549 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
6550 } else {
6551 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
6552 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
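// Splitting one operand's shift into "by 1" plus "by BW - 1 - (Z % BW)" keeps
// every emitted shift amount strictly below BW even when Z % BW == 0, which
// the single-shift form in the branch above cannot guarantee.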
6553 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
6554 if (isPowerOf2_32(BW)) {
6555 // Z % BW -> Z & (BW - 1)
6556 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
6557 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
6558 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
6559 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
6560 } else {
6561 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
6562 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
6563 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
6564 }
6565
6566 auto One = MIRBuilder.buildConstant(ShTy, 1);
6567 if (IsFSHL) {
6568 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
6569 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
6570 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
6571 } else {
6572 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
6573 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
6574 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
6575 }
6576 }
6577
6578 MIRBuilder.buildOr(Dst, ShX, ShY);
6579 MI.eraseFromParent();
6580 return Legalized;
6581}
6582
6583 LegalizerHelper::LegalizeResult
6584 LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
6585 // These operations approximately do the following (while avoiding undefined
6586 // shifts by BW):
6587 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
6588 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
6589 Register Dst = MI.getOperand(0).getReg();
6590 LLT Ty = MRI.getType(Dst);
6591 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
6592
6593 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6594 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6595
6596 // TODO: Use smarter heuristic that accounts for vector legalization.
6597 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
6598 return lowerFunnelShiftAsShifts(MI);
6599
6600 // This only works for powers of 2, fallback to shifts if it fails.
6601 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
6602 if (Result == UnableToLegalize)
6603 return lowerFunnelShiftAsShifts(MI);
6604 return Result;
6605}
6606
6607 LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
6608 auto [Dst, Src] = MI.getFirst2Regs();
6609 LLT DstTy = MRI.getType(Dst);
6610 LLT SrcTy = MRI.getType(Src);
6611
6612 uint32_t DstTySize = DstTy.getSizeInBits();
6613 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
6614 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
6615
6616 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
6617 !isPowerOf2_32(SrcTyScalarSize))
6618 return UnableToLegalize;
6619
6620 // The step between extend is too large, split it by creating an intermediate
6621 // extend instruction
6622 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
6623 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
6624 // If the destination type is illegal, split it into multiple statements
6625 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
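// For example (assuming these types occur), <4 x s8> -> <4 x s32> first
// extends to an intermediate <4 x s16>, unmerges that into two <2 x s16>
// halves, extends each half to <2 x s32>, and merges the two halves back into
// the <4 x s32> destination.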
6626 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
6627 // Unmerge the vector
6628 LLT EltTy = MidTy.changeElementCount(
6629 MidTy.getElementCount().divideCoefficientBy(2));
6630 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
6631
6632 // ZExt the vectors
6633 LLT ZExtResTy = DstTy.changeElementCount(
6634 DstTy.getElementCount().divideCoefficientBy(2));
6635 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
6636 {UnmergeSrc.getReg(0)});
6637 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
6638 {UnmergeSrc.getReg(1)});
6639
6640 // Merge the ending vectors
6641 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
6642
6643 MI.eraseFromParent();
6644 return Legalized;
6645 }
6646 return UnableToLegalize;
6647}
6648
6649 LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
6650 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
6652 // Similar to how operand splitting is done in SelectionDAG, we can handle
6653 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
6654 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
6655 // %lo16(<4 x s16>) = G_TRUNC %inlo
6656 // %hi16(<4 x s16>) = G_TRUNC %inhi
6657 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
6658 // %res(<8 x s8>) = G_TRUNC %in16
6659
6660 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
6661
6662 Register DstReg = MI.getOperand(0).getReg();
6663 Register SrcReg = MI.getOperand(1).getReg();
6664 LLT DstTy = MRI.getType(DstReg);
6665 LLT SrcTy = MRI.getType(SrcReg);
6666
6667 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
6668 isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
6669 isPowerOf2_32(SrcTy.getNumElements()) &&
6670 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
6671 // Split input type.
6672 LLT SplitSrcTy = SrcTy.changeElementCount(
6673 SrcTy.getElementCount().divideCoefficientBy(2));
6674
6675 // First, split the source into two smaller vectors.
6676 SmallVector<Register, 2> SplitSrcs;
6677 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
6678
6679 // Truncate the splits into intermediate narrower elements.
6680 LLT InterTy;
6681 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
6682 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
6683 else
6684 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
6685 for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
6686 SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
6687 }
6688
6689 // Combine the new truncates into one vector
6690 auto Merge = MIRBuilder.buildMergeLikeInstr(
6691 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
6692
6693 // Truncate the new vector to the final result type
6694 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
6695 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
6696 else
6697 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
6698
6699 MI.eraseFromParent();
6700
6701 return Legalized;
6702 }
6703 return UnableToLegalize;
6704}
6705
6706 LegalizerHelper::LegalizeResult
6707 LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
6708 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
6709 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
6710 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
6711 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6712 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
6713 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
6714 MI.eraseFromParent();
6715 return Legalized;
6716}
6717
6718 LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
6719 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
6720
6721 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
6722 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
6723
6725
6726 // If a rotate in the other direction is supported, use it.
6727 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6728 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
6729 isPowerOf2_32(EltSizeInBits))
6730 return lowerRotateWithReverseRotate(MI);
6731
6732 // If a funnel shift is supported, use it.
6733 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6734 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6735 bool IsFShLegal = false;
6736 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
6737 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
6738 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
6739 Register R3) {
6740 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
6741 MI.eraseFromParent();
6742 return Legalized;
6743 };
6744 // If a funnel shift in the other direction is supported, use it.
6745 if (IsFShLegal) {
6746 return buildFunnelShift(FShOpc, Dst, Src, Amt);
6747 } else if (isPowerOf2_32(EltSizeInBits)) {
6748 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
6749 return buildFunnelShift(RevFsh, Dst, Src, Amt);
6750 }
6751 }
6752
6753 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
6754 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
6755 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
6756 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
6757 Register ShVal;
6758 Register RevShiftVal;
6759 if (isPowerOf2_32(EltSizeInBits)) {
6760 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
6761 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
6762 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
6763 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
6764 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
6765 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
6766 RevShiftVal =
6767 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
6768 } else {
6769 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
6770 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
6771 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
6772 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
6773 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
6774 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
6775 auto One = MIRBuilder.buildConstant(AmtTy, 1);
6776 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
6777 RevShiftVal =
6778 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
6779 }
6780 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
6781 MI.eraseFromParent();
6782 return Legalized;
6783}
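// Illustrative sketch (not from the LLVM sources): the masked shift/or
// expansion used above for power-of-2 widths can be checked with ordinary
// integer arithmetic; RotL8Example is a made-up helper name.
static constexpr unsigned RotL8Example(unsigned X, unsigned C) {
  // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1)), with w = 8.
  return ((X << (C & 7)) | (X >> ((0u - C) & 7))) & 0xFFu;
}
static_assert(RotL8Example(0x96, 3) == 0xB4, "rotl(1001'0110, 3)");
static_assert(RotL8Example(0x96, 0) == 0x96, "rotate by zero is the identity");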
6784
6785// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
6786// representation.
6787LegalizerHelper::LegalizeResult
6788LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
6789 auto [Dst, Src] = MI.getFirst2Regs();
6790 const LLT S64 = LLT::scalar(64);
6791 const LLT S32 = LLT::scalar(32);
6792 const LLT S1 = LLT::scalar(1);
6793
6794 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
6795
6796 // unsigned cul2f(ulong u) {
6797 // uint lz = clz(u);
6798 // uint e = (u != 0) ? 127U + 63U - lz : 0;
6799 // u = (u << lz) & 0x7fffffffffffffffUL;
6800 // ulong t = u & 0xffffffffffUL;
6801 // uint v = (e << 23) | (uint)(u >> 40);
6802 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
6803 // return as_float(v + r);
6804 // }
6805
6806 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
6807 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
6808
6809 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
6810
6811 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
6812 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
6813
6814 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
6815 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
6816
6817 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
6818 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
6819
6820 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
6821
6822 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
6823 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
6824
6825 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
6826 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
6827 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
6828
6829 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
6830 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
6831 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
6832 auto One = MIRBuilder.buildConstant(S32, 1);
6833
6834 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
6835 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
6836 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
6837 MIRBuilder.buildAdd(Dst, V, R);
6838
6839 MI.eraseFromParent();
6840 return Legalized;
6841}
6842
6844 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6845
6846 if (SrcTy == LLT::scalar(1)) {
6847 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
6848 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
6849 MIRBuilder.buildSelect(Dst, Src, True, False);
6850 MI.eraseFromParent();
6851 return Legalized;
6852 }
6853
6854 if (SrcTy != LLT::scalar(64))
6855 return UnableToLegalize;
6856
6857 if (DstTy == LLT::scalar(32)) {
6858 // TODO: SelectionDAG has several alternative expansions to port which may
6859 // be more reasonable depending on the available instructions. If a target
6860 // has sitofp, does not have CTLZ, or can efficiently use f64 as an
6861 // intermediate type, this is probably worse.
6862 return lowerU64ToF32BitOps(MI);
6863 }
6864
6865 return UnableToLegalize;
6866}
6867
6869 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6870
6871 const LLT S64 = LLT::scalar(64);
6872 const LLT S32 = LLT::scalar(32);
6873 const LLT S1 = LLT::scalar(1);
6874
6875 if (SrcTy == S1) {
6876 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
6877 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
6878 MIRBuilder.buildSelect(Dst, Src, True, False);
6879 MI.eraseFromParent();
6880 return Legalized;
6881 }
6882
6883 if (SrcTy != S64)
6884 return UnableToLegalize;
6885
6886 if (DstTy == S32) {
6887 // signed cl2f(long l) {
6888 // long s = l >> 63;
6889 // float r = cul2f((l + s) ^ s);
6890 // return s ? -r : r;
6891 // }
6892 Register L = Src;
6893 auto SignBit = MIRBuilder.buildConstant(S64, 63);
6894 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
6895
6896 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
6897 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
6898 auto R = MIRBuilder.buildUITOFP(S32, Xor);
6899
6900 auto RNeg = MIRBuilder.buildFNeg(S32, R);
6901 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
6902 MIRBuilder.buildConstant(S64, 0));
6903 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
6904 MI.eraseFromParent();
6905 return Legalized;
6906 }
6907
6908 return UnableToLegalize;
6909}
6910
6911LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
6912 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6913 const LLT S64 = LLT::scalar(64);
6914 const LLT S32 = LLT::scalar(32);
6915
6916 if (SrcTy != S64 && SrcTy != S32)
6917 return UnableToLegalize;
6918 if (DstTy != S32 && DstTy != S64)
6919 return UnableToLegalize;
6920
6921 // FPTOSI gives same result as FPTOUI for positive signed integers.
6922 // FPTOUI needs to deal with fp values that convert to unsigned integers
6923 // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
6924
6925 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
6926 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
6927 : APFloat::IEEEdouble(),
6928 APInt::getZero(SrcTy.getSizeInBits()));
6929 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
6930
6931 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
6932
6933 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
6934 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
6935 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
6936 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
6937 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
6938 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
6939 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
6940
6941 const LLT S1 = LLT::scalar(1);
6942
6943 MachineInstrBuilder FCMP =
6944 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
6945 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
6946
6947 MI.eraseFromParent();
6948 return Legalized;
6949}
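// Illustrative sketch (not from the LLVM sources): a scalar model of the
// select emitted above. Inputs below 2^(N-1) use the signed conversion
// directly; larger inputs convert (Src - 2^(N-1)) and XOR the top bit back in.
static constexpr unsigned long long FpToUi64Example(double Src) {
  constexpr double TwoPow63 = 9223372036854775808.0; // 2^63
  return Src < TwoPow63
             ? (unsigned long long)(long long)Src
             : (unsigned long long)(long long)(Src - TwoPow63) ^
                   0x8000000000000000ULL;
}
static_assert(FpToUi64Example(3.0) == 3, "small values use FPTOSI directly");
static_assert(FpToUi64Example(9223372036854775808.0) == 1ULL << 63,
              "values at or above 2^63 take the offset-and-XOR path");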
6950
6951LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
6952 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6953 const LLT S64 = LLT::scalar(64);
6954 const LLT S32 = LLT::scalar(32);
6955
6956 // FIXME: Only f32 to i64 conversions are supported.
6957 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
6958 return UnableToLegalize;
6959
6960 // Expand f32 -> i64 conversion
6961 // This algorithm comes from compiler-rt's implementation of fixsfdi:
6962 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
6963
6964 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
6965
6966 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
6967 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
6968
6969 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
6970 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
6971
6972 auto SignMask = MIRBuilder.buildConstant(SrcTy,
6973 APInt::getSignMask(SrcEltBits));
6974 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
6975 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
6976 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
6977 Sign = MIRBuilder.buildSExt(DstTy, Sign);
6978
6979 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
6980 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
6981 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
6982
6983 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
6984 R = MIRBuilder.buildZExt(DstTy, R);
6985
6986 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
6987 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
6988 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
6989 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
6990
6991 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
6992 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
6993
6994 const LLT S1 = LLT::scalar(1);
6995 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
6996 S1, Exponent, ExponentLoBit);
6997
6998 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
6999
7000 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
7001 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
7002
7003 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
7004
7005 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
7006 S1, Exponent, ZeroSrcTy);
7007
7008 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
7009 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
7010
7011 MI.eraseFromParent();
7012 return Legalized;
7013}
7014
7015// f64 -> f16 conversion using round-to-nearest-even rounding mode.
7016LegalizerHelper::LegalizeResult
7017LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
7018 const LLT S1 = LLT::scalar(1);
7019 const LLT S32 = LLT::scalar(32);
7020
7021 auto [Dst, Src] = MI.getFirst2Regs();
7022 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
7023 MRI.getType(Src).getScalarType() == LLT::scalar(64));
7024
7025 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
7026 return UnableToLegalize;
7027
7029 unsigned Flags = MI.getFlags();
7030 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
7031 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
7032 MI.eraseFromParent();
7033 return Legalized;
7034 }
7035
7036 const unsigned ExpMask = 0x7ff;
7037 const unsigned ExpBiasf64 = 1023;
7038 const unsigned ExpBiasf16 = 15;
7039
7040 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
7041 Register U = Unmerge.getReg(0);
7042 Register UH = Unmerge.getReg(1);
7043
7044 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
7045 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
7046
7047 // Subtract the fp64 exponent bias (1023) to get the real exponent and
7048 // add the f16 bias (15) to get the biased exponent for the f16 format.
7049 E = MIRBuilder.buildAdd(
7050 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
7051
7054
7055 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
7056 MIRBuilder.buildConstant(S32, 0x1ff));
7057 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
7058
7059 auto Zero = MIRBuilder.buildConstant(S32, 0);
7060 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
7061 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
7062 M = MIRBuilder.buildOr(S32, M, Lo40Set);
7063
7064 // (M != 0 ? 0x0200 : 0) | 0x7c00;
7065 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
7066 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
7067 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
7068
7069 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
7070 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
7071
7072 // N = M | (E << 12);
7073 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
7074 auto N = MIRBuilder.buildOr(S32, M, EShl12);
7075
7076 // B = clamp(1-E, 0, 13);
7077 auto One = MIRBuilder.buildConstant(S32, 1);
7078 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
7079 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
7080 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
7081
7082 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
7083 MIRBuilder.buildConstant(S32, 0x1000));
7084
7085 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
7086 auto D0 = MIRBuilder.buildShl(S32, D, B);
7087
7088 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
7089 D0, SigSetHigh);
7090 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
7091 D = MIRBuilder.buildOr(S32, D, D1);
7092
7093 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
7094 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
7095
7096 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
7098
7099 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
7100 MIRBuilder.buildConstant(S32, 3));
7101 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
7102
7103 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
7104 MIRBuilder.buildConstant(S32, 5));
7105 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
7106
7107 V1 = MIRBuilder.buildOr(S32, V0, V1);
7108 V = MIRBuilder.buildAdd(S32, V, V1);
7109
7110 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
7111 E, MIRBuilder.buildConstant(S32, 30));
7112 V = MIRBuilder.buildSelect(S32, CmpEGt30,
7113 MIRBuilder.buildConstant(S32, 0x7c00), V);
7114
7115 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
7116 E, MIRBuilder.buildConstant(S32, 1039));
7117 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
7118
7119 // Extract the sign bit.
7120 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
7121 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
7122
7123 // Insert the sign bit
7124 V = MIRBuilder.buildOr(S32, Sign, V);
7125
7126 MIRBuilder.buildTrunc(Dst, V);
7127 MI.eraseFromParent();
7128 return Legalized;
7129}
7130
7131LegalizerHelper::LegalizeResult
7132LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
7133 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
7134 const LLT S64 = LLT::scalar(64);
7135 const LLT S16 = LLT::scalar(16);
7136
7137 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
7138 return lowerFPTRUNC_F64_TO_F16(MI);
7139
7140 return UnableToLegalize;
7141}
7142
7144 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7145 LLT Ty = MRI.getType(Dst);
7146
7147 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
7148 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
7149 MI.eraseFromParent();
7150 return Legalized;
7151}
7152
7153static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
7154 switch (Opc) {
7155 case TargetOpcode::G_SMIN:
7156 return CmpInst::ICMP_SLT;
7157 case TargetOpcode::G_SMAX:
7158 return CmpInst::ICMP_SGT;
7159 case TargetOpcode::G_UMIN:
7160 return CmpInst::ICMP_ULT;
7161 case TargetOpcode::G_UMAX:
7162 return CmpInst::ICMP_UGT;
7163 default:
7164 llvm_unreachable("not in integer min/max");
7165 }
7166}
7167
7169 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7170
7171 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
7172 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
7173
7174 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
7175 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
7176
7177 MI.eraseFromParent();
7178 return Legalized;
7179}
7180
7181LegalizerHelper::LegalizeResult
7182LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
7183 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
7184 const int Src0Size = Src0Ty.getScalarSizeInBits();
7185 const int Src1Size = Src1Ty.getScalarSizeInBits();
7186
7187 auto SignBitMask = MIRBuilder.buildConstant(
7188 Src0Ty, APInt::getSignMask(Src0Size));
7189
7190 auto NotSignBitMask = MIRBuilder.buildConstant(
7191 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
7192
7193 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
7194 Register And1;
7195 if (Src0Ty == Src1Ty) {
7196 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
7197 } else if (Src0Size > Src1Size) {
7198 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
7199 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
7200 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
7201 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
7202 } else {
7203 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
7204 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
7205 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
7206 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
7207 }
7208
7209 // Be careful about setting nsz/nnan/ninf on every instruction, since the
7210 // constants are a nan and -0.0, but the final result should preserve
7211 // everything.
7212 unsigned Flags = MI.getFlags();
7213
7214 // We masked the sign bit and the not-sign bit, so these are disjoint.
7215 Flags |= MachineInstr::Disjoint;
7216
7217 MIRBuilder.buildOr(Dst, And0, And1, Flags);
7218
7219 MI.eraseFromParent();
7220 return Legalized;
7221}
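// Illustrative sketch (not from the LLVM sources): at the bit level the
// disjoint OR built above is ordinary copysign; for a same-sized f32 pair:
static constexpr unsigned CopySignBitsExample(unsigned Mag, unsigned Sgn) {
  return (Mag & 0x7FFFFFFFu) | (Sgn & 0x80000000u);
}
static_assert(CopySignBitsExample(0x3F800000u, 0xC0000000u) == 0xBF800000u,
              "copysign(1.0f, -2.0f) == -1.0f in IEEE-754 bits");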
7222
7225 unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
7226 TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
7227
7228 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7229 LLT Ty = MRI.getType(Dst);
7230
7231 if (!MI.getFlag(MachineInstr::FmNoNans)) {
7232 // Insert canonicalizes if it's possible we need to quiet to get correct
7233 // sNaN behavior.
7234
7235 // Note this must be done here, and not as an optimization combine in the
7236 // absence of a dedicated quiet-snan instruction as we're using an
7237 // omni-purpose G_FCANONICALIZE.
7238 if (!isKnownNeverSNaN(Src0, MRI))
7239 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
7240
7241 if (!isKnownNeverSNaN(Src1, MRI))
7242 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
7243 }
7244
7245 // If there are no nans, it's safe to simply replace this with the non-IEEE
7246 // version.
7247 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
7248 MI.eraseFromParent();
7249 return Legalized;
7250}
7251
7253 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
7254 Register DstReg = MI.getOperand(0).getReg();
7255 LLT Ty = MRI.getType(DstReg);
7256 unsigned Flags = MI.getFlags();
7257
7258 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
7259 Flags);
7260 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
7261 MI.eraseFromParent();
7262 return Legalized;
7263}
7264
7265LegalizerHelper::LegalizeResult
7266LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
7267 auto [DstReg, X] = MI.getFirst2Regs();
7268 const unsigned Flags = MI.getFlags();
7269 const LLT Ty = MRI.getType(DstReg);
7270 const LLT CondTy = Ty.changeElementSize(1);
7271
7272 // round(x) =>
7273 // t = trunc(x);
7274 // d = fabs(x - t);
7275 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
7276 // return t + o;
7277
7278 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
7279
7280 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
7281 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
7282
7283 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
7284 auto Cmp =
7285 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
7286
7287 // Could emit G_UITOFP instead
7288 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
7289 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
7290 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
7291 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
7292
7293 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
7294
7295 MI.eraseFromParent();
7296 return Legalized;
7297}
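// Illustrative sketch (not from the LLVM sources): the trunc/fabs/copysign
// sequence above rounds halfway cases away from zero, as llvm.round requires.
static constexpr double RoundExample(double X) {
  double T = (double)(long long)X;       // trunc(x)
  double D = X - T >= 0 ? X - T : T - X; // fabs(x - t)
  double O = D >= 0.5 ? 1.0 : 0.0;       // offset magnitude
  return X < 0 ? T - O : T + O;          // t + copysign(o, x)
}
static_assert(RoundExample(2.5) == 3.0, "halfway rounds away from zero");
static_assert(RoundExample(-2.5) == -3.0, "also for negative inputs");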
7298
7300 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7301 unsigned Flags = MI.getFlags();
7302 LLT Ty = MRI.getType(DstReg);
7303 const LLT CondTy = Ty.changeElementSize(1);
7304
7305 // result = trunc(src);
7306 // if (src < 0.0 && src != result)
7307 // result += -1.0.
7308
7309 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
7310 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
7311
7312 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
7313 SrcReg, Zero, Flags);
7314 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
7315 SrcReg, Trunc, Flags);
7316 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
7317 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
7318
7319 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
7320 MI.eraseFromParent();
7321 return Legalized;
7322}
7323
7324LegalizerHelper::LegalizeResult
7325LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
7326 const unsigned NumOps = MI.getNumOperands();
7327 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
7328 unsigned PartSize = Src0Ty.getSizeInBits();
7329
7330 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
7331 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
7332
7333 for (unsigned I = 2; I != NumOps; ++I) {
7334 const unsigned Offset = (I - 1) * PartSize;
7335
7336 Register SrcReg = MI.getOperand(I).getReg();
7337 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
7338
7339 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
7340 MRI.createGenericVirtualRegister(WideTy);
7341
7342 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
7343 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
7344 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
7345 ResultReg = NextResult;
7346 }
7347
7348 if (DstTy.isPointer()) {
7349 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
7350 DstTy.getAddressSpace())) {
7351 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
7352 return UnableToLegalize;
7353 }
7354
7355 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
7356 }
7357
7358 MI.eraseFromParent();
7359 return Legalized;
7360}
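// Illustrative sketch (not from the LLVM sources): the zext/shl/or loop above
// concatenates the parts with the first operand in the low bits, e.g. for two
// 16-bit parts merged into 32 bits:
static constexpr unsigned MergeU16Example(unsigned Lo, unsigned Hi) {
  return (Lo & 0xFFFFu) | ((Hi & 0xFFFFu) << 16);
}
static_assert(MergeU16Example(0xBEEF, 0xDEAD) == 0xDEADBEEFu,
              "operand 1 lands in the low half, operand 2 in the high half");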
7361
7364 const unsigned NumDst = MI.getNumOperands() - 1;
7365 Register SrcReg = MI.getOperand(NumDst).getReg();
7366 Register Dst0Reg = MI.getOperand(0).getReg();
7367 LLT DstTy = MRI.getType(Dst0Reg);
7368 if (DstTy.isPointer())
7369 return UnableToLegalize; // TODO
7370
7371 SrcReg = coerceToScalar(SrcReg);
7372 if (!SrcReg)
7373 return UnableToLegalize;
7374
7375 // Expand scalarizing unmerge as bitcast to integer and shift.
7376 LLT IntTy = MRI.getType(SrcReg);
7377
7378 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
7379
7380 const unsigned DstSize = DstTy.getSizeInBits();
7381 unsigned Offset = DstSize;
7382 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
7383 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
7384 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
7385 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
7386 }
7387
7388 MI.eraseFromParent();
7389 return Legalized;
7390}
7391
7392/// Lower a vector extract or insert by writing the vector to a stack temporary
7393/// and reloading the element or vector.
7394///
7395/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
7396/// =>
7397/// %stack_temp = G_FRAME_INDEX
7398/// G_STORE %vec, %stack_temp
7399/// %idx = clamp(%idx, %vec.getNumElements())
7400/// %element_ptr = G_PTR_ADD %stack_temp, %idx
7401/// %dst = G_LOAD %element_ptr
7404 Register DstReg = MI.getOperand(0).getReg();
7405 Register SrcVec = MI.getOperand(1).getReg();
7406 Register InsertVal;
7407 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
7408 InsertVal = MI.getOperand(2).getReg();
7409
7410 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
7411
7412 LLT VecTy = MRI.getType(SrcVec);
7413 LLT EltTy = VecTy.getElementType();
7414 unsigned NumElts = VecTy.getNumElements();
7415
7416 int64_t IdxVal;
7417 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
7418 SmallVector<Register, 8> SrcRegs;
7419 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
7420
7421 if (InsertVal) {
7422 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
7423 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
7424 } else {
7425 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
7426 }
7427
7428 MI.eraseFromParent();
7429 return Legalized;
7430 }
7431
7432 if (!EltTy.isByteSized()) { // Not implemented.
7433 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
7434 return UnableToLegalize;
7435 }
7436
7437 unsigned EltBytes = EltTy.getSizeInBytes();
7438 Align VecAlign = getStackTemporaryAlignment(VecTy);
7439 Align EltAlign;
7440
7441 MachinePointerInfo PtrInfo;
7442 auto StackTemp = createStackTemporary(
7443 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
7444 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
7445
7446 // Get the pointer to the element, and be sure not to hit undefined behavior
7447 // if the index is out of bounds.
7448 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
7449
7450 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
7451 int64_t Offset = IdxVal * EltBytes;
7452 PtrInfo = PtrInfo.getWithOffset(Offset);
7453 EltAlign = commonAlignment(VecAlign, Offset);
7454 } else {
7455 // We lose information with a variable offset.
7456 EltAlign = getStackTemporaryAlignment(EltTy);
7457 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
7458 }
7459
7460 if (InsertVal) {
7461 // Write the inserted element
7462 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
7463
7464 // Reload the whole vector.
7465 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
7466 } else {
7467 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
7468 }
7469
7470 MI.eraseFromParent();
7471 return Legalized;
7472}
7473
7476 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
7477 MI.getFirst3RegLLTs();
7478 LLT IdxTy = LLT::scalar(32);
7479
7480 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
7481 Register Undef;
7482 SmallVector<Register, 32> BuildVec;
7483 LLT EltTy = DstTy.getScalarType();
7484
7485 for (int Idx : Mask) {
7486 if (Idx < 0) {
7487 if (!Undef.isValid())
7488 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
7489 BuildVec.push_back(Undef);
7490 continue;
7491 }
7492
7493 if (Src0Ty.isScalar()) {
7494 BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
7495 } else {
7496 int NumElts = Src0Ty.getNumElements();
7497 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
7498 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
7499 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
7500 auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
7501 BuildVec.push_back(Extract.getReg(0));
7502 }
7503 }
7504
7505 if (DstTy.isScalar())
7506 MIRBuilder.buildCopy(DstReg, BuildVec[0]);
7507 else
7508 MIRBuilder.buildBuildVector(DstReg, BuildVec);
7509 MI.eraseFromParent();
7510 return Legalized;
7511}
7512
7513Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
7514 Register AllocSize,
7515 Align Alignment,
7516 LLT PtrTy) {
7517 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
7518
7519 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
7520 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
7521
7522 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
7523 // have to generate an extra instruction to negate the alloc and then use
7524 // G_PTR_ADD to add the negative offset.
7525 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
7526 if (Alignment > Align(1)) {
7527 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
7528 AlignMask.negate();
7529 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
7530 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
7531 }
7532
7533 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
7534}
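// Illustrative sketch (not from the LLVM sources): the AND with the negated
// alignment mask rounds the decremented stack pointer down to the requested
// power-of-2 alignment.
static constexpr unsigned long long AlignDownExample(unsigned long long SP,
                                                     unsigned long long A) {
  return SP & ~(A - 1); // A must be a power of two.
}
static_assert(AlignDownExample(0x1003F, 16) == 0x10030,
              "rounded down to a 16-byte boundary");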
7535
7538 const auto &MF = *MI.getMF();
7539 const auto &TFI = *MF.getSubtarget().getFrameLowering();
7540 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
7541 return UnableToLegalize;
7542
7543 Register Dst = MI.getOperand(0).getReg();
7544 Register AllocSize = MI.getOperand(1).getReg();
7545 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
7546
7547 LLT PtrTy = MRI.getType(Dst);
7548 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
7549 Register SPTmp =
7550 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
7551
7552 MIRBuilder.buildCopy(SPReg, SPTmp);
7553 MIRBuilder.buildCopy(Dst, SPTmp);
7554
7555 MI.eraseFromParent();
7556 return Legalized;
7557}
7558
7562 if (!StackPtr)
7563 return UnableToLegalize;
7564
7565 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
7566 MI.eraseFromParent();
7567 return Legalized;
7568}
7569
7573 if (!StackPtr)
7574 return UnableToLegalize;
7575
7576 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
7577 MI.eraseFromParent();
7578 return Legalized;
7579}
7580
7583 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7584 unsigned Offset = MI.getOperand(2).getImm();
7585
7586 // Extract sub-vector or one element
7587 if (SrcTy.isVector()) {
7588 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
7589 unsigned DstSize = DstTy.getSizeInBits();
7590
7591 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
7592 (Offset + DstSize <= SrcTy.getSizeInBits())) {
7593 // Unmerge and allow access to each Src element for the artifact combiner.
7594 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
7595
7596 // Take element(s) we need to extract and copy it (merge them).
7597 SmallVector<Register, 8> SubVectorElts;
7598 for (unsigned Idx = Offset / SrcEltSize;
7599 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
7600 SubVectorElts.push_back(Unmerge.getReg(Idx));
7601 }
7602 if (SubVectorElts.size() == 1)
7603 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
7604 else
7605 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
7606
7607 MI.eraseFromParent();
7608 return Legalized;
7609 }
7610 }
7611
7612 if (DstTy.isScalar() &&
7613 (SrcTy.isScalar() ||
7614 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
7615 LLT SrcIntTy = SrcTy;
7616 if (!SrcTy.isScalar()) {
7617 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
7618 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
7619 }
7620
7621 if (Offset == 0)
7622 MIRBuilder.buildTrunc(DstReg, SrcReg);
7623 else {
7624 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
7625 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
7626 MIRBuilder.buildTrunc(DstReg, Shr);
7627 }
7628
7629 MI.eraseFromParent();
7630 return Legalized;
7631 }
7632
7633 return UnableToLegalize;
7634}
7635
7637 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
7638 uint64_t Offset = MI.getOperand(3).getImm();
7639
7640 LLT DstTy = MRI.getType(Src);
7641 LLT InsertTy = MRI.getType(InsertSrc);
7642
7643 // Insert sub-vector or one element
7644 if (DstTy.isVector() && !InsertTy.isPointer()) {
7645 LLT EltTy = DstTy.getElementType();
7646 unsigned EltSize = EltTy.getSizeInBits();
7647 unsigned InsertSize = InsertTy.getSizeInBits();
7648
7649 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
7650 (Offset + InsertSize <= DstTy.getSizeInBits())) {
7651 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
7652 SmallVector<Register, 8> DstElts;
7653 unsigned Idx = 0;
7654 // Elements from Src before insert start Offset
7655 for (; Idx < Offset / EltSize; ++Idx) {
7656 DstElts.push_back(UnmergeSrc.getReg(Idx));
7657 }
7658
7659 // Replace elements in Src with elements from InsertSrc
7660 if (InsertTy.getSizeInBits() > EltSize) {
7661 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
7662 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
7663 ++Idx, ++i) {
7664 DstElts.push_back(UnmergeInsertSrc.getReg(i));
7665 }
7666 } else {
7667 DstElts.push_back(InsertSrc);
7668 ++Idx;
7669 }
7670
7671 // Remaining elements from Src after insert
7672 for (; Idx < DstTy.getNumElements(); ++Idx) {
7673 DstElts.push_back(UnmergeSrc.getReg(Idx));
7674 }
7675
7676 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
7677 MI.eraseFromParent();
7678 return Legalized;
7679 }
7680 }
7681
7682 if (InsertTy.isVector() ||
7683 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
7684 return UnableToLegalize;
7685
7687 if ((DstTy.isPointer() &&
7688 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
7689 (InsertTy.isPointer() &&
7690 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
7691 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
7692 return UnableToLegalize;
7693 }
7694
7695 LLT IntDstTy = DstTy;
7696
7697 if (!DstTy.isScalar()) {
7698 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
7699 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
7700 }
7701
7702 if (!InsertTy.isScalar()) {
7703 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
7704 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
7705 }
7706
7707 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
7708 if (Offset != 0) {
7709 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
7710 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
7711 }
7712
7713 APInt MaskVal = APInt::getBitsSetWithWrap(
7714 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
7715
7716 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
7717 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
7718 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
7719
7720 MIRBuilder.buildCast(Dst, Or);
7721 MI.eraseFromParent();
7722 return Legalized;
7723}
7724
7725LegalizerHelper::LegalizeResult
7726LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
7727 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
7728 MI.getFirst4RegLLTs();
7729 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
7730
7731 LLT Ty = Dst0Ty;
7732 LLT BoolTy = Dst1Ty;
7733
7734 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
7735
7736 if (IsAdd)
7737 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
7738 else
7739 MIRBuilder.buildSub(NewDst0, LHS, RHS);
7740
7741 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
7742
7743 auto Zero = MIRBuilder.buildConstant(Ty, 0);
7744
7745 // For an addition, the result should be less than one of the operands (LHS)
7746 // if and only if the other operand (RHS) is negative, otherwise there will
7747 // be overflow.
7748 // For a subtraction, the result should be less than one of the operands
7749 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
7750 // otherwise there will be overflow.
7751 auto ResultLowerThanLHS =
7752 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
7753 auto ConditionRHS = MIRBuilder.buildICmp(
7754 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
7755
7756 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
7757
7758 MIRBuilder.buildCopy(Dst0, NewDst0);
7759 MI.eraseFromParent();
7760
7761 return Legalized;
7762}
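// Illustrative sketch (not from the LLVM sources): for addition the overflow
// bit computed above is (result < LHS) XOR (RHS < 0). An 8-bit model:
static constexpr bool SAddOverflows8Example(int LHS, int RHS) {
  int Res = int((unsigned(LHS + RHS) & 255u) ^ 128u) - 128; // wrapped int8 sum
  return (Res < LHS) != (RHS < 0);
}
static_assert(SAddOverflows8Example(100, 100), "100 + 100 overflows int8");
static_assert(!SAddOverflows8Example(-100, 50), "-100 + 50 fits in int8");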
7763
7764LegalizerHelper::LegalizeResult
7765LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
7766 auto [Res, LHS, RHS] = MI.getFirst3Regs();
7767 LLT Ty = MRI.getType(Res);
7768 bool IsSigned;
7769 bool IsAdd;
7770 unsigned BaseOp;
7771 switch (MI.getOpcode()) {
7772 default:
7773 llvm_unreachable("unexpected addsat/subsat opcode");
7774 case TargetOpcode::G_UADDSAT:
7775 IsSigned = false;
7776 IsAdd = true;
7777 BaseOp = TargetOpcode::G_ADD;
7778 break;
7779 case TargetOpcode::G_SADDSAT:
7780 IsSigned = true;
7781 IsAdd = true;
7782 BaseOp = TargetOpcode::G_ADD;
7783 break;
7784 case TargetOpcode::G_USUBSAT:
7785 IsSigned = false;
7786 IsAdd = false;
7787 BaseOp = TargetOpcode::G_SUB;
7788 break;
7789 case TargetOpcode::G_SSUBSAT:
7790 IsSigned = true;
7791 IsAdd = false;
7792 BaseOp = TargetOpcode::G_SUB;
7793 break;
7794 }
7795
7796 if (IsSigned) {
7797 // sadd.sat(a, b) ->
7798 // hi = 0x7fffffff - smax(a, 0)
7799 // lo = 0x80000000 - smin(a, 0)
7800 // a + smin(smax(lo, b), hi)
7801 // ssub.sat(a, b) ->
7802 // lo = smax(a, -1) - 0x7fffffff
7803 // hi = smin(a, -1) - 0x80000000
7804 // a - smin(smax(lo, b), hi)
7805 // TODO: AMDGPU can use a "median of 3" instruction here:
7806 // a +/- med3(lo, b, hi)
7807 uint64_t NumBits = Ty.getScalarSizeInBits();
7808 auto MaxVal =
7809 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
7810 auto MinVal =
7811 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
7812 MachineInstrBuilder Hi, Lo;
7813 if (IsAdd) {
7814 auto Zero = MIRBuilder.buildConstant(Ty, 0);
7815 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
7816 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
7817 } else {
7818 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
7819 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
7820 MaxVal);
7821 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
7822 MinVal);
7823 }
7824 auto RHSClamped =
7825 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
7826 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
7827 } else {
7828 // uadd.sat(a, b) -> a + umin(~a, b)
7829 // usub.sat(a, b) -> a - umin(a, b)
7830 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
7831 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
7832 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
7833 }
7834
7835 MI.eraseFromParent();
7836 return Legalized;
7837}
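// Illustrative sketch (not from the LLVM sources): the unsigned branch above
// relies on umin(~a, b) capping the addend so the sum can never wrap.
static constexpr unsigned UAddSat8Example(unsigned A, unsigned B) {
  unsigned NotA = ~A & 0xFFu;
  unsigned Min = B < NotA ? B : NotA;
  return (A + Min) & 0xFFu;
}
static_assert(UAddSat8Example(200, 100) == 255, "clamps to the unsigned max");
static_assert(UAddSat8Example(20, 30) == 50, "in-range sums are unchanged");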
7838
7841 auto [Res, LHS, RHS] = MI.getFirst3Regs();
7842 LLT Ty = MRI.getType(Res);
7843 LLT BoolTy = Ty.changeElementSize(1);
7844 bool IsSigned;
7845 bool IsAdd;
7846 unsigned OverflowOp;
7847 switch (MI.getOpcode()) {
7848 default:
7849 llvm_unreachable("unexpected addsat/subsat opcode");
7850 case TargetOpcode::G_UADDSAT:
7851 IsSigned = false;
7852 IsAdd = true;
7853 OverflowOp = TargetOpcode::G_UADDO;
7854 break;
7855 case TargetOpcode::G_SADDSAT:
7856 IsSigned = true;
7857 IsAdd = true;
7858 OverflowOp = TargetOpcode::G_SADDO;
7859 break;
7860 case TargetOpcode::G_USUBSAT:
7861 IsSigned = false;
7862 IsAdd = false;
7863 OverflowOp = TargetOpcode::G_USUBO;
7864 break;
7865 case TargetOpcode::G_SSUBSAT:
7866 IsSigned = true;
7867 IsAdd = false;
7868 OverflowOp = TargetOpcode::G_SSUBO;
7869 break;
7870 }
7871
7872 auto OverflowRes =
7873 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
7874 Register Tmp = OverflowRes.getReg(0);
7875 Register Ov = OverflowRes.getReg(1);
7876 MachineInstrBuilder Clamp;
7877 if (IsSigned) {
7878 // sadd.sat(a, b) ->
7879 // {tmp, ov} = saddo(a, b)
7880 // ov ? (tmp >>s 31) + 0x80000000 : r
7881 // ssub.sat(a, b) ->
7882 // {tmp, ov} = ssubo(a, b)
7883 // ov ? (tmp >>s 31) + 0x80000000 : r
7884 uint64_t NumBits = Ty.getScalarSizeInBits();
7885 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
7886 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
7887 auto MinVal =
7888 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
7889 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
7890 } else {
7891 // uadd.sat(a, b) ->
7892 // {tmp, ov} = uaddo(a, b)
7893 // ov ? 0xffffffff : tmp
7894 // usub.sat(a, b) ->
7895 // {tmp, ov} = usubo(a, b)
7896 // ov ? 0 : tmp
7897 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
7898 }
7899 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
7900
7901 MI.eraseFromParent();
7902 return Legalized;
7903}
7904
7905LegalizerHelper::LegalizeResult
7906LegalizerHelper::lowerShlSat(MachineInstr &MI) {
7907 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
7908 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
7909 "Expected shlsat opcode!");
7910 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
7911 auto [Res, LHS, RHS] = MI.getFirst3Regs();
7912 LLT Ty = MRI.getType(Res);
7913 LLT BoolTy = Ty.changeElementSize(1);
7914
7915 unsigned BW = Ty.getScalarSizeInBits();
7916 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
7917 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
7918 : MIRBuilder.buildLShr(Ty, Result, RHS);
7919
7920 MachineInstrBuilder SatVal;
7921 if (IsSigned) {
7922 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
7923 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
7924 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
7925 MIRBuilder.buildConstant(Ty, 0));
7926 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
7927 } else {
7928 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
7929 }
7930 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
7931 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
7932
7933 MI.eraseFromParent();
7934 return Legalized;
7935}
7936
7937LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
7938 auto [Dst, Src] = MI.getFirst2Regs();
7939 const LLT Ty = MRI.getType(Src);
7940 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
7941 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
7942
7943 // Swap most and least significant byte, set remaining bytes in Res to zero.
7944 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
7945 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
7946 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
7947 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
7948
7949 // Set i-th high/low byte in Res to i-th low/high byte from Src.
7950 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
7951 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
7952 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
7953 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
7954 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
7955 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
7956 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
7957 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
7958 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
7959 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
7960 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
7961 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
7962 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
7963 }
7964 Res.getInstr()->getOperand(0).setReg(Dst);
7965
7966 MI.eraseFromParent();
7967 return Legalized;
7968}
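// Illustrative sketch (not from the LLVM sources): for a 32-bit scalar the
// shift/mask/or sequence above reduces to the familiar byte swap:
static constexpr unsigned BSwap32Example(unsigned X) {
  return (X << 24) | ((X & 0xFF00u) << 8) | ((X >> 8) & 0xFF00u) | (X >> 24);
}
static_assert(BSwap32Example(0x11223344u) == 0x44332211u, "bytes reversed");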
7969
7970//{ (Src & Mask) >> N } | { (Src << N) & Mask }
7971static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
7972 MachineInstrBuilder Src, const APInt &Mask) {
7973 const LLT Ty = Dst.getLLTTy(*B.getMRI());
7974 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
7975 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
7976 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
7977 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
7978 return B.buildOr(Dst, LHS, RHS);
7979}
7980
7981LegalizerHelper::LegalizeResult
7982LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
7983 auto [Dst, Src] = MI.getFirst2Regs();
7984 const LLT Ty = MRI.getType(Src);
7985 unsigned Size = Ty.getScalarSizeInBits();
7986
7987 if (Size >= 8) {
7988 MachineInstrBuilder BSWAP =
7989 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
7990
7991 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
7992 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
7993 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
7994 MachineInstrBuilder Swap4 =
7995 SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
7996
7997 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
7998 // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
7999 // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
8000 MachineInstrBuilder Swap2 =
8001 SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
8002
8003 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
8004 // 6|7
8005 // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
8006 // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
8007 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
8008 } else {
8009 // Expand bitreverse for types smaller than 8 bits.
8010 MachineInstrBuilder Tmp;
8011 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
8012 MachineInstrBuilder Tmp2;
8013 if (I < J) {
8014 auto ShAmt = MIRBuilder.buildConstant(Ty, J - I);
8015 Tmp2 = MIRBuilder.buildShl(Ty, Src, ShAmt);
8016 } else {
8017 auto ShAmt = MIRBuilder.buildConstant(Ty, I - J);
8018 Tmp2 = MIRBuilder.buildLShr(Ty, Src, ShAmt);
8019 }
8020
8021 auto Mask = MIRBuilder.buildConstant(Ty, 1ULL << J);
8022 Tmp2 = MIRBuilder.buildAnd(Ty, Tmp2, Mask);
8023 if (I == 0)
8024 Tmp = Tmp2;
8025 else
8026 Tmp = MIRBuilder.buildOr(Ty, Tmp, Tmp2);
8027 }
8028 MIRBuilder.buildCopy(Dst, Tmp);
8029 }
8030
8031 MI.eraseFromParent();
8032 return Legalized;
8033}
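// Illustrative sketch (not from the LLVM sources): after the byte swap, the
// three SwapN steps reverse the bits inside each byte with the 0xF0/0xCC/0xAA
// masks; collapsed to a single byte:
static constexpr unsigned ReverseByteExample(unsigned B) {
  B = ((B & 0xF0u) >> 4) | ((B << 4) & 0xF0u);
  B = ((B & 0xCCu) >> 2) | ((B << 2) & 0xCCu);
  B = ((B & 0xAAu) >> 1) | ((B << 1) & 0xAAu);
  return B;
}
static_assert(ReverseByteExample(0xB1u) == 0x8Du, "1011'0001 -> 1000'1101");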
8034
8038
8039 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
8040 int NameOpIdx = IsRead ? 1 : 0;
8041 int ValRegIndex = IsRead ? 0 : 1;
8042
8043 Register ValReg = MI.getOperand(ValRegIndex).getReg();
8044 const LLT Ty = MRI.getType(ValReg);
8045 const MDString *RegStr = cast<MDString>(
8046 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
8047
8048 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
8049 if (!PhysReg.isValid())
8050 return UnableToLegalize;
8051
8052 if (IsRead)
8053 MIRBuilder.buildCopy(ValReg, PhysReg);
8054 else
8055 MIRBuilder.buildCopy(PhysReg, ValReg);
8056
8057 MI.eraseFromParent();
8058 return Legalized;
8059}
8060
8061LegalizerHelper::LegalizeResult
8062LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
8063 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
8064 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
8065 Register Result = MI.getOperand(0).getReg();
8066 LLT OrigTy = MRI.getType(Result);
8067 auto SizeInBits = OrigTy.getScalarSizeInBits();
8068 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
8069
8070 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
8071 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
8072 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
8073 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
8074
8075 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
8076 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
8077 MIRBuilder.buildTrunc(Result, Shifted);
8078
8079 MI.eraseFromParent();
8080 return Legalized;
8081}
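// Illustrative sketch (not from the LLVM sources): extending to twice the
// width, multiplying, and shifting right by the original width yields the
// high half of the full product, which is what G_UMULH/G_SMULH return.
static constexpr unsigned UMulH32Example(unsigned A, unsigned B) {
  return (unsigned)(((unsigned long long)A * B) >> 32);
}
static_assert(UMulH32Example(0x80000000u, 4) == 2, "high half of 2^31 * 4");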
8082
8085 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
8086 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
8087
8088 if (Mask == fcNone) {
8089 MIRBuilder.buildConstant(DstReg, 0);
8090 MI.eraseFromParent();
8091 return Legalized;
8092 }
8093 if (Mask == fcAllFlags) {
8094 MIRBuilder.buildConstant(DstReg, 1);
8095 MI.eraseFromParent();
8096 return Legalized;
8097 }
8098
8099 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
8100 // version
8101
8102 unsigned BitSize = SrcTy.getScalarSizeInBits();
8103 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8104
8105 LLT IntTy = LLT::scalar(BitSize);
8106 if (SrcTy.isVector())
8107 IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
8108 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
8109
8110 // Various masks.
8111 APInt SignBit = APInt::getSignMask(BitSize);
8112 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
8113 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
8114 APInt ExpMask = Inf;
8115 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
8116 APInt QNaNBitMask =
8117 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
8118 APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
8119
8120 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
8121 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
8122 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
8123 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
8124 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
8125
8126 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
8127 auto Sign =
8129
8130 auto Res = MIRBuilder.buildConstant(DstTy, 0);
8131 // Clang doesn't support capture of structured bindings:
8132 LLT DstTyCopy = DstTy;
8133 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
8134 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
8135 };
8136
8137 // Tests that involve more than one class should be processed first.
8138 if ((Mask & fcFinite) == fcFinite) {
8139 // finite(V) ==> abs(V) u< exp_mask
8140 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
8141 ExpMaskC));
8142 Mask &= ~fcFinite;
8143 } else if ((Mask & fcFinite) == fcPosFinite) {
8144 // finite(V) && V > 0 ==> V u< exp_mask
8145 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
8146 ExpMaskC));
8147 Mask &= ~fcPosFinite;
8148 } else if ((Mask & fcFinite) == fcNegFinite) {
8149 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
8150 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
8151 ExpMaskC);
8152 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
8153 appendToRes(And);
8154 Mask &= ~fcNegFinite;
8155 }
8156
8157 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
8158 // fcZero | fcSubnormal => test all exponent bits are 0
8159 // TODO: Handle sign bit specific cases
8160 // TODO: Handle inverted case
8161 if (PartialCheck == (fcZero | fcSubnormal)) {
8162 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
8163 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8164 ExpBits, ZeroC));
8165 Mask &= ~PartialCheck;
8166 }
8167 }
8168
8169 // Check for individual classes.
8170 if (FPClassTest PartialCheck = Mask & fcZero) {
8171 if (PartialCheck == fcPosZero)
8172 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8173 AsInt, ZeroC));
8174 else if (PartialCheck == fcZero)
8175 appendToRes(
8176 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
8177 else // fcNegZero
8178 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8179 AsInt, SignBitC));
8180 }
8181
8182 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
8183 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
8184 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
8185 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
8186 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
8187 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
8188 auto SubnormalRes =
8189 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
8190 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
8191 if (PartialCheck == fcNegSubnormal)
8192 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
8193 appendToRes(SubnormalRes);
8194 }
8195
8196 if (FPClassTest PartialCheck = Mask & fcInf) {
8197 if (PartialCheck == fcPosInf)
8198 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8199 AsInt, InfC));
8200 else if (PartialCheck == fcInf)
8201 appendToRes(
8202 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
8203 else { // fcNegInf
8204 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
8205 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
8206 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
8207 AsInt, NegInfC));
8208 }
8209 }
8210
8211 if (FPClassTest PartialCheck = Mask & fcNan) {
8212 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
8213 if (PartialCheck == fcNan) {
8214 // isnan(V) ==> abs(V) u> int(inf)
8215 appendToRes(
8216 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
8217 } else if (PartialCheck == fcQNan) {
8218 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
8219 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
8220 InfWithQnanBitC));
8221 } else { // fcSNan
8222 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
8223 // abs(V) u< (unsigned(Inf) | quiet_bit)
8224 auto IsNan =
8225 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
8226 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
8227 Abs, InfWithQnanBitC);
8228 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
8229 }
8230 }
8231
8232 if (FPClassTest PartialCheck = Mask & fcNormal) {
8233 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
8234 // (max_exp-1))
8235 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
8236 auto ExpMinusOne = MIRBuilder.buildSub(
8237 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
8238 APInt MaxExpMinusOne = ExpMask - ExpLSB;
8239 auto NormalRes =
8240 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
8241 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
8242 if (PartialCheck == fcNegNormal)
8243 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
8244 else if (PartialCheck == fcPosNormal) {
8245 auto PosSign = MIRBuilder.buildXor(
8246 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InvertionMask));
8247 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
8248 }
8249 appendToRes(NormalRes);
8250 }
8251
8252 MIRBuilder.buildCopy(DstReg, Res);
8253 MI.eraseFromParent();
8254 return Legalized;
8255}
8256
8257LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
8258 // Implement G_SELECT in terms of XOR, AND, OR.
8259 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
8260 MI.getFirst4RegLLTs();
8261
8262 bool IsEltPtr = DstTy.isPointerOrPointerVector();
8263 if (IsEltPtr) {
8264 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
8265 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
8266 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
8267 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
8268 DstTy = NewTy;
8269 }
8270
8271 if (MaskTy.isScalar()) {
8272 // Turn the scalar condition into a vector condition mask if needed.
8273
8274 Register MaskElt = MaskReg;
8275
8276 // The condition was potentially zero extended before, but we want a sign
8277 // extended boolean.
8278 if (MaskTy != LLT::scalar(1))
8279 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
8280
8281 // Continue the sign extension (or truncate) to match the data type.
8282 MaskElt =
8283 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
8284
8285 if (DstTy.isVector()) {
8286 // Generate a vector splat idiom.
8287 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
8288 MaskReg = ShufSplat.getReg(0);
8289 } else {
8290 MaskReg = MaskElt;
8291 }
8292 MaskTy = DstTy;
8293 } else if (!DstTy.isVector()) {
8294 // Cannot handle the case that mask is a vector and dst is a scalar.
8295 return UnableToLegalize;
8296 }
8297
8298 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
8299 return UnableToLegalize;
8300 }
8301
8302 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
8303 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
8304 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
8305 if (IsEltPtr) {
8306 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
8307 MIRBuilder.buildIntToPtr(DstReg, Or);
8308 } else {
8309 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
8310 }
8311 MI.eraseFromParent();
8312 return Legalized;
8313}
8314
8315LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
8316 // Split DIVREM into individual instructions.
8317 unsigned Opcode = MI.getOpcode();
8318
8319 MIRBuilder.buildInstr(
8320 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
8321 : TargetOpcode::G_UDIV,
8322 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
8323 MIRBuilder.buildInstr(
8324 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
8325 : TargetOpcode::G_UREM,
8326 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
8327 MI.eraseFromParent();
8328 return Legalized;
8329}
8330
8331LegalizerHelper::LegalizeResult
8332LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
8333 // Expand %res = G_ABS %a into:
8334 // %v1 = G_ASHR %a, scalar_size-1
8335 // %v2 = G_ADD %a, %v1
8336 // %res = G_XOR %v2, %v1
8337 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
8338 Register OpReg = MI.getOperand(1).getReg();
8339 auto ShiftAmt =
8340 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
8341 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
8342 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
8343 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
8344 MI.eraseFromParent();
8345 return Legalized;
8346}
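// Illustrative sketch (not from the LLVM sources): the ashr/add/xor sequence
// is the classic branchless absolute value.
static constexpr int AbsExample(int X) {
  int Sign = X < 0 ? -1 : 0; // what G_ASHR by (width - 1) produces
  return (X + Sign) ^ Sign;  // identity when Sign == 0, negation when -1
}
static_assert(AbsExample(-42) == 42 && AbsExample(7) == 7,
              "same result for negative and positive inputs");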
8347
8350 // Expand %res = G_ABS %a into:
8351 // %v1 = G_CONSTANT 0
8352 // %v2 = G_SUB %v1, %a
8353 // %res = G_SMAX %a, %v2
8354 Register SrcReg = MI.getOperand(1).getReg();
8355 LLT Ty = MRI.getType(SrcReg);
8356 auto Zero = MIRBuilder.buildConstant(Ty, 0);
8357 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
8358 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
8359 MI.eraseFromParent();
8360 return Legalized;
8361}
8362
8365 Register SrcReg = MI.getOperand(1).getReg();
8366 Register DestReg = MI.getOperand(0).getReg();
8367 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
8368 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
8369 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
8370 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
8371 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
8372 MI.eraseFromParent();
8373 return Legalized;
8374}
8375
8378 Register SrcReg = MI.getOperand(1).getReg();
8379 LLT SrcTy = MRI.getType(SrcReg);
8380 LLT DstTy = MRI.getType(SrcReg);
8381
8382 // The source could be a scalar if the IR type was <1 x sN>.
8383 if (SrcTy.isScalar()) {
8384 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
8385 return UnableToLegalize; // FIXME: handle extension.
8386 // This can be just a plain copy.
8387 Observer.changingInstr(MI);
8388 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
8389 Observer.changedInstr(MI);
8390 return Legalized;
8391 }
8392 return UnableToLegalize;
8393}
8394
8395LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
8396 MachineFunction &MF = *MI.getMF();
8397 const DataLayout &DL = MIRBuilder.getDataLayout();
8398 LLVMContext &Ctx = MF.getFunction().getContext();
8399 Register ListPtr = MI.getOperand(1).getReg();
8400 LLT PtrTy = MRI.getType(ListPtr);
8401
8402 // LstPtr is a pointer to the head of the list. Get the address
8403 // of the head of the list.
8404 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
8405 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
8406 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
8407 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
8408
8409 const Align A(MI.getOperand(2).getImm());
8410 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
8411 if (A > TLI.getMinStackArgumentAlignment()) {
8412 Register AlignAmt =
8413 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
8414 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
8415 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
8416 VAList = AndDst.getReg(0);
8417 }
8418
8419 // Increment the pointer, VAList, to the next vaarg
8420 // The list should be bumped by the size of element in the current head of
8421 // list.
8422 Register Dst = MI.getOperand(0).getReg();
8423 LLT LLTTy = MRI.getType(Dst);
8424 Type *Ty = getTypeForLLT(LLTTy, Ctx);
8425 auto IncAmt =
8426 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
8427 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
8428
8429 // Store the increment VAList to the legalized pointer
8430 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
8431 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
8432 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
8433 // Load the actual argument out of the pointer VAList
8434 Align EltAlignment = DL.getABITypeAlign(Ty);
8435 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
8436 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
8437 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
8438
8439 MI.eraseFromParent();
8440 return Legalized;
8441}
8442
8443static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
8444 // On Darwin, -Os means optimize for size without hurting performance, so
8445 // only really optimize for size when -Oz (MinSize) is used.
8446 if (MF.getTarget().getTargetTriple().isOSDarwin())
8447 return MF.getFunction().hasMinSize();
8448 return MF.getFunction().hasOptSize();
8449}
8450
8451// Returns a list of types to use for memory op lowering in MemOps. A partial
8452// port of findOptimalMemOpLowering in TargetLowering.
8453static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
8454 unsigned Limit, const MemOp &Op,
8455 unsigned DstAS, unsigned SrcAS,
8456 const AttributeList &FuncAttributes,
8457 const TargetLowering &TLI) {
8458 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
8459 return false;
8460
8461 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
8462
8463 if (Ty == LLT()) {
8464 // Use the largest scalar type whose alignment constraints are satisfied.
8465 // We only need to check DstAlign here as SrcAlign is always greater or
8466 // equal to DstAlign (or zero).
8467 Ty = LLT::scalar(64);
8468 if (Op.isFixedDstAlign())
8469 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
8470 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
8471 Ty = LLT::scalar(Ty.getSizeInBytes());
8472 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
8473 // FIXME: check for the largest legal type we can load/store to.
8474 }
8475
8476 unsigned NumMemOps = 0;
8477 uint64_t Size = Op.size();
8478 while (Size) {
8479 unsigned TySize = Ty.getSizeInBytes();
8480 while (TySize > Size) {
8481 // For now, only use non-vector loads / stores for the left-over pieces.
8482 LLT NewTy = Ty;
8483 // FIXME: check for mem op safety and legality of the types. Not all of
8484 // SDAGisms map cleanly to GISel concepts.
8485 if (NewTy.isVector())
8486 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
8487 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
8488 unsigned NewTySize = NewTy.getSizeInBytes();
8489 assert(NewTySize > 0 && "Could not find appropriate type");
8490
8491 // If the new LLT cannot cover all of the remaining bits, then consider
8492 // issuing a (or a pair of) unaligned and overlapping load / store.
8493 unsigned Fast;
8494 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
8495 MVT VT = getMVTForLLT(Ty);
8496 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
8497 TLI.allowsMisalignedMemoryAccesses(
8498 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
8499 MachineMemOperand::MONone, &Fast) &&
8500 Fast)
8501 TySize = Size;
8502 else {
8503 Ty = NewTy;
8504 TySize = NewTySize;
8505 }
8506 }
8507
8508 if (++NumMemOps > Limit)
8509 return false;
8510
8511 MemOps.push_back(Ty);
8512 Size -= TySize;
8513 }
8514
8515 return true;
8516}
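// ---- Editorial note (not part of the upstream file) ----
// Worked example of the loop above, assuming the target prefers s64 and the
// operation covers 15 bytes:
//  * If overlapping accesses are not allowed (or misaligned s64 accesses are
//    not fast), the type keeps shrinking to cover the tail:
//    MemOps = {s64, s32, s16, s8}  (8 + 4 + 2 + 1 = 15 bytes).
//  * If Op.allowOverlap() holds and the target reports fast misaligned s64
//    accesses, the 7-byte tail is covered by one overlapping access instead:
//    MemOps = {s64, s64}, with the second access overlapping the first by
//    one byte.
// ---- End editorial note ----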
8517
8518// Get a vectorized representation of the memset value operand, GISel edition.
8519 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
8520 MachineRegisterInfo &MRI = *MIB.getMRI();
8521 unsigned NumBits = Ty.getScalarSizeInBits();
8522 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
8523 if (!Ty.isVector() && ValVRegAndVal) {
8524 APInt Scalar = ValVRegAndVal->Value.trunc(8);
8525 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
8526 return MIB.buildConstant(Ty, SplatVal).getReg(0);
8527 }
8528
8529 // Extend the byte value to the larger type, and then multiply by a magic
8530 // value 0x010101... in order to replicate it across every byte.
8531 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
8532 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
8533 return MIB.buildConstant(Ty, 0).getReg(0);
8534 }
8535
8536 LLT ExtType = Ty.getScalarType();
8537 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
8538 if (NumBits > 8) {
8539 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
8540 auto MagicMI = MIB.buildConstant(ExtType, Magic);
8541 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
8542 }
8543
8544 // For vector types create a G_BUILD_VECTOR.
8545 if (Ty.isVector())
8546 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
8547
8548 return Val;
8549}
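// ---- Editorial note (not part of the upstream file) ----
// The multiply above replicates the s8 memset value across the wider type:
// zero-extend the byte, then multiply by 0x0101...01 so every byte of the
// product holds the original value. Host-side model (hypothetical helper
// name, 32-bit case):
//
//   static uint32_t splatByte32(uint8_t B) {
//     return uint32_t(B) * 0x01010101u;  // e.g. 0xAB -> 0xABABABAB
//   }
//
// For a constant value the same result is folded directly via
// APInt::getSplat; for vector destinations the scalar splat is then
// broadcast with G_BUILD_VECTOR.
// ---- End editorial note ----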
8550
8551 LegalizerHelper::LegalizeResult
8552LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
8553 uint64_t KnownLen, Align Alignment,
8554 bool IsVolatile) {
8555 auto &MF = *MI.getParent()->getParent();
8556 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8557 auto &DL = MF.getDataLayout();
8558 LLVMContext &C = MF.getFunction().getContext();
8559
8560 assert(KnownLen != 0 && "Have a zero length memset length!");
8561
8562 bool DstAlignCanChange = false;
8563 MachineFrameInfo &MFI = MF.getFrameInfo();
8564 bool OptSize = shouldLowerMemFuncForSize(MF);
8565
8566 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8567 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8568 DstAlignCanChange = true;
8569
8570 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
8571 std::vector<LLT> MemOps;
8572
8573 const auto &DstMMO = **MI.memoperands_begin();
8574 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8575
8576 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
8577 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
8578
8579 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
8580 MemOp::Set(KnownLen, DstAlignCanChange,
8581 Alignment,
8582 /*IsZeroMemset=*/IsZeroVal,
8583 /*IsVolatile=*/IsVolatile),
8584 DstPtrInfo.getAddrSpace(), ~0u,
8585 MF.getFunction().getAttributes(), TLI))
8586 return UnableToLegalize;
8587
8588 if (DstAlignCanChange) {
8589 // Get an estimate of the type from the LLT.
8590 Type *IRTy = getTypeForLLT(MemOps[0], C);
8591 Align NewAlign = DL.getABITypeAlign(IRTy);
8592 if (NewAlign > Alignment) {
8593 Alignment = NewAlign;
8594 unsigned FI = FIDef->getOperand(1).getIndex();
8595 // Give the stack frame object a larger alignment if needed.
8596 if (MFI.getObjectAlign(FI) < Alignment)
8597 MFI.setObjectAlignment(FI, Alignment);
8598 }
8599 }
8600
8601 MachineIRBuilder MIB(MI);
8602 // Find the largest store and generate the bit pattern for it.
8603 LLT LargestTy = MemOps[0];
8604 for (unsigned i = 1; i < MemOps.size(); i++)
8605 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
8606 LargestTy = MemOps[i];
8607
8608 // The memset stored value is always defined as an s8, so in order to make it
8609 // work with larger store types we need to repeat the bit pattern across the
8610 // wider type.
8611 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
8612
8613 if (!MemSetValue)
8614 return UnableToLegalize;
8615
8616 // Generate the stores. For each store type in the list, we generate the
8617 // matching store of that type to the destination address.
8618 LLT PtrTy = MRI.getType(Dst);
8619 unsigned DstOff = 0;
8620 unsigned Size = KnownLen;
8621 for (unsigned I = 0; I < MemOps.size(); I++) {
8622 LLT Ty = MemOps[I];
8623 unsigned TySize = Ty.getSizeInBytes();
8624 if (TySize > Size) {
8625 // Issuing an unaligned load / store pair that overlaps with the previous
8626 // pair. Adjust the offset accordingly.
8627 assert(I == MemOps.size() - 1 && I != 0);
8628 DstOff -= TySize - Size;
8629 }
8630
8631 // If this store is smaller than the largest store, see whether we can get
8632 // the smaller value for free with a truncate.
8633 Register Value = MemSetValue;
8634 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
8635 MVT VT = getMVTForLLT(Ty);
8636 MVT LargestVT = getMVTForLLT(LargestTy);
8637 if (!LargestTy.isVector() && !Ty.isVector() &&
8638 TLI.isTruncateFree(LargestVT, VT))
8639 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
8640 else
8641 Value = getMemsetValue(Val, Ty, MIB);
8642 if (!Value)
8643 return UnableToLegalize;
8644 }
8645
8646 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
8647
8648 Register Ptr = Dst;
8649 if (DstOff != 0) {
8650 auto Offset =
8651 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
8652 Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
8653 }
8654
8655 MIB.buildStore(Value, Ptr, *StoreMMO);
8656 DstOff += Ty.getSizeInBytes();
8657 Size -= TySize;
8658 }
8659
8660 MI.eraseFromParent();
8661 return Legalized;
8662}
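// ---- Editorial note (not part of the upstream file) ----
// Worked example of the store-emission loop above: a memset of 9 bytes,
// assuming findGISelOptimalMemOpLowering chose MemOps = {s64, s64} (overlap
// allowed). The first s64 store covers bytes [0,8); for the second, TySize
// (8) exceeds the remaining Size (1), so DstOff is pulled back by 7 and the
// store covers bytes [1,9), overlapping the first store by 7 bytes. Without
// overlap, MemOps = {s64, s8} would instead give stores at offsets 0 and 8.
// ---- End editorial note ----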
8663
8664 LegalizerHelper::LegalizeResult
8665LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
8666 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8667
8668 auto [Dst, Src, Len] = MI.getFirst3Regs();
8669
8670 const auto *MMOIt = MI.memoperands_begin();
8671 const MachineMemOperand *MemOp = *MMOIt;
8672 bool IsVolatile = MemOp->isVolatile();
8673
8674 // See if this is a constant length copy
8675 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
8676 // FIXME: support dynamically sized G_MEMCPY_INLINE
8677 assert(LenVRegAndVal &&
8678 "inline memcpy with dynamic size is not yet supported");
8679 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
8680 if (KnownLen == 0) {
8681 MI.eraseFromParent();
8682 return Legalized;
8683 }
8684
8685 const auto &DstMMO = **MI.memoperands_begin();
8686 const auto &SrcMMO = **std::next(MI.memoperands_begin());
8687 Align DstAlign = DstMMO.getBaseAlign();
8688 Align SrcAlign = SrcMMO.getBaseAlign();
8689
8690 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
8691 IsVolatile);
8692}
8693
8694 LegalizerHelper::LegalizeResult
8695LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
8696 uint64_t KnownLen, Align DstAlign,
8697 Align SrcAlign, bool IsVolatile) {
8698 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8699 return lowerMemcpy(MI, Dst, Src, KnownLen,
8700 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
8701 IsVolatile);
8702}
8703
8704 LegalizerHelper::LegalizeResult
8705LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
8706 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
8707 Align SrcAlign, bool IsVolatile) {
8708 auto &MF = *MI.getParent()->getParent();
8709 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8710 auto &DL = MF.getDataLayout();
8711 LLVMContext &C = MF.getFunction().getContext();
8712
8713 assert(KnownLen != 0 && "Have a zero length memcpy length!");
8714
8715 bool DstAlignCanChange = false;
8716 MachineFrameInfo &MFI = MF.getFrameInfo();
8717 Align Alignment = std::min(DstAlign, SrcAlign);
8718
8719 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8720 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8721 DstAlignCanChange = true;
8722
8723 // FIXME: infer better src pointer alignment like SelectionDAG does here.
8724 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
8725 // if the memcpy is in a tail call position.
8726
8727 std::vector<LLT> MemOps;
8728
8729 const auto &DstMMO = **MI.memoperands_begin();
8730 const auto &SrcMMO = **std::next(MI.memoperands_begin());
8731 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8732 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
8733
8734 if (!findGISelOptimalMemOpLowering(
8735 MemOps, Limit,
8736 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
8737 IsVolatile),
8738 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
8739 MF.getFunction().getAttributes(), TLI))
8740 return UnableToLegalize;
8741
8742 if (DstAlignCanChange) {
8743 // Get an estimate of the type from the LLT.
8744 Type *IRTy = getTypeForLLT(MemOps[0], C);
8745 Align NewAlign = DL.getABITypeAlign(IRTy);
8746
8747 // Don't promote to an alignment that would require dynamic stack
8748 // realignment.
8749 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
8750 if (!TRI->hasStackRealignment(MF))
8751 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
8752 NewAlign = NewAlign.previous();
8753
8754 if (NewAlign > Alignment) {
8755 Alignment = NewAlign;
8756 unsigned FI = FIDef->getOperand(1).getIndex();
8757 // Give the stack frame object a larger alignment if needed.
8758 if (MFI.getObjectAlign(FI) < Alignment)
8759 MFI.setObjectAlignment(FI, Alignment);
8760 }
8761 }
8762
8763 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
8764
8765 MachineIRBuilder MIB(MI);
8766 // Now we need to emit a load/store pair for each of the types we've
8767 // collected. I.e. for each type, generate a load from the source pointer of
8768 // that type width, and then generate a corresponding store of the loaded
8769 // value to the dest buffer. This can result in a sequence of loads and stores
8770 // of mixed types, depending on what the target specifies as good types to use.
8771 unsigned CurrOffset = 0;
8772 unsigned Size = KnownLen;
8773 for (auto CopyTy : MemOps) {
8774 // Issuing an unaligned load / store pair that overlaps with the previous
8775 // pair. Adjust the offset accordingly.
8776 if (CopyTy.getSizeInBytes() > Size)
8777 CurrOffset -= CopyTy.getSizeInBytes() - Size;
8778
8779 // Construct MMOs for the accesses.
8780 auto *LoadMMO =
8781 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
8782 auto *StoreMMO =
8783 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
8784
8785 // Create the load.
8786 Register LoadPtr = Src;
8787 Register Offset;
8788 if (CurrOffset != 0) {
8789 LLT SrcTy = MRI.getType(Src);
8790 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
8791 .getReg(0);
8792 LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
8793 }
8794 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
8795
8796 // Create the store.
8797 Register StorePtr = Dst;
8798 if (CurrOffset != 0) {
8799 LLT DstTy = MRI.getType(Dst);
8800 StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
8801 }
8802 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
8803 CurrOffset += CopyTy.getSizeInBytes();
8804 Size -= CopyTy.getSizeInBytes();
8805 }
8806
8807 MI.eraseFromParent();
8808 return Legalized;
8809}
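// ---- Editorial note (not part of the upstream file) ----
// For example, inlining a 12-byte G_MEMCPY with MemOps = {s64, s32} yields
// interleaved load/store pairs, with the offset constant shared between the
// source and destination G_PTR_ADDs (illustrative virtual registers):
//
//   %v0 = G_LOAD %src            ; s64 at offset 0
//         G_STORE %v0, %dst
//   %o  = G_CONSTANT i64 8
//   %sp = G_PTR_ADD %src, %o
//   %v1 = G_LOAD %sp             ; s32 at offset 8
//   %dp = G_PTR_ADD %dst, %o
//         G_STORE %v1, %dp
// ---- End editorial note ----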
8810
8811 LegalizerHelper::LegalizeResult
8812LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
8813 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
8814 bool IsVolatile) {
8815 auto &MF = *MI.getParent()->getParent();
8816 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8817 auto &DL = MF.getDataLayout();
8818 LLVMContext &C = MF.getFunction().getContext();
8819
8820 assert(KnownLen != 0 && "Have a zero length memmove length!");
8821
8822 bool DstAlignCanChange = false;
8823 MachineFrameInfo &MFI = MF.getFrameInfo();
8824 bool OptSize = shouldLowerMemFuncForSize(MF);
8825 Align Alignment = std::min(DstAlign, SrcAlign);
8826
8827 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8828 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8829 DstAlignCanChange = true;
8830
8831 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
8832 std::vector<LLT> MemOps;
8833
8834 const auto &DstMMO = **MI.memoperands_begin();
8835 const auto &SrcMMO = **std::next(MI.memoperands_begin());
8836 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8837 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
8838
8839 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
8840 // to a bug in its findOptimalMemOpLowering implementation. For now do the
8841 // same thing here.
8842 if (!findGISelOptimalMemOpLowering(
8843 MemOps, Limit,
8844 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
8845 /*IsVolatile*/ true),
8846 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
8847 MF.getFunction().getAttributes(), TLI))
8848 return UnableToLegalize;
8849
8850 if (DstAlignCanChange) {
8851 // Get an estimate of the type from the LLT.
8852 Type *IRTy = getTypeForLLT(MemOps[0], C);
8853 Align NewAlign = DL.getABITypeAlign(IRTy);
8854
8855 // Don't promote to an alignment that would require dynamic stack
8856 // realignment.
8857 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
8858 if (!TRI->hasStackRealignment(MF))
8859 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
8860 NewAlign = NewAlign.previous();
8861
8862 if (NewAlign > Alignment) {
8863 Alignment = NewAlign;
8864 unsigned FI = FIDef->getOperand(1).getIndex();
8865 // Give the stack frame object a larger alignment if needed.
8866 if (MFI.getObjectAlign(FI) < Alignment)
8867 MFI.setObjectAlignment(FI, Alignment);
8868 }
8869 }
8870
8871 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
8872
8873 MachineIRBuilder MIB(MI);
8874 // Memmove requires that we perform the loads first before issuing the stores.
8875 // Apart from that, this loop is pretty much doing the same thing as the
8876 // memcpy codegen function.
8877 unsigned CurrOffset = 0;
8878 SmallVector<Register, 16> LoadVals;
8879 for (auto CopyTy : MemOps) {
8880 // Construct MMO for the load.
8881 auto *LoadMMO =
8882 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
8883
8884 // Create the load.
8885 Register LoadPtr = Src;
8886 if (CurrOffset != 0) {
8887 LLT SrcTy = MRI.getType(Src);
8888 auto Offset =
8889 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
8890 LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
8891 }
8892 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
8893 CurrOffset += CopyTy.getSizeInBytes();
8894 }
8895
8896 CurrOffset = 0;
8897 for (unsigned I = 0; I < MemOps.size(); ++I) {
8898 LLT CopyTy = MemOps[I];
8899 // Now store the values loaded.
8900 auto *StoreMMO =
8901 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
8902
8903 Register StorePtr = Dst;
8904 if (CurrOffset != 0) {
8905 LLT DstTy = MRI.getType(Dst);
8906 auto Offset =
8907 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
8908 StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
8909 }
8910 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
8911 CurrOffset += CopyTy.getSizeInBytes();
8912 }
8913 MI.eraseFromParent();
8914 return Legalized;
8915}
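// ---- Editorial note (not part of the upstream file) ----
// Unlike the memcpy lowering, all loads are emitted before any store: if the
// destination overlaps the source, an interleaved load/store sequence could
// read bytes that an earlier store had already clobbered. For a 16-byte
// G_MEMMOVE with MemOps = {s64, s64}, the emitted order is therefore
// load@0, load@8, store@0, store@8 rather than load/store, load/store.
// ---- End editorial note ----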
8916
8917 LegalizerHelper::LegalizeResult
8918 LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
8919 const unsigned Opc = MI.getOpcode();
8920 // This combine is fairly complex so it's not written with a separate
8921 // matcher function.
8922 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
8923 Opc == TargetOpcode::G_MEMSET) &&
8924 "Expected memcpy like instruction");
8925
8926 auto MMOIt = MI.memoperands_begin();
8927 const MachineMemOperand *MemOp = *MMOIt;
8928
8929 Align DstAlign = MemOp->getBaseAlign();
8930 Align SrcAlign;
8931 auto [Dst, Src, Len] = MI.getFirst3Regs();
8932
8933 if (Opc != TargetOpcode::G_MEMSET) {
8934 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
8935 MemOp = *(++MMOIt);
8936 SrcAlign = MemOp->getBaseAlign();
8937 }
8938
8939 // See if this is a constant length copy
8940 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
8941 if (!LenVRegAndVal)
8942 return UnableToLegalize;
8943 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
8944
8945 if (KnownLen == 0) {
8946 MI.eraseFromParent();
8947 return Legalized;
8948 }
8949
8950 bool IsVolatile = MemOp->isVolatile();
8951 if (Opc == TargetOpcode::G_MEMCPY_INLINE)
8952 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
8953 IsVolatile);
8954
8955 // Don't try to optimize volatile.
8956 if (IsVolatile)
8957 return UnableToLegalize;
8958
8959 if (MaxLen && KnownLen > MaxLen)
8960 return UnableToLegalize;
8961
8962 if (Opc == TargetOpcode::G_MEMCPY) {
8963 auto &MF = *MI.getParent()->getParent();
8964 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8965 bool OptSize = shouldLowerMemFuncForSize(MF);
8966 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
8967 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
8968 IsVolatile);
8969 }
8970 if (Opc == TargetOpcode::G_MEMMOVE)
8971 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
8972 if (Opc == TargetOpcode::G_MEMSET)
8973 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
8974 return UnableToLegalize;
8975}
unsigned const MachineRegisterInfo * MRI
#define Success
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const LLT S1
static const LLT S64
static const LLT S32
static const LLT S16
amdgpu AMDGPU Register Bank Select
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition: Utils.h:73
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
std::string Name
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver)
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
#define R2(n)
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t High
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
R600 Clause Merge
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1237
APInt bitcastToAPInt() const
Definition: APFloat.h:1254
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition: APFloat.h:1038
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition: APFloat.h:998
Class for arbitrary precision integers.
Definition: APInt.h:77
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:213
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:208
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1471
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:185
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1161
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:359
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:188
void negate()
Negate this APInt in place.
Definition: APInt.h:1429
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:198
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:852
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:285
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:179
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:218
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:830
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition: APInt.h:249
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
bool hasAttributes() const
Return true if the builder has IR-level attributes.
Definition: Attributes.h:1092
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
bool hasRetAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the return value.
Definition: Attributes.h:820
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:786
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:763
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:762
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:781
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:780
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:784
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:771
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:765
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:782
@ ICMP_EQ
equal
Definition: InstrTypes.h:778
@ ICMP_NE
not equal
Definition: InstrTypes.h:779
bool isSigned() const
Definition: InstrTypes.h:1007
const APFloat & getValueAPF() const
Definition: Constants.h:312
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:146
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
Definition: DataLayout.h:393
bool isBigEndian() const
Definition: DataLayout.h:239
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:308
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition: TypeSize.h:314
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:698
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:695
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:358
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:212
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isTailCall(const MachineInstr &MI) const override
bool isEquality() const
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
Definition: LowLevelType.h:214
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:64
constexpr bool isPointerVector() const
Definition: LowLevelType.h:152
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelType.h:57
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:170
constexpr bool isByteSized() const
Definition: LowLevelType.h:263
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:184
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelType.h:221
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:100
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
Definition: LowLevelType.h:230
constexpr LLT getScalarType() const
Definition: LowLevelType.h:208
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:203
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
Definition: LowLevelType.h:124
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
LegalizeResult lowerShlSat(MachineInstr &MI)
LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LegalizeResult lowerSITOFP(MachineInstr &MI)
LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LegalizeResult lowerBitCount(MachineInstr &MI)
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LegalizeResult lowerLoad(GAnyLoad &MI)
LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizeResult lowerFConstant(MachineInstr &MI)
LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerBitreverse(MachineInstr &MI)
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult lowerEXT(MachineInstr &MI)
LegalizeResult lowerStore(GStore &MI)
LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LegalizeResult lowerFPTOUI(MachineInstr &MI)
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LegalizeResult lowerBitcast(MachineInstr &MI)
LegalizeResult lowerMinMax(MachineInstr &MI)
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LegalizeResult lowerInsert(MachineInstr &MI)
LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LegalizeResult lowerExtract(MachineInstr &MI)
LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LegalizeResult lowerFPOWI(MachineInstr &MI)
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVectorReduction(MachineInstr &MI)
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LegalizeResult lowerFCopySign(MachineInstr &MI)
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LegalizeResult lowerFunnelShift(MachineInstr &MI)
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LegalizeResult lowerFMad(MachineInstr &MI)
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFFloor(MachineInstr &MI)
LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LegalizeResult lowerFPTOSI(MachineInstr &MI)
LegalizeResult lowerUITOFP(MachineInstr &MI)
LegalizeResult lowerShuffleVector(MachineInstr &MI)
LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerMergeValues(MachineInstr &MI)
LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LegalizeResult lowerRotate(MachineInstr &MI)
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LegalizeResult lowerDIVREM(MachineInstr &MI)
LegalizeResult lowerSelect(MachineInstr &MI)
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emiting a runtime library call instead.
LegalizeResult lowerStackRestore(MachineInstr &MI)
LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerStackSave(MachineInstr &MI)
LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeResult lowerTRUNC(MachineInstr &MI)
LegalizeResult lowerBswap(MachineInstr &MI)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LegalizeResult lowerConstant(MachineInstr &MI)
void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
virtual unsigned getExtOpcodeForWideningConstant(LLT SmallTy) const
Return the opcode (SEXT/ZEXT/ANYEXT) that should be performed while widening a constant of type Small...
bool isLegalOrCustom(const LegalityQuery &Query) const
virtual bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Called for instructions with the Custom LegalizationAction.
virtual bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition: MCInstrInfo.h:70
A single uniqued string.
Definition: Metadata.h:720
StringRef getString() const
Definition: Metadata.cpp:610
Machine Value Type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:230
iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Helper class to build MachineInstr.
MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FSUB Op0, Op1.
MachineInstrBuilder buildFPTOSI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOSI Src0.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
std::optional< MachineInstrBuilder > materializePtrAdd(Register &Res, Register Op0, const LLT ValueTy, uint64_t Value)
Materialize and insert Res = G_PTR_ADD Op0, (G_CONSTANT Value)
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_OR Op0, NegOne.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FABS Op0.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildZExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and inserts Res = G_AND Op, LowBitsSet(ImmOp) Since there is no G_ZEXT_INREG like G_SEXT_INREG,...
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FPOW Src0, Src1.
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
MachineInstrBuilder buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_INTRINSIC_TRUNC Src0.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildSExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src)
Build and insert a vector splat of a scalar Src using a G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idio...
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ_ZERO_UNDEF Op0, Src0.
MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts)
Build and insert Res = G_VSCALE MinElts.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
unsigned getBoolExtOp(bool IsVec, bool IsFP) const
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildSMax(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMAX Op0, Op1.
MachineInstrBuilder buildAssertZExt(const DstOp &Res, const SrcOp &Op, unsigned Size)
Build and insert Res = G_ASSERT_ZEXT Op, Size.
MachineInstrBuilder buildStrictFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_STRICT_FADD Op0, Op1.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildCTTZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ_ZERO_UNDEF Op0, Src0.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineInstrBuilder buildPadVectorWithUndefElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a, b, .....
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildSMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMIN Op0, Op1.
MachineInstrBuilder buildInsert(const DstOp &Res, const SrcOp &Src, const SrcOp &Op, unsigned Index)
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_FCOPYSIGN Op0, Op1.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Elt, const SrcOp &Idx)
Build and insert Res = G_INSERT_VECTOR_ELT Val, Elt, Idx.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineInstrBuilder buildDeleteTrailingVectorElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x, y, z = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a,...
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildAtomicCmpXchg(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &CmpVal, const SrcOp &NewVal, MachineMemOperand &MMO)
Build and insert OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, MMO.
MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, ArrayRef< int > Mask)
Build and insert Res = G_SHUFFLE_VECTOR Src1, Src2, Mask.
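The mask given to buildShuffleVector indexes into the concatenation of Src1 and Src2, with -1 marking an undef lane. A hedged example with assumed operands:

  // Hypothetical: interleave the low halves of two <4 x s32> vectors.
  const LLT V4S32 = LLT::fixed_vector(4, 32);
  int Mask[] = {0, 4, 1, 5};        // 0-3 pick from Src1, 4-7 from Src2
  auto Interleaved = B.buildShuffleVector(V4S32, Src1, Src2, Mask);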
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildUMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_UMIN Op0, Op1.
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_FCMP Pred, Op0, Op1.
MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FADD Op0, Op1.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
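G_SEXT_INREG treats the low ImmOp bits of Op as a signed value and extends it over the full register width. A hedged sketch, followed by the long-hand shift form it is equivalent to:

  // Hypothetical: sign-extend the low 8 bits of a 32-bit value in Val.
  const LLT S32 = LLT::scalar(32);
  auto Ext = B.buildSExtInReg(S32, Val, /*ImmOp=*/8);
  // Equivalent expansion on s32 (shift amount = 32 - 8 = 24):
  //   %shl = G_SHL  %val, 24
  //   %ext = G_ASHR %shl, 24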
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
bool isReturn(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:940
bool isCopy() const
bool isDebugInstr() const
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:572
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:800
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
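These MachineRegisterInfo helpers are how new values are materialized during legalization. A hedged sketch of typical use; OrigReg and the half-width split are assumptions.

  // Hypothetical: split a generic value into two fresh half-width vregs.
  MachineRegisterInfo &MRI = *B.getMRI();
  LLT OrigTy = MRI.getType(OrigReg);              // LLT{} if not generic
  unsigned HalfSize = OrigTy.getSizeInBits() / 2;
  LLT HalfTy = LLT::scalar(HalfSize);
  Register Lo = MRI.createGenericVirtualRegister(HalfTy);
  Register Hi = MRI.createGenericVirtualRegister(HalfTy);
  B.buildUnmerge({Lo, Hi}, OrigReg);              // G_UNMERGE_VALUES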
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:373
TargetInstrInfo - Interface to description of machine instruction set.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
unsigned getMaxStoresPerMemcpy(bool OptSize) const
Get maximum # of store operations permitted for llvm.memcpy.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getMaxStoresPerMemmove(bool OptSize) const
Get maximum # of store operations permitted for llvm.memmove.
Align getMinStackArgumentAlignment() const
Return the minimum stack alignment of an argument.
unsigned getMaxStoresPerMemset(bool OptSize) const
Get maximum # of store operations permitted for llvm.memset.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual Register getRegisterByName(const char *RegName, LLT Ty, const MachineFunction &MF) const
Return the register ID of the name passed in.
const Triple & getTargetTriple() const
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:558
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:342
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
static Type * getX86_FP80Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static Type * getVoidTy(LLVMContext &C)
static Type * getFP128Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:251
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
Definition: LegalizerInfo.h:65
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
Definition: LegalizerInfo.h:83
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
Definition: LegalizerInfo.h:57
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegalizerInfo.h:74
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
Definition: LegalizerInfo.h:52
@ Custom
The target wants to do something special with this combination of operand and type.
Definition: LegalizerInfo.h:87
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
Definition: LegalizerInfo.h:71
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
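These RTLIB getters map an operation's source and result types to a concrete runtime-library routine, returning UNKNOWN_LIBCALL when none exists. A hedged sketch of such a lookup; the s32/s128 types and the TLI variable are assumptions.

  // Hypothetical: find the soft-float extension routine for s32 -> s128.
  RTLIB::Libcall LC = RTLIB::getFPEXT(getMVTForLLT(LLT::scalar(32)),
                                      getMVTForLLT(LLT::scalar(128)));
  bool HasLibcall = LC != RTLIB::UNKNOWN_LIBCALL;
  // When a routine exists, the target supplies its symbol name.
  const char *Name = HasLibcall ? TLI.getLibcallName(LC) : nullptr;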
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition: STLExtras.h:853
int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition: MathExtras.h:228
Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition: Utils.cpp:1910
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:639
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition: Utils.cpp:1555
LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:372
LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition: Utils.cpp:1159
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition: Utils.cpp:493
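extractParts is the usual way a wide generic register is unmerged into equally sized narrow pieces. A hedged usage sketch; the 128-bit value and four-way split are assumptions.

  // Hypothetical: split a 128-bit register into four 32-bit pieces.
  SmallVector<Register, 4> Pieces;
  extractParts(WideReg, LLT::scalar(32), /*NumParts=*/4, Pieces, B, MRI);
  // Pieces[0] holds bits [31:0], Pieces[1] bits [63:32], and so on.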
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:349
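A short worked example of the alignment arithmetic these helpers perform, under assumed inputs:

  uint64_t Padded = alignTo(/*Size=*/10, Align(8));           // rounds up to 16
  uint64_t Known  = MinAlign(/*A=*/16, /*B=*/24);             // 8: both are multiples of 8
  Align AfterOff  = commonAlignment(Align(16), /*Offset=*/4); // Align(4)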
int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition: MathExtras.h:219
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:426
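getIConstantVRegValWithLookThrough is the standard query for "is this vreg a known integer constant?", looking through copies and extensions. A hedged sketch; AmtReg is an assumed operand.

  // Hypothetical: only take the constant-shift path when the amount is known.
  if (auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI)) {
    uint64_t Amt = VRegAndVal->Value.getZExtValue();
    // ... emit the fixed-amount expansion using Amt ...
  }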
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition: Utils.h:334
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition: Alignment.h:111
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition: Utils.cpp:1247
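getLCMType and getGCDType are the type arithmetic used when a value must be split into, and rebuilt from, pieces whose sizes do not divide evenly. A hedged illustration for one simple pair of inputs; the expected results are stated from the documented semantics, not verified against a particular build.

  LLT Orig   = LLT::fixed_vector(3, 32);   // 96 bits
  LLT Target = LLT::scalar(64);
  LLT Lcm = getLCMType(Orig, Target);      // expected <6 x s32>: 192 bits, divisible by both
  LLT Gcd = getGCDType(Orig, Target);      // expected s32: the largest common piece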
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition: Utils.cpp:597
#define N
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:271
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:246
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:272
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Align previous() const
Definition: Alignment.h:88
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
LegalizeAction Action
The action to take or the final answer.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)