1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
31 #include "llvm/IR/Instructions.h"
32 #include "llvm/Support/Debug.h"
36 #include <numeric>
37 #include <optional>
38 
39 #define DEBUG_TYPE "legalizer"
40 
41 using namespace llvm;
42 using namespace LegalizeActions;
43 using namespace MIPatternMatch;
44 
45 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
46 ///
47 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
48 /// with any leftover piece as type \p LeftoverTy
49 ///
50 /// Returns -1 in the first element of the pair if the breakdown is not
51 /// satisfiable.
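/// For example (illustrative): breaking OrigTy = s88 into NarrowTy = s32
/// pieces yields {2, 1} with \p LeftoverTy set to s24, since 88 = 2 * 32 + 24.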
52 static std::pair<int, int>
53 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
54  assert(!LeftoverTy.isValid() && "this is an out argument");
55 
56  unsigned Size = OrigTy.getSizeInBits();
57  unsigned NarrowSize = NarrowTy.getSizeInBits();
58  unsigned NumParts = Size / NarrowSize;
59  unsigned LeftoverSize = Size - NumParts * NarrowSize;
60  assert(Size > NarrowSize);
61 
62  if (LeftoverSize == 0)
63  return {NumParts, 0};
64 
65  if (NarrowTy.isVector()) {
66  unsigned EltSize = OrigTy.getScalarSizeInBits();
67  if (LeftoverSize % EltSize != 0)
68  return {-1, -1};
69  LeftoverTy = LLT::scalarOrVector(
70  ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
71  } else {
72  LeftoverTy = LLT::scalar(LeftoverSize);
73  }
74 
75  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
76  return std::make_pair(NumParts, NumLeftover);
77 }
78 
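/// Map a scalar LLT to the IR floating-point type of the same width (s16 ->
/// half, s32 -> float, s64 -> double, s80 -> x86_fp80, s128 -> fp128), or
/// return nullptr if there is no such type.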
79 static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
80 
81  if (!Ty.isScalar())
82  return nullptr;
83 
84  switch (Ty.getSizeInBits()) {
85  case 16:
86  return Type::getHalfTy(Ctx);
87  case 32:
88  return Type::getFloatTy(Ctx);
89  case 64:
90  return Type::getDoubleTy(Ctx);
91  case 80:
92  return Type::getX86_FP80Ty(Ctx);
93  case 128:
94  return Type::getFP128Ty(Ctx);
95  default:
96  return nullptr;
97  }
98 }
99 
100 LegalizerHelper::LegalizerHelper(MachineFunction &MF,
101  GISelChangeObserver &Observer,
102  MachineIRBuilder &Builder)
103  : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
104  LI(*MF.getSubtarget().getLegalizerInfo()),
105  TLI(*MF.getSubtarget().getTargetLowering()) { }
106 
107 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
108  GISelChangeObserver &Observer,
109  MachineIRBuilder &B)
110  : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
111  TLI(*MF.getSubtarget().getTargetLowering()) { }
112 
112 
113 LegalizerHelper::LegalizeResult
114 LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
115  LostDebugLocObserver &LocObserver) {
116  LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
117 
118  MIRBuilder.setInstrAndDebugLoc(MI);
119 
120  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
121  MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
122  return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
123  auto Step = LI.getAction(MI, MRI);
124  switch (Step.Action) {
125  case Legal:
126  LLVM_DEBUG(dbgs() << ".. Already legal\n");
127  return AlreadyLegal;
128  case Libcall:
129  LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
130  return libcall(MI, LocObserver);
131  case NarrowScalar:
132  LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
133  return narrowScalar(MI, Step.TypeIdx, Step.NewType);
134  case WidenScalar:
135  LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
136  return widenScalar(MI, Step.TypeIdx, Step.NewType);
137  case Bitcast:
138  LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
139  return bitcast(MI, Step.TypeIdx, Step.NewType);
140  case Lower:
141  LLVM_DEBUG(dbgs() << ".. Lower\n");
142  return lower(MI, Step.TypeIdx, Step.NewType);
143  case FewerElements:
144  LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
145  return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
146  case MoreElements:
147  LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
148  return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
149  case Custom:
150  LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
151  return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
152  default:
153  LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
154  return UnableToLegalize;
155  }
156 }
157 
158 void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
159  SmallVectorImpl<Register> &VRegs) {
160  for (int i = 0; i < NumParts; ++i)
161  VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
162  MIRBuilder.buildUnmerge(VRegs, Reg);
163 }
164 
165 bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
166  LLT MainTy, LLT &LeftoverTy,
167  SmallVectorImpl<Register> &VRegs,
168  SmallVectorImpl<Register> &LeftoverRegs) {
169  assert(!LeftoverTy.isValid() && "this is an out argument");
170 
171  unsigned RegSize = RegTy.getSizeInBits();
172  unsigned MainSize = MainTy.getSizeInBits();
173  unsigned NumParts = RegSize / MainSize;
174  unsigned LeftoverSize = RegSize - NumParts * MainSize;
175 
176  // Use an unmerge when possible.
177  if (LeftoverSize == 0) {
178  for (unsigned I = 0; I < NumParts; ++I)
179  VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
180  MIRBuilder.buildUnmerge(VRegs, Reg);
181  return true;
182  }
183 
184  // Perform irregular split. Leftover is last element of RegPieces.
185  if (MainTy.isVector()) {
186  SmallVector<Register, 8> RegPieces;
187  extractVectorParts(Reg, MainTy.getNumElements(), RegPieces);
188  for (unsigned i = 0; i < RegPieces.size() - 1; ++i)
189  VRegs.push_back(RegPieces[i]);
190  LeftoverRegs.push_back(RegPieces[RegPieces.size() - 1]);
191  LeftoverTy = MRI.getType(LeftoverRegs[0]);
192  return true;
193  }
194 
195  LeftoverTy = LLT::scalar(LeftoverSize);
196  // For irregular sizes, extract the individual parts.
197  for (unsigned I = 0; I != NumParts; ++I) {
198  Register NewReg = MRI.createGenericVirtualRegister(MainTy);
199  VRegs.push_back(NewReg);
200  MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
201  }
202 
203  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
204  Offset += LeftoverSize) {
205  Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
206  LeftoverRegs.push_back(NewReg);
207  MIRBuilder.buildExtract(NewReg, Reg, Offset);
208  }
209 
210  return true;
211 }
212 
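/// Split the vector \p Reg into pieces of \p NumElts elements. For example
/// (illustrative): a <5 x s32> source with NumElts = 2 is unmerged into five
/// s32 values, rebuilt as two <2 x s32> registers, and the remaining element
/// becomes a single s32 leftover.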
213 void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts,
214  SmallVectorImpl<Register> &VRegs) {
215  LLT RegTy = MRI.getType(Reg);
216  assert(RegTy.isVector() && "Expected a vector type");
217 
218  LLT EltTy = RegTy.getElementType();
219  LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
220  unsigned RegNumElts = RegTy.getNumElements();
221  unsigned LeftoverNumElts = RegNumElts % NumElts;
222  unsigned NumNarrowTyPieces = RegNumElts / NumElts;
223 
224  // Perfect split without leftover
225  if (LeftoverNumElts == 0)
226  return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs);
227 
228  // Irregular split. Unmerge to individual elements so the artifact combiner
229  // has direct access to them, then rebuild vectors of NumElts elements. The
230  // remaining element(s) form the leftover piece.
231  SmallVector<Register, 8> Elts;
232  extractParts(Reg, EltTy, RegNumElts, Elts);
233 
234  unsigned Offset = 0;
235  // Requested sub-vectors of NarrowTy.
236  for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) {
237  ArrayRef<Register> Pieces(&Elts[Offset], NumElts);
238  VRegs.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0));
239  }
240 
241  // Leftover element(s).
242  if (LeftoverNumElts == 1) {
243  VRegs.push_back(Elts[Offset]);
244  } else {
245  LLT LeftoverTy = LLT::fixed_vector(LeftoverNumElts, EltTy);
246  ArrayRef<Register> Pieces(&Elts[Offset], LeftoverNumElts);
247  VRegs.push_back(MIRBuilder.buildMerge(LeftoverTy, Pieces).getReg(0));
248  }
249 }
250 
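/// Inverse of extractParts: recombine the \p PartTy pieces (plus an optional
/// leftover piece) back into \p DstReg. For example (illustrative), an s88
/// result is rebuilt from two s32 parts and one s24 leftover register.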
251 void LegalizerHelper::insertParts(Register DstReg,
252  LLT ResultTy, LLT PartTy,
253  ArrayRef<Register> PartRegs,
254  LLT LeftoverTy,
255  ArrayRef<Register> LeftoverRegs) {
256  if (!LeftoverTy.isValid()) {
257  assert(LeftoverRegs.empty());
258 
259  if (!ResultTy.isVector()) {
260  MIRBuilder.buildMerge(DstReg, PartRegs);
261  return;
262  }
263 
264  if (PartTy.isVector())
265  MIRBuilder.buildConcatVectors(DstReg, PartRegs);
266  else
267  MIRBuilder.buildBuildVector(DstReg, PartRegs);
268  return;
269  }
270 
271  // Merge sub-vectors with different number of elements and insert into DstReg.
272  if (ResultTy.isVector()) {
273  assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
274  SmallVector<Register, 8> AllRegs;
275  for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
276  AllRegs.push_back(Reg);
277  return mergeMixedSubvectors(DstReg, AllRegs);
278  }
279 
280  SmallVector<Register> GCDRegs;
281  LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
282  for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
283  extractGCDType(GCDRegs, GCDTy, PartReg);
284  LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
285  buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
286 }
287 
288 void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
289  Register Reg) {
290  LLT Ty = MRI.getType(Reg);
291  SmallVector<Register, 8> RegElts;
292  extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts);
293  Elts.append(RegElts);
294 }
295 
296 /// Merge \p PartRegs with different types into \p DstReg.
297 void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
298  ArrayRef<Register> PartRegs) {
299  SmallVector<Register, 8> AllElts;
300  for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
301  appendVectorElts(AllElts, PartRegs[i]);
302 
303  Register Leftover = PartRegs[PartRegs.size() - 1];
304  if (MRI.getType(Leftover).isScalar())
305  AllElts.push_back(Leftover);
306  else
307  appendVectorElts(AllElts, Leftover);
308 
309  MIRBuilder.buildMerge(DstReg, AllElts);
310 }
311 
312 /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
313 static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
314  const MachineInstr &MI) {
315  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
316 
317  const int StartIdx = Regs.size();
318  const int NumResults = MI.getNumOperands() - 1;
319  Regs.resize(Regs.size() + NumResults);
320  for (int I = 0; I != NumResults; ++I)
321  Regs[StartIdx + I] = MI.getOperand(I).getReg();
322 }
323 
324 void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
325  LLT GCDTy, Register SrcReg) {
326  LLT SrcTy = MRI.getType(SrcReg);
327  if (SrcTy == GCDTy) {
328  // If the source already evenly divides the result type, we don't need to do
329  // anything.
330  Parts.push_back(SrcReg);
331  } else {
332  // Need to split into common type sized pieces.
333  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
334  getUnmergeResults(Parts, *Unmerge);
335  }
336 }
337 
338 LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
339  LLT NarrowTy, Register SrcReg) {
340  LLT SrcTy = MRI.getType(SrcReg);
341  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
342  extractGCDType(Parts, GCDTy, SrcReg);
343  return GCDTy;
344 }
345 
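/// Build G_MERGE_VALUES of \p NarrowTy from the \p GCDTy pieces in \p VRegs,
/// padding with \p PadStrategy (G_ANYEXT, G_ZEXT or G_SEXT) until the least
/// common multiple of DstTy and NarrowTy is covered; VRegs is overwritten with
/// the NarrowTy-sized results. Illustrative example: DstTy = s48,
/// NarrowTy = s32, GCDTy = s16 and three s16 sources give LCMTy = s96 and
/// three s32 pieces {v0,v1}, {v2,undef}, {undef,undef} under G_ANYEXT padding.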
346 LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
347  SmallVectorImpl<Register> &VRegs,
348  unsigned PadStrategy) {
349  LLT LCMTy = getLCMType(DstTy, NarrowTy);
350 
351  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
352  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
353  int NumOrigSrc = VRegs.size();
354 
355  Register PadReg;
356 
357  // Get a value we can use to pad the source value if the sources won't evenly
358  // cover the result type.
359  if (NumOrigSrc < NumParts * NumSubParts) {
360  if (PadStrategy == TargetOpcode::G_ZEXT)
361  PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
362  else if (PadStrategy == TargetOpcode::G_ANYEXT)
363  PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
364  else {
365  assert(PadStrategy == TargetOpcode::G_SEXT);
366 
367  // Shift the sign bit of the low register through the high register.
368  auto ShiftAmt =
369  MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
370  PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
371  }
372  }
373 
374  // Registers for the final merge to be produced.
375  SmallVector<Register, 4> Remerge(NumParts);
376 
377  // Registers needed for intermediate merges, which will be merged into a
378  // source for Remerge.
379  SmallVector<Register, 4> SubMerge(NumSubParts);
380 
381  // Once we've fully read off the end of the original source bits, we can reuse
382  // the same high bits for remaining padding elements.
383  Register AllPadReg;
384 
385  // Build merges to the LCM type to cover the original result type.
386  for (int I = 0; I != NumParts; ++I) {
387  bool AllMergePartsArePadding = true;
388 
389  // Build the requested merges to the requested type.
390  for (int J = 0; J != NumSubParts; ++J) {
391  int Idx = I * NumSubParts + J;
392  if (Idx >= NumOrigSrc) {
393  SubMerge[J] = PadReg;
394  continue;
395  }
396 
397  SubMerge[J] = VRegs[Idx];
398 
399  // There are meaningful bits here we can't reuse later.
400  AllMergePartsArePadding = false;
401  }
402 
403  // If we've filled up a complete piece with padding bits, we can directly
404  // emit the natural sized constant if applicable, rather than a merge of
405  // smaller constants.
406  if (AllMergePartsArePadding && !AllPadReg) {
407  if (PadStrategy == TargetOpcode::G_ANYEXT)
408  AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
409  else if (PadStrategy == TargetOpcode::G_ZEXT)
410  AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
411 
412  // If this is a sign extension, we can't materialize a trivial constant
413  // with the right type and have to produce a merge.
414  }
415 
416  if (AllPadReg) {
417  // Avoid creating additional instructions if we're just adding additional
418  // copies of padding bits.
419  Remerge[I] = AllPadReg;
420  continue;
421  }
422 
423  if (NumSubParts == 1)
424  Remerge[I] = SubMerge[0];
425  else
426  Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);
427 
428  // In the sign extend padding case, re-use the first all-signbit merge.
429  if (AllMergePartsArePadding && !AllPadReg)
430  AllPadReg = Remerge[I];
431  }
432 
433  VRegs = std::move(Remerge);
434  return LCMTy;
435 }
436 
437 void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
438  ArrayRef<Register> RemergeRegs) {
439  LLT DstTy = MRI.getType(DstReg);
440 
441  // Create the merge to the widened source, and extract the relevant bits into
442  // the result.
443 
444  if (DstTy == LCMTy) {
445  MIRBuilder.buildMerge(DstReg, RemergeRegs);
446  return;
447  }
448 
449  auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
450  if (DstTy.isScalar() && LCMTy.isScalar()) {
451  MIRBuilder.buildTrunc(DstReg, Remerge);
452  return;
453  }
454 
455  if (LCMTy.isVector()) {
456  unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
457  SmallVector<Register, 8> UnmergeDefs(NumDefs);
458  UnmergeDefs[0] = DstReg;
459  for (unsigned I = 1; I != NumDefs; ++I)
460  UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
461 
462  MIRBuilder.buildUnmerge(UnmergeDefs,
463  MIRBuilder.buildMerge(LCMTy, RemergeRegs));
464  return;
465  }
466 
467  llvm_unreachable("unhandled case");
468 }
469 
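// Map a generic opcode plus scalar size onto the corresponding runtime
// library call. For example (illustrative): G_FSIN with Size == 32 selects
// RTLIB::SIN_F32, which targets typically lower to a call to sinf.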
470 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
471 #define RTLIBCASE_INT(LibcallPrefix) \
472  do { \
473  switch (Size) { \
474  case 32: \
475  return RTLIB::LibcallPrefix##32; \
476  case 64: \
477  return RTLIB::LibcallPrefix##64; \
478  case 128: \
479  return RTLIB::LibcallPrefix##128; \
480  default: \
481  llvm_unreachable("unexpected size"); \
482  } \
483  } while (0)
484 
485 #define RTLIBCASE(LibcallPrefix) \
486  do { \
487  switch (Size) { \
488  case 32: \
489  return RTLIB::LibcallPrefix##32; \
490  case 64: \
491  return RTLIB::LibcallPrefix##64; \
492  case 80: \
493  return RTLIB::LibcallPrefix##80; \
494  case 128: \
495  return RTLIB::LibcallPrefix##128; \
496  default: \
497  llvm_unreachable("unexpected size"); \
498  } \
499  } while (0)
500 
501  switch (Opcode) {
502  case TargetOpcode::G_MUL:
503  RTLIBCASE_INT(MUL_I);
504  case TargetOpcode::G_SDIV:
505  RTLIBCASE_INT(SDIV_I);
506  case TargetOpcode::G_UDIV:
507  RTLIBCASE_INT(UDIV_I);
508  case TargetOpcode::G_SREM:
509  RTLIBCASE_INT(SREM_I);
510  case TargetOpcode::G_UREM:
511  RTLIBCASE_INT(UREM_I);
512  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
513  RTLIBCASE_INT(CTLZ_I);
514  case TargetOpcode::G_FADD:
515  RTLIBCASE(ADD_F);
516  case TargetOpcode::G_FSUB:
517  RTLIBCASE(SUB_F);
518  case TargetOpcode::G_FMUL:
519  RTLIBCASE(MUL_F);
520  case TargetOpcode::G_FDIV:
521  RTLIBCASE(DIV_F);
522  case TargetOpcode::G_FEXP:
523  RTLIBCASE(EXP_F);
524  case TargetOpcode::G_FEXP2:
525  RTLIBCASE(EXP2_F);
526  case TargetOpcode::G_FREM:
527  RTLIBCASE(REM_F);
528  case TargetOpcode::G_FPOW:
529  RTLIBCASE(POW_F);
530  case TargetOpcode::G_FMA:
531  RTLIBCASE(FMA_F);
532  case TargetOpcode::G_FSIN:
533  RTLIBCASE(SIN_F);
534  case TargetOpcode::G_FCOS:
535  RTLIBCASE(COS_F);
536  case TargetOpcode::G_FLOG10:
537  RTLIBCASE(LOG10_F);
538  case TargetOpcode::G_FLOG:
539  RTLIBCASE(LOG_F);
540  case TargetOpcode::G_FLOG2:
541  RTLIBCASE(LOG2_F);
542  case TargetOpcode::G_FCEIL:
543  RTLIBCASE(CEIL_F);
544  case TargetOpcode::G_FFLOOR:
545  RTLIBCASE(FLOOR_F);
546  case TargetOpcode::G_FMINNUM:
547  RTLIBCASE(FMIN_F);
548  case TargetOpcode::G_FMAXNUM:
549  RTLIBCASE(FMAX_F);
550  case TargetOpcode::G_FSQRT:
551  RTLIBCASE(SQRT_F);
552  case TargetOpcode::G_FRINT:
553  RTLIBCASE(RINT_F);
554  case TargetOpcode::G_FNEARBYINT:
555  RTLIBCASE(NEARBYINT_F);
556  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
557  RTLIBCASE(ROUNDEVEN_F);
558  }
559  llvm_unreachable("Unknown libcall function");
560 }
561 
562 /// True if an instruction is in tail position in its caller. Intended for
563 /// legalizing libcalls as tail calls when possible.
564 static bool isLibCallInTailPosition(MachineInstr &MI,
565  const TargetInstrInfo &TII,
566  MachineRegisterInfo &MRI) {
567  MachineBasicBlock &MBB = *MI.getParent();
568  const Function &F = MBB.getParent()->getFunction();
569 
570  // Conservatively require the attributes of the call to match those of
571  // the return. Ignore NoAlias and NonNull because they don't affect the
572  // call sequence.
573  AttributeList CallerAttrs = F.getAttributes();
574  if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
575  .removeAttribute(Attribute::NoAlias)
576  .removeAttribute(Attribute::NonNull)
577  .hasAttributes())
578  return false;
579 
580  // It's not safe to eliminate the sign / zero extension of the return value.
581  if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
582  CallerAttrs.hasRetAttr(Attribute::SExt))
583  return false;
584 
585  // Only tail call if the following instruction is a standard return or if we
586  // have a `thisreturn` callee, and a sequence like:
587  //
588  // G_MEMCPY %0, %1, %2
589  // $x0 = COPY %0
590  // RET_ReallyLR implicit $x0
591  auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
592  if (Next != MBB.instr_end() && Next->isCopy()) {
593  switch (MI.getOpcode()) {
594  default:
595  llvm_unreachable("unsupported opcode");
596  case TargetOpcode::G_BZERO:
597  return false;
598  case TargetOpcode::G_MEMCPY:
599  case TargetOpcode::G_MEMMOVE:
600  case TargetOpcode::G_MEMSET:
601  break;
602  }
603 
604  Register VReg = MI.getOperand(0).getReg();
605  if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
606  return false;
607 
608  Register PReg = Next->getOperand(0).getReg();
609  if (!PReg.isPhysical())
610  return false;
611 
612  auto Ret = next_nodbg(Next, MBB.instr_end());
613  if (Ret == MBB.instr_end() || !Ret->isReturn())
614  return false;
615 
616  if (Ret->getNumImplicitOperands() != 1)
617  return false;
618 
619  if (PReg != Ret->getOperand(0).getReg())
620  return false;
621 
622  // Skip over the COPY that we just validated.
623  Next = Ret;
624  }
625 
626  if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
627  return false;
628 
629  return true;
630 }
631 
632 LegalizerHelper::LegalizeResult
633 llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
634  const CallLowering::ArgInfo &Result,
635  ArrayRef<CallLowering::ArgInfo> Args,
636  const CallingConv::ID CC) {
637  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
638 
639  CallLowering::CallLoweringInfo Info;
640  Info.CallConv = CC;
641  Info.Callee = MachineOperand::CreateES(Name);
642  Info.OrigRet = Result;
643  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
644  if (!CLI.lowerCall(MIRBuilder, Info))
645  return LegalizerHelper::UnableToLegalize;
646 
647  return LegalizerHelper::Legalized;
648 }
649 
650 LegalizerHelper::LegalizeResult
651 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
652  const CallLowering::ArgInfo &Result,
653  ArrayRef<CallLowering::ArgInfo> Args) {
654  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
655  const char *Name = TLI.getLibcallName(Libcall);
656  const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
657  return createLibcall(MIRBuilder, Name, Result, Args, CC);
658 }
659 
660 // Useful for libcalls where all operands have the same type.
661 static LegalizerHelper::LegalizeResult
662 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
663  Type *OpType) {
664  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
665 
666  // FIXME: What does the original arg index mean here?
667  SmallVector<CallLowering::ArgInfo, 3> Args;
668  for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
669  Args.push_back({MO.getReg(), OpType, 0});
670  return createLibcall(MIRBuilder, Libcall,
671  {MI.getOperand(0).getReg(), OpType, 0}, Args);
672 }
673 
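// Lower G_BZERO / G_MEMCPY / G_MEMMOVE / G_MEMSET into a call to the matching
// runtime routine. Illustrative example: G_MEMCPY %dst, %src, %len with a
// non-zero trailing 'tail' immediate becomes a (possibly tail) call to memcpy,
// with pointer operands passed as i8* and the length as an integer.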
674 LegalizerHelper::LegalizeResult
675 llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
676  MachineInstr &MI, LostDebugLocObserver &LocObserver) {
677  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
678 
679  SmallVector<CallLowering::ArgInfo, 3> Args;
680  // Add all the args, except for the last which is an imm denoting 'tail'.
681  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
682  Register Reg = MI.getOperand(i).getReg();
683 
684  // Need to derive an IR type for call lowering.
685  LLT OpLLT = MRI.getType(Reg);
686  Type *OpTy = nullptr;
687  if (OpLLT.isPointer())
688  OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
689  else
690  OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
691  Args.push_back({Reg, OpTy, 0});
692  }
693 
694  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
695  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
696  RTLIB::Libcall RTLibcall;
697  unsigned Opc = MI.getOpcode();
698  switch (Opc) {
699  case TargetOpcode::G_BZERO:
700  RTLibcall = RTLIB::BZERO;
701  break;
702  case TargetOpcode::G_MEMCPY:
703  RTLibcall = RTLIB::MEMCPY;
704  Args[0].Flags[0].setReturned();
705  break;
706  case TargetOpcode::G_MEMMOVE:
707  RTLibcall = RTLIB::MEMMOVE;
708  Args[0].Flags[0].setReturned();
709  break;
710  case TargetOpcode::G_MEMSET:
711  RTLibcall = RTLIB::MEMSET;
712  Args[0].Flags[0].setReturned();
713  break;
714  default:
715  llvm_unreachable("unsupported opcode");
716  }
717  const char *Name = TLI.getLibcallName(RTLibcall);
718 
719  // Unsupported libcall on the target.
720  if (!Name) {
721  LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
722  << MIRBuilder.getTII().getName(Opc) << "\n");
723  return LegalizerHelper::UnableToLegalize;
724  }
725 
726  CallLowering::CallLoweringInfo Info;
727  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
728  Info.Callee = MachineOperand::CreateES(Name);
729  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
730  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
731  isLibCallInTailPosition(MI, MIRBuilder.getTII(), MRI);
732 
733  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
734  if (!CLI.lowerCall(MIRBuilder, Info))
735  return LegalizerHelper::UnableToLegalize;
736 
737  if (Info.LoweredTailCall) {
738  assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
739 
740  // Check debug locations before removing the return.
741  LocObserver.checkpoint(true);
742 
743  // We must have a return following the call (or debug insts) to get past
744  // isLibCallInTailPosition.
745  do {
746  MachineInstr *Next = MI.getNextNode();
747  assert(Next &&
748  (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
749  "Expected instr following MI to be return or debug inst?");
750  // We lowered a tail call, so the call is now the return from the block.
751  // Delete the old return.
752  Next->eraseFromParent();
753  } while (MI.getNextNode());
754 
755  // We expect to lose the debug location from the return.
756  LocObserver.checkpoint(false);
757  }
758 
759  return LegalizerHelper::Legalized;
760 }
761 
762 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
763  Type *FromType) {
764  auto ToMVT = MVT::getVT(ToType);
765  auto FromMVT = MVT::getVT(FromType);
766 
767  switch (Opcode) {
768  case TargetOpcode::G_FPEXT:
769  return RTLIB::getFPEXT(FromMVT, ToMVT);
770  case TargetOpcode::G_FPTRUNC:
771  return RTLIB::getFPROUND(FromMVT, ToMVT);
772  case TargetOpcode::G_FPTOSI:
773  return RTLIB::getFPTOSINT(FromMVT, ToMVT);
774  case TargetOpcode::G_FPTOUI:
775  return RTLIB::getFPTOUINT(FromMVT, ToMVT);
776  case TargetOpcode::G_SITOFP:
777  return RTLIB::getSINTTOFP(FromMVT, ToMVT);
778  case TargetOpcode::G_UITOFP:
779  return RTLIB::getUINTTOFP(FromMVT, ToMVT);
780  }
781  llvm_unreachable("Unsupported libcall function");
782 }
783 
784 static LegalizerHelper::LegalizeResult
785 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
786  Type *FromType) {
787  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
788  return createLibcall(MIRBuilder, Libcall,
789  {MI.getOperand(0).getReg(), ToType, 0},
790  {{MI.getOperand(1).getReg(), FromType, 0}});
791 }
792 
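// Fall back to a libcall for an operation the target cannot handle natively.
// Illustrative example: G_FREM on s64 operands is emitted as a call to fmod
// via RTLIB::REM_F64, after which the original instruction is erased.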
793 LegalizerHelper::LegalizeResult
794 LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
795  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
796  unsigned Size = LLTy.getSizeInBits();
797  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
798 
799  switch (MI.getOpcode()) {
800  default:
801  return UnableToLegalize;
802  case TargetOpcode::G_MUL:
803  case TargetOpcode::G_SDIV:
804  case TargetOpcode::G_UDIV:
805  case TargetOpcode::G_SREM:
806  case TargetOpcode::G_UREM:
807  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
808  Type *HLTy = IntegerType::get(Ctx, Size);
809  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
810  if (Status != Legalized)
811  return Status;
812  break;
813  }
814  case TargetOpcode::G_FADD:
815  case TargetOpcode::G_FSUB:
816  case TargetOpcode::G_FMUL:
817  case TargetOpcode::G_FDIV:
818  case TargetOpcode::G_FMA:
819  case TargetOpcode::G_FPOW:
820  case TargetOpcode::G_FREM:
821  case TargetOpcode::G_FCOS:
822  case TargetOpcode::G_FSIN:
823  case TargetOpcode::G_FLOG10:
824  case TargetOpcode::G_FLOG:
825  case TargetOpcode::G_FLOG2:
826  case TargetOpcode::G_FEXP:
827  case TargetOpcode::G_FEXP2:
828  case TargetOpcode::G_FCEIL:
829  case TargetOpcode::G_FFLOOR:
830  case TargetOpcode::G_FMINNUM:
831  case TargetOpcode::G_FMAXNUM:
832  case TargetOpcode::G_FSQRT:
833  case TargetOpcode::G_FRINT:
834  case TargetOpcode::G_FNEARBYINT:
835  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
836  Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
837  if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
838  LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
839  return UnableToLegalize;
840  }
841  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
842  if (Status != Legalized)
843  return Status;
844  break;
845  }
846  case TargetOpcode::G_FPEXT:
847  case TargetOpcode::G_FPTRUNC: {
848  Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
849  Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
850  if (!FromTy || !ToTy)
851  return UnableToLegalize;
852  LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy);
853  if (Status != Legalized)
854  return Status;
855  break;
856  }
857  case TargetOpcode::G_FPTOSI:
858  case TargetOpcode::G_FPTOUI: {
859  // FIXME: Support other types
860  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
861  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
862  if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
863  return UnableToLegalize;
864  LegalizeResult Status = conversionLibcall(
865  MI, MIRBuilder,
866  ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
867  FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
868  if (Status != Legalized)
869  return Status;
870  break;
871  }
872  case TargetOpcode::G_SITOFP:
873  case TargetOpcode::G_UITOFP: {
874  // FIXME: Support other types
875  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
876  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
877  if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
878  return UnableToLegalize;
879  LegalizeResult Status = conversionLibcall(
880  MI, MIRBuilder,
881  ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
882  FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
883  if (Status != Legalized)
884  return Status;
885  break;
886  }
887  case TargetOpcode::G_BZERO:
888  case TargetOpcode::G_MEMCPY:
889  case TargetOpcode::G_MEMMOVE:
890  case TargetOpcode::G_MEMSET: {
891  LegalizeResult Result =
892  createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
893  if (Result != Legalized)
894  return Result;
895  MI.eraseFromParent();
896  return Result;
897  }
898  }
899 
900  MI.eraseFromParent();
901  return Legalized;
902 }
903 
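// Narrow-scalar legalization: break an operation on a scalar type that is too
// wide into operations on NarrowTy-sized pieces. Illustrative example: a
// 64-bit G_ADD on a 32-bit target is split by narrowScalarAddSub into a
// G_UADDO on the low halves and a G_UADDE on the high halves, followed by a
// G_MERGE_VALUES of the two 32-bit results.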
904 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
905  unsigned TypeIdx,
906  LLT NarrowTy) {
907  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
908  uint64_t NarrowSize = NarrowTy.getSizeInBits();
909 
910  switch (MI.getOpcode()) {
911  default:
912  return UnableToLegalize;
913  case TargetOpcode::G_IMPLICIT_DEF: {
914  Register DstReg = MI.getOperand(0).getReg();
915  LLT DstTy = MRI.getType(DstReg);
916 
917  // If SizeOp0 is not an exact multiple of NarrowSize, emit
918  // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
919  // FIXME: Although this would also be legal for the general case, it causes
920  // a lot of regressions in the emitted code (superfluous COPYs, artifact
921  // combines not being hit). This seems to be a problem related to the
922  // artifact combiner.
923  if (SizeOp0 % NarrowSize != 0) {
924  LLT ImplicitTy = NarrowTy;
925  if (DstTy.isVector())
926  ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
927 
928  Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
929  MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
930 
931  MI.eraseFromParent();
932  return Legalized;
933  }
934 
935  int NumParts = SizeOp0 / NarrowSize;
936 
937  SmallVector<Register, 2> DstRegs;
938  for (int i = 0; i < NumParts; ++i)
939  DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
940 
941  if (DstTy.isVector())
942  MIRBuilder.buildBuildVector(DstReg, DstRegs);
943  else
944  MIRBuilder.buildMerge(DstReg, DstRegs);
945  MI.eraseFromParent();
946  return Legalized;
947  }
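  // The G_CONSTANT case below splits a wide constant into NarrowTy-sized
  // G_CONSTANTs plus a leftover piece when the width is not an exact multiple.
  // Illustrative example: an s88 constant narrowed to s32 becomes two s32
  // constants holding the low bits and one s24 constant holding the rest,
  // recombined with insertParts.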
948  case TargetOpcode::G_CONSTANT: {
949  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
950  const APInt &Val = MI.getOperand(1).getCImm()->getValue();
951  unsigned TotalSize = Ty.getSizeInBits();
952  unsigned NarrowSize = NarrowTy.getSizeInBits();
953  int NumParts = TotalSize / NarrowSize;
954 
955  SmallVector<Register, 4> PartRegs;
956  for (int I = 0; I != NumParts; ++I) {
957  unsigned Offset = I * NarrowSize;
958  auto K = MIRBuilder.buildConstant(NarrowTy,
959  Val.lshr(Offset).trunc(NarrowSize));
960  PartRegs.push_back(K.getReg(0));
961  }
962 
963  LLT LeftoverTy;
964  unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
965  SmallVector<Register, 1> LeftoverRegs;
966  if (LeftoverBits != 0) {
967  LeftoverTy = LLT::scalar(LeftoverBits);
968  auto K = MIRBuilder.buildConstant(
969  LeftoverTy,
970  Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
971  LeftoverRegs.push_back(K.getReg(0));
972  }
973 
974  insertParts(MI.getOperand(0).getReg(),
975  Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
976 
977  MI.eraseFromParent();
978  return Legalized;
979  }
980  case TargetOpcode::G_SEXT:
981  case TargetOpcode::G_ZEXT:
982  case TargetOpcode::G_ANYEXT:
983  return narrowScalarExt(MI, TypeIdx, NarrowTy);
984  case TargetOpcode::G_TRUNC: {
985  if (TypeIdx != 1)
986  return UnableToLegalize;
987 
988  uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
989  if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
990  LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
991  return UnableToLegalize;
992  }
993 
994  auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
995  MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
996  MI.eraseFromParent();
997  return Legalized;
998  }
999 
1000  case TargetOpcode::G_FREEZE: {
1001  if (TypeIdx != 0)
1002  return UnableToLegalize;
1003 
1004  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1005  // Should widen scalar first
1006  if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1007  return UnableToLegalize;
1008 
1009  auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1010  SmallVector<Register, 8> Parts;
1011  for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1012  Parts.push_back(
1013  MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
1014  }
1015 
1016  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Parts);
1017  MI.eraseFromParent();
1018  return Legalized;
1019  }
1020  case TargetOpcode::G_ADD:
1021  case TargetOpcode::G_SUB:
1022  case TargetOpcode::G_SADDO:
1023  case TargetOpcode::G_SSUBO:
1024  case TargetOpcode::G_SADDE:
1025  case TargetOpcode::G_SSUBE:
1026  case TargetOpcode::G_UADDO:
1027  case TargetOpcode::G_USUBO:
1028  case TargetOpcode::G_UADDE:
1029  case TargetOpcode::G_USUBE:
1030  return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1031  case TargetOpcode::G_MUL:
1032  case TargetOpcode::G_UMULH:
1033  return narrowScalarMul(MI, NarrowTy);
1034  case TargetOpcode::G_EXTRACT:
1035  return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1036  case TargetOpcode::G_INSERT:
1037  return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1038  case TargetOpcode::G_LOAD: {
1039  auto &LoadMI = cast<GLoad>(MI);
1040  Register DstReg = LoadMI.getDstReg();
1041  LLT DstTy = MRI.getType(DstReg);
1042  if (DstTy.isVector())
1043  return UnableToLegalize;
1044 
1045  if (8 * LoadMI.getMemSize() != DstTy.getSizeInBits()) {
1046  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1047  MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1048  MIRBuilder.buildAnyExt(DstReg, TmpReg);
1049  LoadMI.eraseFromParent();
1050  return Legalized;
1051  }
1052 
1053  return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1054  }
1055  case TargetOpcode::G_ZEXTLOAD:
1056  case TargetOpcode::G_SEXTLOAD: {
1057  auto &LoadMI = cast<GExtLoad>(MI);
1058  Register DstReg = LoadMI.getDstReg();
1059  Register PtrReg = LoadMI.getPointerReg();
1060 
1061  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1062  auto &MMO = LoadMI.getMMO();
1063  unsigned MemSize = MMO.getSizeInBits();
1064 
1065  if (MemSize == NarrowSize) {
1066  MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1067  } else if (MemSize < NarrowSize) {
1068  MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1069  } else if (MemSize > NarrowSize) {
1070  // FIXME: Need to split the load.
1071  return UnableToLegalize;
1072  }
1073 
1074  if (isa<GZExtLoad>(LoadMI))
1075  MIRBuilder.buildZExt(DstReg, TmpReg);
1076  else
1077  MIRBuilder.buildSExt(DstReg, TmpReg);
1078 
1079  LoadMI.eraseFromParent();
1080  return Legalized;
1081  }
1082  case TargetOpcode::G_STORE: {
1083  auto &StoreMI = cast<GStore>(MI);
1084 
1085  Register SrcReg = StoreMI.getValueReg();
1086  LLT SrcTy = MRI.getType(SrcReg);
1087  if (SrcTy.isVector())
1088  return UnableToLegalize;
1089 
1090  int NumParts = SizeOp0 / NarrowSize;
1091  unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1092  unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1093  if (SrcTy.isVector() && LeftoverBits != 0)
1094  return UnableToLegalize;
1095 
1096  if (8 * StoreMI.getMemSize() != SrcTy.getSizeInBits()) {
1097  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1098  MIRBuilder.buildTrunc(TmpReg, SrcReg);
1099  MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1100  StoreMI.eraseFromParent();
1101  return Legalized;
1102  }
1103 
1104  return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1105  }
1106  case TargetOpcode::G_SELECT:
1107  return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1108  case TargetOpcode::G_AND:
1109  case TargetOpcode::G_OR:
1110  case TargetOpcode::G_XOR: {
1111  // Legalize bitwise operation:
1112  // A = BinOp<Ty> B, C
1113  // into:
1114  // B1, ..., BN = G_UNMERGE_VALUES B
1115  // C1, ..., CN = G_UNMERGE_VALUES C
1116  // A1 = BinOp<Ty/N> B1, C2
1117  // ...
1118  // AN = BinOp<Ty/N> BN, CN
1119  // A = G_MERGE_VALUES A1, ..., AN
1120  return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1121  }
1122  case TargetOpcode::G_SHL:
1123  case TargetOpcode::G_LSHR:
1124  case TargetOpcode::G_ASHR:
1125  return narrowScalarShift(MI, TypeIdx, NarrowTy);
1126  case TargetOpcode::G_CTLZ:
1127  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1128  case TargetOpcode::G_CTTZ:
1129  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1130  case TargetOpcode::G_CTPOP:
1131  if (TypeIdx == 1)
1132  switch (MI.getOpcode()) {
1133  case TargetOpcode::G_CTLZ:
1134  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1135  return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1136  case TargetOpcode::G_CTTZ:
1137  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1138  return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1139  case TargetOpcode::G_CTPOP:
1140  return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1141  default:
1142  return UnableToLegalize;
1143  }
1144 
1145  Observer.changingInstr(MI);
1146  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1147  Observer.changedInstr(MI);
1148  return Legalized;
1149  case TargetOpcode::G_INTTOPTR:
1150  if (TypeIdx != 1)
1151  return UnableToLegalize;
1152 
1153  Observer.changingInstr(MI);
1154  narrowScalarSrc(MI, NarrowTy, 1);
1155  Observer.changedInstr(MI);
1156  return Legalized;
1157  case TargetOpcode::G_PTRTOINT:
1158  if (TypeIdx != 0)
1159  return UnableToLegalize;
1160 
1161  Observer.changingInstr(MI);
1162  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1163  Observer.changedInstr(MI);
1164  return Legalized;
1165  case TargetOpcode::G_PHI: {
1166  // FIXME: add support for when SizeOp0 isn't an exact multiple of
1167  // NarrowSize.
1168  if (SizeOp0 % NarrowSize != 0)
1169  return UnableToLegalize;
1170 
1171  unsigned NumParts = SizeOp0 / NarrowSize;
1172  SmallVector<Register, 2> DstRegs(NumParts);
1173  SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1174  Observer.changingInstr(MI);
1175  for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1176  MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1177  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1178  extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1179  SrcRegs[i / 2]);
1180  }
1181  MachineBasicBlock &MBB = *MI.getParent();
1182  MIRBuilder.setInsertPt(MBB, MI);
1183  for (unsigned i = 0; i < NumParts; ++i) {
1184  DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1185  MachineInstrBuilder MIB =
1186  MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1187  for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1188  MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1189  }
1190  MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1191  MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1192  Observer.changedInstr(MI);
1193  MI.eraseFromParent();
1194  return Legalized;
1195  }
1196  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1197  case TargetOpcode::G_INSERT_VECTOR_ELT: {
1198  if (TypeIdx != 2)
1199  return UnableToLegalize;
1200 
1201  int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1202  Observer.changingInstr(MI);
1203  narrowScalarSrc(MI, NarrowTy, OpIdx);
1204  Observer.changedInstr(MI);
1205  return Legalized;
1206  }
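  // The G_ICMP case below narrows a wide compare. Illustrative example: an
  // s64 equality compare with NarrowTy = s32 becomes two s32 G_XORs, a G_OR of
  // the XOR results, and a single G_ICMP of that against zero; relational
  // predicates instead compare the high and low halves separately and select
  // between the two results.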
1207  case TargetOpcode::G_ICMP: {
1208  Register LHS = MI.getOperand(2).getReg();
1209  LLT SrcTy = MRI.getType(LHS);
1210  uint64_t SrcSize = SrcTy.getSizeInBits();
1211  CmpInst::Predicate Pred =
1212  static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1213 
1214  // TODO: Handle the non-equality case for weird sizes.
1215  if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
1216  return UnableToLegalize;
1217 
1218  LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1219  SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1220  if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1221  LHSLeftoverRegs))
1222  return UnableToLegalize;
1223 
1224  LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1225  SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1226  if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1227  RHSPartRegs, RHSLeftoverRegs))
1228  return UnableToLegalize;
1229 
1230  // We now have the LHS and RHS of the compare split into narrow-type
1231  // registers, plus potentially some leftover type.
1232  Register Dst = MI.getOperand(0).getReg();
1233  LLT ResTy = MRI.getType(Dst);
1234  if (ICmpInst::isEquality(Pred)) {
1235  // For each part on the LHS and RHS, keep track of the result of XOR-ing
1236  // them together. For each equal part, the result should be all 0s. For
1237  // each non-equal part, we'll get at least one 1.
1238  auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1239  SmallVector<Register, 4> Xors;
1240  for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1241  auto LHS = std::get<0>(LHSAndRHS);
1242  auto RHS = std::get<1>(LHSAndRHS);
1243  auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1244  Xors.push_back(Xor);
1245  }
1246 
1247  // Build a G_XOR for each leftover register. Each G_XOR must be widened
1248  // to the desired narrow type so that we can OR them together later.
1249  SmallVector<Register, 4> WidenedXors;
1250  for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1251  auto LHS = std::get<0>(LHSAndRHS);
1252  auto RHS = std::get<1>(LHSAndRHS);
1253  auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1254  LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1255  buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1256  /* PadStrategy = */ TargetOpcode::G_ZEXT);
1257  Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
1258  }
1259 
1260  // Now, for each part we broke up, we know if they are equal/not equal
1261  // based off the G_XOR. We can OR these all together and compare against
1262  // 0 to get the result.
1263  assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1264  auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1265  for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1266  Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1267  MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1268  } else {
1269  // TODO: Handle non-power-of-two types.
1270  assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
1271  assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
1272  Register LHSL = LHSPartRegs[0];
1273  Register LHSH = LHSPartRegs[1];
1274  Register RHSL = RHSPartRegs[0];
1275  Register RHSH = RHSPartRegs[1];
1276  MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1277  MachineInstrBuilder CmpHEQ =
1278  MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
1279  MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
1280  ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1281  MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
1282  }
1283  MI.eraseFromParent();
1284  return Legalized;
1285  }
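  // The G_SEXT_INREG case below performs the operation at NarrowTy and
  // sign-extends the result when NarrowTy still covers the extension point,
  // and otherwise splits it. Illustrative example: G_SEXT_INREG %x(s64), 40
  // with NarrowTy = s32 keeps the low s32 half unchanged and applies an 8-bit
  // G_SEXT_INREG to the high half before remerging.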
1286  case TargetOpcode::G_SEXT_INREG: {
1287  if (TypeIdx != 0)
1288  return UnableToLegalize;
1289 
1290  int64_t SizeInBits = MI.getOperand(2).getImm();
1291 
1292  // So long as the new type has more bits than the bits we're extending we
1293  // don't need to break it apart.
1294  if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1295  Observer.changingInstr(MI);
1296  // We don't lose any non-extension bits by truncating the src and
1297  // sign-extending the dst.
1298  MachineOperand &MO1 = MI.getOperand(1);
1299  auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1300  MO1.setReg(TruncMIB.getReg(0));
1301 
1302  MachineOperand &MO2 = MI.getOperand(0);
1303  Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1304  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1305  MIRBuilder.buildSExt(MO2, DstExt);
1306  MO2.setReg(DstExt);
1307  Observer.changedInstr(MI);
1308  return Legalized;
1309  }
1310 
1311  // Break it apart. Components below the extension point are unmodified. The
1312  // component containing the extension point becomes a narrower SEXT_INREG.
1313  // Components above it are ashr'd from the component containing the
1314  // extension point.
1315  if (SizeOp0 % NarrowSize != 0)
1316  return UnableToLegalize;
1317  int NumParts = SizeOp0 / NarrowSize;
1318 
1319  // List the registers where the destination will be scattered.
1320  SmallVector<Register, 2> DstRegs;
1321  // List the registers where the source will be split.
1322  SmallVector<Register, 2> SrcRegs;
1323 
1324  // Create all the temporary registers.
1325  for (int i = 0; i < NumParts; ++i) {
1326  Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1327 
1328  SrcRegs.push_back(SrcReg);
1329  }
1330 
1331  // Explode the big arguments into smaller chunks.
1332  MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1333 
1334  Register AshrCstReg =
1335  MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1336  .getReg(0);
1337  Register FullExtensionReg = 0;
1338  Register PartialExtensionReg = 0;
1339 
1340  // Do the operation on each small part.
1341  for (int i = 0; i < NumParts; ++i) {
1342  if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
1343  DstRegs.push_back(SrcRegs[i]);
1344  else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
1345  assert(PartialExtensionReg &&
1346  "Expected to visit partial extension before full");
1347  if (FullExtensionReg) {
1348  DstRegs.push_back(FullExtensionReg);
1349  continue;
1350  }
1351  DstRegs.push_back(
1352  MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1353  .getReg(0));
1354  FullExtensionReg = DstRegs.back();
1355  } else {
1356  DstRegs.push_back(
1357  MIRBuilder
1358  .buildInstr(
1359  TargetOpcode::G_SEXT_INREG, {NarrowTy},
1360  {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1361  .getReg(0));
1362  PartialExtensionReg = DstRegs.back();
1363  }
1364  }
1365 
1366  // Gather the destination registers into the final destination.
1367  Register DstReg = MI.getOperand(0).getReg();
1368  MIRBuilder.buildMerge(DstReg, DstRegs);
1369  MI.eraseFromParent();
1370  return Legalized;
1371  }
1372  case TargetOpcode::G_BSWAP:
1373  case TargetOpcode::G_BITREVERSE: {
1374  if (SizeOp0 % NarrowSize != 0)
1375  return UnableToLegalize;
1376 
1377  Observer.changingInstr(MI);
1378  SmallVector<Register, 2> SrcRegs, DstRegs;
1379  unsigned NumParts = SizeOp0 / NarrowSize;
1380  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
1381 
1382  for (unsigned i = 0; i < NumParts; ++i) {
1383  auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1384  {SrcRegs[NumParts - 1 - i]});
1385  DstRegs.push_back(DstPart.getReg(0));
1386  }
1387 
1388  MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1389 
1390  Observer.changedInstr(MI);
1391  MI.eraseFromParent();
1392  return Legalized;
1393  }
1394  case TargetOpcode::G_PTR_ADD:
1395  case TargetOpcode::G_PTRMASK: {
1396  if (TypeIdx != 1)
1397  return UnableToLegalize;
1398  Observer.changingInstr(MI);
1399  narrowScalarSrc(MI, NarrowTy, 2);
1400  Observer.changedInstr(MI);
1401  return Legalized;
1402  }
1403  case TargetOpcode::G_FPTOUI:
1404  case TargetOpcode::G_FPTOSI:
1405  return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1406  case TargetOpcode::G_FPEXT:
1407  if (TypeIdx != 0)
1408  return UnableToLegalize;
1409  Observer.changingInstr(MI);
1410  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1411  Observer.changedInstr(MI);
1412  return Legalized;
1413  }
1414 }
1415 
1416 Register LegalizerHelper::coerceToScalar(Register Val) {
1417  LLT Ty = MRI.getType(Val);
1418  if (Ty.isScalar())
1419  return Val;
1420 
1421  const DataLayout &DL = MIRBuilder.getDataLayout();
1422  LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1423  if (Ty.isPointer()) {
1424  if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1425  return Register();
1426  return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1427  }
1428 
1429  Register NewVal = Val;
1430 
1431  assert(Ty.isVector());
1432  LLT EltTy = Ty.getElementType();
1433  if (EltTy.isPointer())
1434  NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1435  return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1436 }
1437 
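/// Replace operand \p OpIdx of \p MI with the result of extending the original
/// value to \p WideTy using \p ExtOpcode (e.g. G_ANYEXT, G_SEXT or G_ZEXT);
/// the extension instruction is emitted before \p MI.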
1438 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1439  unsigned OpIdx, unsigned ExtOpcode) {
1440  MachineOperand &MO = MI.getOperand(OpIdx);
1441  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1442  MO.setReg(ExtB.getReg(0));
1443 }
1444 
1445 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1446  unsigned OpIdx) {
1447  MachineOperand &MO = MI.getOperand(OpIdx);
1448  auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1449  MO.setReg(ExtB.getReg(0));
1450 }
1451 
1452 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1453  unsigned OpIdx, unsigned TruncOpcode) {
1454  MachineOperand &MO = MI.getOperand(OpIdx);
1455  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1456  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1457  MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1458  MO.setReg(DstExt);
1459 }
1460 
1461 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1462  unsigned OpIdx, unsigned ExtOpcode) {
1463  MachineOperand &MO = MI.getOperand(OpIdx);
1464  Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1465  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1466  MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1467  MO.setReg(DstTrunc);
1468 }
1469 
1470 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1471  unsigned OpIdx) {
1472  MachineOperand &MO = MI.getOperand(OpIdx);
1473  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1474  Register Dst = MO.getReg();
1475  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1476  MO.setReg(DstExt);
1477  MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
1478 }
1479 
1480 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1481  unsigned OpIdx) {
1482  MachineOperand &MO = MI.getOperand(OpIdx);
1483  SmallVector<Register, 8> Regs;
1484  MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
1485 }
1486 
1487 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1488  MachineOperand &Op = MI.getOperand(OpIdx);
1489  Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1490 }
1491 
1492 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1493  MachineOperand &MO = MI.getOperand(OpIdx);
1494  Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1495  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1496  MIRBuilder.buildBitcast(MO, CastDst);
1497  MO.setReg(CastDst);
1498 }
1499 
1500 LegalizerHelper::LegalizeResult
1501 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1502  LLT WideTy) {
1503  if (TypeIdx != 1)
1504  return UnableToLegalize;
1505 
1506  Register DstReg = MI.getOperand(0).getReg();
1507  LLT DstTy = MRI.getType(DstReg);
1508  if (DstTy.isVector())
1509  return UnableToLegalize;
1510 
1511  Register Src1 = MI.getOperand(1).getReg();
1512  LLT SrcTy = MRI.getType(Src1);
1513  const int DstSize = DstTy.getSizeInBits();
1514  const int SrcSize = SrcTy.getSizeInBits();
1515  const int WideSize = WideTy.getSizeInBits();
1516  const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1517 
1518  unsigned NumOps = MI.getNumOperands();
1519  unsigned NumSrc = MI.getNumOperands() - 1;
1520  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1521 
1522  if (WideSize >= DstSize) {
1523  // Directly pack the bits in the target type.
1524  Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
1525 
1526  for (unsigned I = 2; I != NumOps; ++I) {
1527  const unsigned Offset = (I - 1) * PartSize;
1528 
1529  Register SrcReg = MI.getOperand(I).getReg();
1530  assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1531 
1532  auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1533 
1534  Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1535  MRI.createGenericVirtualRegister(WideTy);
1536 
1537  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1538  auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1539  MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1540  ResultReg = NextResult;
1541  }
1542 
1543  if (WideSize > DstSize)
1544  MIRBuilder.buildTrunc(DstReg, ResultReg);
1545  else if (DstTy.isPointer())
1546  MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1547 
1548  MI.eraseFromParent();
1549  return Legalized;
1550  }
1551 
1552  // Unmerge the original values to the GCD type, and recombine to the next
1553  // multiple greater than the original type.
1554  //
1555  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1556  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1557  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1558  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1559  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1560  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1561  // %12:_(s12) = G_MERGE_VALUES %10, %11
1562  //
1563  // Padding with undef if necessary:
1564  //
1565  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1566  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1567  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1568  // %7:_(s2) = G_IMPLICIT_DEF
1569  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1570  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1571  // %10:_(s12) = G_MERGE_VALUES %8, %9
1572 
1573  const int GCD = std::gcd(SrcSize, WideSize);
1574  LLT GCDTy = LLT::scalar(GCD);
1575 
1577  SmallVector<Register, 8> NewMergeRegs;
1578  SmallVector<Register, 8> Unmerges;
1579  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1580 
1581  // Decompose the original operands if they don't evenly divide.
1582  for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
1583  Register SrcReg = MO.getReg();
1584  if (GCD == SrcSize) {
1585  Unmerges.push_back(SrcReg);
1586  } else {
1587  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1588  for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1589  Unmerges.push_back(Unmerge.getReg(J));
1590  }
1591  }
1592 
1593  // Pad with undef to the next size that is a multiple of the requested size.
1594  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1595  Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1596  for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1597  Unmerges.push_back(UndefReg);
1598  }
1599 
1600  const int PartsPerGCD = WideSize / GCD;
1601 
1602  // Build merges of each piece.
1603  ArrayRef<Register> Slicer(Unmerges);
1604  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1605  auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
1606  NewMergeRegs.push_back(Merge.getReg(0));
1607  }
1608 
1609  // A truncate may be necessary if the requested type doesn't evenly divide the
1610  // original result type.
1611  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1612  MIRBuilder.buildMerge(DstReg, NewMergeRegs);
1613  } else {
1614  auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
1615  MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1616  }
1617 
1618  MI.eraseFromParent();
1619  return Legalized;
1620 }
1621 
1622 LegalizerHelper::LegalizeResult
1623 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1624  LLT WideTy) {
1625  if (TypeIdx != 0)
1626  return UnableToLegalize;
1627 
1628  int NumDst = MI.getNumOperands() - 1;
1629  Register SrcReg = MI.getOperand(NumDst).getReg();
1630  LLT SrcTy = MRI.getType(SrcReg);
1631  if (SrcTy.isVector())
1632  return UnableToLegalize;
1633 
1634  Register Dst0Reg = MI.getOperand(0).getReg();
1635  LLT DstTy = MRI.getType(Dst0Reg);
1636  if (!DstTy.isScalar())
1637  return UnableToLegalize;
1638 
1639  if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
1640  if (SrcTy.isPointer()) {
1641  const DataLayout &DL = MIRBuilder.getDataLayout();
1642  if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
1643  LLVM_DEBUG(
1644  dbgs() << "Not casting non-integral address space integer\n");
1645  return UnableToLegalize;
1646  }
1647 
1648  SrcTy = LLT::scalar(SrcTy.getSizeInBits());
1649  SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
1650  }
1651 
1652  // Widen SrcTy to WideTy. This does not affect the result, but since the
1653  // user requested this size, it is probably better handled than SrcTy and
1654  // should reduce the total number of legalization artifacts.
1655  if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1656  SrcTy = WideTy;
1657  SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
1658  }
1659 
1660  // There's no unmerge type to target. Directly extract the bits from the
1661  // source type.
1662  unsigned DstSize = DstTy.getSizeInBits();
1663 
1664  MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
1665  for (int I = 1; I != NumDst; ++I) {
1666  auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
1667  auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
1668  MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
1669  }
1670 
1671  MI.eraseFromParent();
1672  return Legalized;
1673  }
1674 
1675  // Extend the source to a wider type.
1676  LLT LCMTy = getLCMType(SrcTy, WideTy);
1677 
1678  Register WideSrc = SrcReg;
1679  if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
1680  // TODO: If this is an integral address space, cast to integer and anyext.
1681  if (SrcTy.isPointer()) {
1682  LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
1683  return UnableToLegalize;
1684  }
1685 
1686  WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
1687  }
1688 
1689  auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
1690 
1691  // Create a sequence of unmerges and merges to the original results. Since we
1692  // may have widened the source, we will need to pad the results with dead defs
1693  // to cover the source register.
1694  // e.g. widen s48 to s64:
1695  // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
1696  //
1697  // =>
1698  // %4:_(s192) = G_ANYEXT %0:_(s96)
1699  // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
1700  // ; unpack to GCD type, with extra dead defs
1701  // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
1702  // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
1703  // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
1704  // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
1705  // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
1706  const LLT GCDTy = getGCDType(WideTy, DstTy);
1707  const int NumUnmerge = Unmerge->getNumOperands() - 1;
1708  const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
1709 
1710  // Directly unmerge to the destination without going through a GCD type
1711  // if possible
1712  if (PartsPerRemerge == 1) {
1713  const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
1714 
1715  for (int I = 0; I != NumUnmerge; ++I) {
1716  auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
1717 
1718  for (int J = 0; J != PartsPerUnmerge; ++J) {
1719  int Idx = I * PartsPerUnmerge + J;
1720  if (Idx < NumDst)
1721  MIB.addDef(MI.getOperand(Idx).getReg());
1722  else {
1723  // Create dead def for excess components.
1724  MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
1725  }
1726  }
1727 
1728  MIB.addUse(Unmerge.getReg(I));
1729  }
1730  } else {
1731  SmallVector<Register, 16> Parts;
1732  for (int J = 0; J != NumUnmerge; ++J)
1733  extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
1734 
1735  SmallVector<Register, 8> RemergeParts;
1736  for (int I = 0; I != NumDst; ++I) {
1737  for (int J = 0; J < PartsPerRemerge; ++J) {
1738  const int Idx = I * PartsPerRemerge + J;
1739  RemergeParts.emplace_back(Parts[Idx]);
1740  }
1741 
1742  MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
1743  RemergeParts.clear();
1744  }
1745  }
1746 
1747  MI.eraseFromParent();
1748  return Legalized;
1749 }
1750 
1751 LegalizerHelper::LegalizeResult
1752 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
1753  LLT WideTy) {
1754  Register DstReg = MI.getOperand(0).getReg();
1755  Register SrcReg = MI.getOperand(1).getReg();
1756  LLT SrcTy = MRI.getType(SrcReg);
1757 
1758  LLT DstTy = MRI.getType(DstReg);
1759  unsigned Offset = MI.getOperand(2).getImm();
1760 
1761  if (TypeIdx == 0) {
1762  if (SrcTy.isVector() || DstTy.isVector())
1763  return UnableToLegalize;
1764 
1765  SrcOp Src(SrcReg);
1766  if (SrcTy.isPointer()) {
1767  // Extracts from pointers can be handled only if they are really just
1768  // simple integers.
1769  const DataLayout &DL = MIRBuilder.getDataLayout();
1770  if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
1771  return UnableToLegalize;
1772 
1773  LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
1774  Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
1775  SrcTy = SrcAsIntTy;
1776  }
1777 
1778  if (DstTy.isPointer())
1779  return UnableToLegalize;
1780 
1781  if (Offset == 0) {
1782  // Avoid a shift in the degenerate case.
1783  MIRBuilder.buildTrunc(DstReg,
1784  MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
1785  MI.eraseFromParent();
1786  return Legalized;
1787  }
1788 
1789  // Do a shift in the source type.
1790  LLT ShiftTy = SrcTy;
1791  if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1792  Src = MIRBuilder.buildAnyExt(WideTy, Src);
1793  ShiftTy = WideTy;
1794  }
1795 
1796  auto LShr = MIRBuilder.buildLShr(
1797  ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
1798  MIRBuilder.buildTrunc(DstReg, LShr);
1799  MI.eraseFromParent();
1800  return Legalized;
1801  }
1802 
1803  if (SrcTy.isScalar()) {
1805  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1807  return Legalized;
1808  }
1809 
1810  if (!SrcTy.isVector())
1811  return UnableToLegalize;
1812 
1813  if (DstTy != SrcTy.getElementType())
1814  return UnableToLegalize;
1815 
1816  if (Offset % SrcTy.getScalarSizeInBits() != 0)
1817  return UnableToLegalize;
1818 
1820  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1821 
1822  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
1823  Offset);
1824  widenScalarDst(MI, WideTy.getScalarType(), 0);
1826  return Legalized;
1827 }
1828 
1829 LegalizerHelper::LegalizeResult
1830 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1831  LLT WideTy) {
1832  if (TypeIdx != 0 || WideTy.isVector())
1833  return UnableToLegalize;
1835  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1836  widenScalarDst(MI, WideTy);
1838  return Legalized;
1839 }
1840 
1841 LegalizerHelper::LegalizeResult
1842 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
1843  LLT WideTy) {
1844  unsigned Opcode;
1845  unsigned ExtOpcode;
1846  std::optional<Register> CarryIn;
1847  switch (MI.getOpcode()) {
1848  default:
1849  llvm_unreachable("Unexpected opcode!");
1850  case TargetOpcode::G_SADDO:
1851  Opcode = TargetOpcode::G_ADD;
1852  ExtOpcode = TargetOpcode::G_SEXT;
1853  break;
1854  case TargetOpcode::G_SSUBO:
1855  Opcode = TargetOpcode::G_SUB;
1856  ExtOpcode = TargetOpcode::G_SEXT;
1857  break;
1858  case TargetOpcode::G_UADDO:
1859  Opcode = TargetOpcode::G_ADD;
1860  ExtOpcode = TargetOpcode::G_ZEXT;
1861  break;
1862  case TargetOpcode::G_USUBO:
1863  Opcode = TargetOpcode::G_SUB;
1864  ExtOpcode = TargetOpcode::G_ZEXT;
1865  break;
1866  case TargetOpcode::G_SADDE:
1867  Opcode = TargetOpcode::G_UADDE;
1868  ExtOpcode = TargetOpcode::G_SEXT;
1869  CarryIn = MI.getOperand(4).getReg();
1870  break;
1871  case TargetOpcode::G_SSUBE:
1872  Opcode = TargetOpcode::G_USUBE;
1873  ExtOpcode = TargetOpcode::G_SEXT;
1874  CarryIn = MI.getOperand(4).getReg();
1875  break;
1876  case TargetOpcode::G_UADDE:
1877  Opcode = TargetOpcode::G_UADDE;
1878  ExtOpcode = TargetOpcode::G_ZEXT;
1879  CarryIn = MI.getOperand(4).getReg();
1880  break;
1881  case TargetOpcode::G_USUBE:
1882  Opcode = TargetOpcode::G_USUBE;
1883  ExtOpcode = TargetOpcode::G_ZEXT;
1884  CarryIn = MI.getOperand(4).getReg();
1885  break;
1886  }
1887 
1888  if (TypeIdx == 1) {
1889  unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
1890 
1892  if (CarryIn)
1893  widenScalarSrc(MI, WideTy, 4, BoolExtOp);
1894  widenScalarDst(MI, WideTy, 1);
1895 
1897  return Legalized;
1898  }
1899 
1900  auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
1901  auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
1902  // Do the arithmetic in the larger type.
1903  Register NewOp;
1904  if (CarryIn) {
1905  LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
1906  NewOp = MIRBuilder
1907  .buildInstr(Opcode, {WideTy, CarryOutTy},
1908  {LHSExt, RHSExt, *CarryIn})
1909  .getReg(0);
1910  } else {
1911  NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
1912  }
1913  LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
1914  auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
1915  auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
1916  // There is no overflow if the ExtOp is the same as NewOp.
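  // For example, G_SADDO on s8 widened to s32: the add overflows exactly when
  // the s32 sum differs from sign-extending its low 8 bits back to s32.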
1917  MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
1918  // Now trunc the NewOp to the original result.
1919  MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
1920  MI.eraseFromParent();
1921  return Legalized;
1922 }
1923 
1924 LegalizerHelper::LegalizeResult
1925 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
1926  LLT WideTy) {
1927  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
1928  MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
1929  MI.getOpcode() == TargetOpcode::G_SSHLSAT;
1930  bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
1931  MI.getOpcode() == TargetOpcode::G_USHLSAT;
1932  // We can convert this to:
1933  // 1. Any extend iN to iM
1934  // 2. SHL by M-N
1935  // 3. [US][ADD|SUB|SHL]SAT
1936  // 4. L/ASHR by M-N
1937  //
1938  // It may be more efficient to lower this to a min and a max operation in
1939  // the higher precision arithmetic if the promoted operation isn't legal,
1940  // but this decision is up to the target's lowering request.
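  // For example, widening G_SADDSAT on s8 to s32 (SHLAmount = 24):
  //   %a:_(s32) = G_SHL (G_ANYEXT %lhs:_(s8)), 24
  //   %b:_(s32) = G_SHL (G_ANYEXT %rhs:_(s8)), 24
  //   %sat:_(s32) = G_SADDSAT %a, %b
  //   %res:_(s8) = G_TRUNC (G_ASHR %sat, 24)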
1941  Register DstReg = MI.getOperand(0).getReg();
1942 
1943  unsigned NewBits = WideTy.getScalarSizeInBits();
1944  unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
1945 
1946  // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
1947  // must not left shift the RHS to preserve the shift amount.
1948  auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
1949  auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
1950  : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
1951  auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
1952  auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
1953  auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
1954 
1955  auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
1956  {ShiftL, ShiftR}, MI.getFlags());
1957 
1958  // Use a shift that will preserve the number of sign bits when the trunc is
1959  // folded away.
1960  auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
1961  : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
1962 
1963  MIRBuilder.buildTrunc(DstReg, Result);
1964  MI.eraseFromParent();
1965  return Legalized;
1966 }
1967 
1968 LegalizerHelper::LegalizeResult
1969 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
1970  LLT WideTy) {
1971  if (TypeIdx == 1) {
1973  widenScalarDst(MI, WideTy, 1);
1975  return Legalized;
1976  }
1977 
1978  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
1979  Register Result = MI.getOperand(0).getReg();
1980  Register OriginalOverflow = MI.getOperand(1).getReg();
1981  Register LHS = MI.getOperand(2).getReg();
1982  Register RHS = MI.getOperand(3).getReg();
1983  LLT SrcTy = MRI.getType(LHS);
1984  LLT OverflowTy = MRI.getType(OriginalOverflow);
1985  unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
1986 
1987  // To determine if the result overflowed in the larger type, we extend the
1988  // input to the larger type, do the multiply (checking if it overflows),
1989  // then also check the high bits of the result to see if overflow happened
1990  // there.
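  // For example, G_UMULO on s8 widened to s16: zero-extend both inputs and
  // multiply in s16; overflow occurred if the product is not equal to the
  // zero-extension of its low byte (i.e. the high byte is nonzero). Since s16
  // is twice the width of s8, the wide multiply itself cannot overflow.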
1991  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1992  auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
1993  auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
1994 
1995  auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
1996  {LeftOperand, RightOperand});
1997  auto Mul = Mulo->getOperand(0);
1998  MIRBuilder.buildTrunc(Result, Mul);
1999 
2000  MachineInstrBuilder ExtResult;
2001  // Overflow occurred if it occurred in the larger type, or if the high part
2002  // of the result does not zero/sign-extend the low part. Check this second
2003  // possibility first.
2004  if (IsSigned) {
2005  // For signed, overflow occurred when the high part does not sign-extend
2006  // the low part.
2007  ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2008  } else {
2009  // Unsigned overflow occurred when the high part does not zero-extend the
2010  // low part.
2011  ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2012  }
2013 
2014  // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2015  // so we don't need to check the overflow result of larger type Mulo.
2016  if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) {
2017  auto Overflow =
2018  MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2019  // Finally check if the multiplication in the larger type itself overflowed.
2020  MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2021  } else {
2022  MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2023  }
2024  MI.eraseFromParent();
2025  return Legalized;
2026 }
2027 
2028 LegalizerHelper::LegalizeResult
2029 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2030  switch (MI.getOpcode()) {
2031  default:
2032  return UnableToLegalize;
2033  case TargetOpcode::G_ATOMICRMW_XCHG:
2034  case TargetOpcode::G_ATOMICRMW_ADD:
2035  case TargetOpcode::G_ATOMICRMW_SUB:
2036  case TargetOpcode::G_ATOMICRMW_AND:
2037  case TargetOpcode::G_ATOMICRMW_OR:
2038  case TargetOpcode::G_ATOMICRMW_XOR:
2039  case TargetOpcode::G_ATOMICRMW_MIN:
2040  case TargetOpcode::G_ATOMICRMW_MAX:
2041  case TargetOpcode::G_ATOMICRMW_UMIN:
2042  case TargetOpcode::G_ATOMICRMW_UMAX:
2043  assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2045  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2046  widenScalarDst(MI, WideTy, 0);
2048  return Legalized;
2049  case TargetOpcode::G_ATOMIC_CMPXCHG:
2050  assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2052  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2053  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2054  widenScalarDst(MI, WideTy, 0);
2056  return Legalized;
2057  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2058  if (TypeIdx == 0) {
2060  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2061  widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2062  widenScalarDst(MI, WideTy, 0);
2064  return Legalized;
2065  }
2066  assert(TypeIdx == 1 &&
2067  "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2069  widenScalarDst(MI, WideTy, 1);
2071  return Legalized;
2072  case TargetOpcode::G_EXTRACT:
2073  return widenScalarExtract(MI, TypeIdx, WideTy);
2074  case TargetOpcode::G_INSERT:
2075  return widenScalarInsert(MI, TypeIdx, WideTy);
2076  case TargetOpcode::G_MERGE_VALUES:
2077  return widenScalarMergeValues(MI, TypeIdx, WideTy);
2078  case TargetOpcode::G_UNMERGE_VALUES:
2079  return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2080  case TargetOpcode::G_SADDO:
2081  case TargetOpcode::G_SSUBO:
2082  case TargetOpcode::G_UADDO:
2083  case TargetOpcode::G_USUBO:
2084  case TargetOpcode::G_SADDE:
2085  case TargetOpcode::G_SSUBE:
2086  case TargetOpcode::G_UADDE:
2087  case TargetOpcode::G_USUBE:
2088  return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2089  case TargetOpcode::G_UMULO:
2090  case TargetOpcode::G_SMULO:
2091  return widenScalarMulo(MI, TypeIdx, WideTy);
2092  case TargetOpcode::G_SADDSAT:
2093  case TargetOpcode::G_SSUBSAT:
2094  case TargetOpcode::G_SSHLSAT:
2095  case TargetOpcode::G_UADDSAT:
2096  case TargetOpcode::G_USUBSAT:
2097  case TargetOpcode::G_USHLSAT:
2098  return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2099  case TargetOpcode::G_CTTZ:
2100  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2101  case TargetOpcode::G_CTLZ:
2102  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2103  case TargetOpcode::G_CTPOP: {
2104  if (TypeIdx == 0) {
2106  widenScalarDst(MI, WideTy, 0);
2108  return Legalized;
2109  }
2110 
2111  Register SrcReg = MI.getOperand(1).getReg();
2112 
2113  // First extend the input.
2114  unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
2115  MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
2116  ? TargetOpcode::G_ANYEXT
2117  : TargetOpcode::G_ZEXT;
2118  auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2119  LLT CurTy = MRI.getType(SrcReg);
2120  unsigned NewOpc = MI.getOpcode();
2121  if (NewOpc == TargetOpcode::G_CTTZ) {
2122  // The count is the same in the larger type except if the original
2123  // value was zero. This can be handled by setting the bit just off
2124  // the top of the original type.
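  // For example, widening G_CTTZ from s8 to s32: OR the extended source with
  // 1 << 8, so a zero input yields the correct count of 8 rather than 32, and
  // the zero-undef variant below becomes safe to use.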
2125  auto TopBit =
2126  APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2127  MIBSrc = MIRBuilder.buildOr(
2128  WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2129  // Now we know the operand is non-zero, use the more relaxed opcode.
2130  NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2131  }
2132 
2133  // Perform the operation at the larger size.
2134  auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2135  // This is already the correct result for CTPOP and CTTZs
2136  if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
2137  MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2138  // The correct result is NewOp - (difference in width between WideTy and CurTy).
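  // For example, G_CTLZ of an s8 value zero-extended to s32 counts 24 extra
  // leading zeros, so subtract SizeDiff = 24.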
2139  unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2140  MIBNewOp = MIRBuilder.buildSub(
2141  WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2142  }
2143 
2144  MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2145  MI.eraseFromParent();
2146  return Legalized;
2147  }
2148  case TargetOpcode::G_BSWAP: {
2150  Register DstReg = MI.getOperand(0).getReg();
2151 
2152  Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2153  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2154  Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2155  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2156 
2157  MI.getOperand(0).setReg(DstExt);
2158 
2160 
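  // After byte-swapping in the wider type, the original bytes sit in the high
  // DiffBits of the result, so shift them back down before truncating.
  // e.g. for s16 widened to s32: G_BSWAP then G_LSHR by 16.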
2161  LLT Ty = MRI.getType(DstReg);
2162  unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2163  MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2164  MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2165 
2166  MIRBuilder.buildTrunc(DstReg, ShrReg);
2168  return Legalized;
2169  }
2170  case TargetOpcode::G_BITREVERSE: {
2172 
2173  Register DstReg = MI.getOperand(0).getReg();
2174  LLT Ty = MRI.getType(DstReg);
2175  unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2176 
2177  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2178  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2179  MI.getOperand(0).setReg(DstExt);
2181 
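  // As with G_BSWAP, the reversed bits of the narrow value land in the high
  // DiffBits of the wide result, so shift right by DiffBits before truncating.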
2182  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2183  auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2184  MIRBuilder.buildTrunc(DstReg, Shift);
2186  return Legalized;
2187  }
2188  case TargetOpcode::G_FREEZE:
2190  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2191  widenScalarDst(MI, WideTy);
2193  return Legalized;
2194 
2195  case TargetOpcode::G_ABS:
2197  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2198  widenScalarDst(MI, WideTy);
2200  return Legalized;
2201 
2202  case TargetOpcode::G_ADD:
2203  case TargetOpcode::G_AND:
2204  case TargetOpcode::G_MUL:
2205  case TargetOpcode::G_OR:
2206  case TargetOpcode::G_XOR:
2207  case TargetOpcode::G_SUB:
2208  // Perform operation at larger width (any extension is fine here, high bits
2209  // don't affect the result) and then truncate the result back to the
2210  // original type.
2212  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2213  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2214  widenScalarDst(MI, WideTy);
2216  return Legalized;
2217 
2218  case TargetOpcode::G_SBFX:
2219  case TargetOpcode::G_UBFX:
2221 
2222  if (TypeIdx == 0) {
2223  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2224  widenScalarDst(MI, WideTy);
2225  } else {
2226  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2227  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2228  }
2229 
2231  return Legalized;
2232 
2233  case TargetOpcode::G_SHL:
2235 
2236  if (TypeIdx == 0) {
2237  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2238  widenScalarDst(MI, WideTy);
2239  } else {
2240  assert(TypeIdx == 1);
2241  // The "number of bits to shift" operand must preserve its value as an
2242  // unsigned integer:
2243  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2244  }
2245 
2247  return Legalized;
2248 
2249  case TargetOpcode::G_SDIV:
2250  case TargetOpcode::G_SREM:
2251  case TargetOpcode::G_SMIN:
2252  case TargetOpcode::G_SMAX:
2254  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2255  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2256  widenScalarDst(MI, WideTy);
2258  return Legalized;
2259 
2260  case TargetOpcode::G_SDIVREM:
2262  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2263  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2264  widenScalarDst(MI, WideTy);
2265  widenScalarDst(MI, WideTy, 1);
2267  return Legalized;
2268 
2269  case TargetOpcode::G_ASHR:
2270  case TargetOpcode::G_LSHR:
2272 
2273  if (TypeIdx == 0) {
2274  unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
2275  TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2276 
2277  widenScalarSrc(MI, WideTy, 1, CvtOp);
2278  widenScalarDst(MI, WideTy);
2279  } else {
2280  assert(TypeIdx == 1);
2281  // The "number of bits to shift" operand must preserve its value as an
2282  // unsigned integer:
2283  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2284  }
2285 
2287  return Legalized;
2288  case TargetOpcode::G_UDIV:
2289  case TargetOpcode::G_UREM:
2290  case TargetOpcode::G_UMIN:
2291  case TargetOpcode::G_UMAX:
2293  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2294  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2295  widenScalarDst(MI, WideTy);
2297  return Legalized;
2298 
2299  case TargetOpcode::G_UDIVREM:
2301  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2302  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2303  widenScalarDst(MI, WideTy);
2304  widenScalarDst(MI, WideTy, 1);
2306  return Legalized;
2307 
2308  case TargetOpcode::G_SELECT:
2310  if (TypeIdx == 0) {
2311  // Perform operation at larger width (any extension is fine here, high
2312  // bits don't affect the result) and then truncate the result back to the
2313  // original type.
2314  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2315  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2316  widenScalarDst(MI, WideTy);
2317  } else {
2318  bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
2319  // Explicit extension is required here since high bits affect the result.
2320  widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
2321  }
2323  return Legalized;
2324 
2325  case TargetOpcode::G_FPTOSI:
2326  case TargetOpcode::G_FPTOUI:
2328 
2329  if (TypeIdx == 0)
2330  widenScalarDst(MI, WideTy);
2331  else
2332  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2333 
2335  return Legalized;
2336  case TargetOpcode::G_SITOFP:
2338 
2339  if (TypeIdx == 0)
2340  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2341  else
2342  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2343 
2345  return Legalized;
2346  case TargetOpcode::G_UITOFP:
2348 
2349  if (TypeIdx == 0)
2350  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2351  else
2352  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2353 
2355  return Legalized;
2356  case TargetOpcode::G_LOAD:
2357  case TargetOpcode::G_SEXTLOAD:
2358  case TargetOpcode::G_ZEXTLOAD:
2360  widenScalarDst(MI, WideTy);
2362  return Legalized;
2363 
2364  case TargetOpcode::G_STORE: {
2365  if (TypeIdx != 0)
2366  return UnableToLegalize;
2367 
2368  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2369  if (!Ty.isScalar())
2370  return UnableToLegalize;
2371 
2373 
2374  unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
2375  TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2376  widenScalarSrc(MI, WideTy, 0, ExtType);
2377 
2379  return Legalized;
2380  }
2381  case TargetOpcode::G_CONSTANT: {
2382  MachineOperand &SrcMO = MI.getOperand(1);
2383  LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2384  unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
2385  MRI.getType(MI.getOperand(0).getReg()));
2386  assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2387  ExtOpc == TargetOpcode::G_ANYEXT) &&
2388  "Illegal Extend");
2389  const APInt &SrcVal = SrcMO.getCImm()->getValue();
2390  const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2391  ? SrcVal.sext(WideTy.getSizeInBits())
2392  : SrcVal.zext(WideTy.getSizeInBits());
2394  SrcMO.setCImm(ConstantInt::get(Ctx, Val));
2395 
2396  widenScalarDst(MI, WideTy);
2398  return Legalized;
2399  }
2400  case TargetOpcode::G_FCONSTANT: {
2401  // To avoid changing the bits of the constant due to extension to a larger
2402  // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
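  // For example, an s16 G_FCONSTANT holding 1.5 (bit pattern 0x3E00) becomes
  // G_CONSTANT 0x3E00, which is then widened as an ordinary integer constant.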
2403  MachineOperand &SrcMO = MI.getOperand(1);
2404  APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
2406  auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
2407  widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
2408  MI.eraseFromParent();
2409  return Legalized;
2410  }
2411  case TargetOpcode::G_IMPLICIT_DEF: {
2413  widenScalarDst(MI, WideTy);
2415  return Legalized;
2416  }
2417  case TargetOpcode::G_BRCOND:
2419  widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
2421  return Legalized;
2422 
2423  case TargetOpcode::G_FCMP:
2425  if (TypeIdx == 0)
2426  widenScalarDst(MI, WideTy);
2427  else {
2428  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2429  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
2430  }
2432  return Legalized;
2433 
2434  case TargetOpcode::G_ICMP:
2436  if (TypeIdx == 0)
2437  widenScalarDst(MI, WideTy);
2438  else {
2439  unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
2440  MI.getOperand(1).getPredicate()))
2441  ? TargetOpcode::G_SEXT
2442  : TargetOpcode::G_ZEXT;
2443  widenScalarSrc(MI, WideTy, 2, ExtOpcode);
2444  widenScalarSrc(MI, WideTy, 3, ExtOpcode);
2445  }
2447  return Legalized;
2448 
2449  case TargetOpcode::G_PTR_ADD:
2450  assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
2452  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2454  return Legalized;
2455 
2456  case TargetOpcode::G_PHI: {
2457  assert(TypeIdx == 0 && "Expecting only Idx 0");
2458 
2460  for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
2461  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2462  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2463  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
2464  }
2465 
2466  MachineBasicBlock &MBB = *MI.getParent();
2467  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
2468  widenScalarDst(MI, WideTy);
2470  return Legalized;
2471  }
2472  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2473  if (TypeIdx == 0) {
2474  Register VecReg = MI.getOperand(1).getReg();
2475  LLT VecTy = MRI.getType(VecReg);
2477 
2478  widenScalarSrc(
2479  MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
2480  TargetOpcode::G_ANYEXT);
2481 
2482  widenScalarDst(MI, WideTy, 0);
2484  return Legalized;
2485  }
2486 
2487  if (TypeIdx != 2)
2488  return UnableToLegalize;
2490  // TODO: Probably should be zext
2491  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2493  return Legalized;
2494  }
2495  case TargetOpcode::G_INSERT_VECTOR_ELT: {
2496  if (TypeIdx == 1) {
2498 
2499  Register VecReg = MI.getOperand(1).getReg();
2500  LLT VecTy = MRI.getType(VecReg);
2501  LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
2502 
2503  widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
2504  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2505  widenScalarDst(MI, WideVecTy, 0);
2507  return Legalized;
2508  }
2509 
2510  if (TypeIdx == 2) {
2512  // TODO: Probably should be zext
2513  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2515  return Legalized;
2516  }
2517 
2518  return UnableToLegalize;
2519  }
2520  case TargetOpcode::G_FADD:
2521  case TargetOpcode::G_FMUL:
2522  case TargetOpcode::G_FSUB:
2523  case TargetOpcode::G_FMA:
2524  case TargetOpcode::G_FMAD:
2525  case TargetOpcode::G_FNEG:
2526  case TargetOpcode::G_FABS:
2527  case TargetOpcode::G_FCANONICALIZE:
2528  case TargetOpcode::G_FMINNUM:
2529  case TargetOpcode::G_FMAXNUM:
2530  case TargetOpcode::G_FMINNUM_IEEE:
2531  case TargetOpcode::G_FMAXNUM_IEEE:
2532  case TargetOpcode::G_FMINIMUM:
2533  case TargetOpcode::G_FMAXIMUM:
2534  case TargetOpcode::G_FDIV:
2535  case TargetOpcode::G_FREM:
2536  case TargetOpcode::G_FCEIL:
2537  case TargetOpcode::G_FFLOOR:
2538  case TargetOpcode::G_FCOS:
2539  case TargetOpcode::G_FSIN:
2540  case TargetOpcode::G_FLOG10:
2541  case TargetOpcode::G_FLOG:
2542  case TargetOpcode::G_FLOG2:
2543  case TargetOpcode::G_FRINT:
2544  case TargetOpcode::G_FNEARBYINT:
2545  case TargetOpcode::G_FSQRT:
2546  case TargetOpcode::G_FEXP:
2547  case TargetOpcode::G_FEXP2:
2548  case TargetOpcode::G_FPOW:
2549  case TargetOpcode::G_INTRINSIC_TRUNC:
2550  case TargetOpcode::G_INTRINSIC_ROUND:
2551  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2552  assert(TypeIdx == 0);
2554 
2555  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
2556  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
2557 
2558  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2560  return Legalized;
2561  case TargetOpcode::G_FPOWI: {
2562  if (TypeIdx != 0)
2563  return UnableToLegalize;
2565  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2566  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2568  return Legalized;
2569  }
2570  case TargetOpcode::G_INTTOPTR:
2571  if (TypeIdx != 1)
2572  return UnableToLegalize;
2573 
2575  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2577  return Legalized;
2578  case TargetOpcode::G_PTRTOINT:
2579  if (TypeIdx != 0)
2580  return UnableToLegalize;
2581 
2583  widenScalarDst(MI, WideTy, 0);
2585  return Legalized;
2586  case TargetOpcode::G_BUILD_VECTOR: {
2588 
2589  const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
2590  for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
2591  widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
2592 
2593  // Avoid changing the result vector type if the source element type was
2594  // requested.
2595  if (TypeIdx == 1) {
2596  MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
2597  } else {
2598  widenScalarDst(MI, WideTy, 0);
2599  }
2600 
2602  return Legalized;
2603  }
2604  case TargetOpcode::G_SEXT_INREG:
2605  if (TypeIdx != 0)
2606  return UnableToLegalize;
2607 
2609  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2610  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
2612  return Legalized;
2613  case TargetOpcode::G_PTRMASK: {
2614  if (TypeIdx != 1)
2615  return UnableToLegalize;
2617  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2619  return Legalized;
2620  }
2621  }
2622 }
2623 
2624 static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
2625  MachineIRBuilder &B, Register Src, LLT Ty) {
2626  auto Unmerge = B.buildUnmerge(Ty, Src);
2627  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
2628  Pieces.push_back(Unmerge.getReg(I));
2629 }
2630 
2631 LegalizerHelper::LegalizeResult
2632 LegalizerHelper::lowerBitcast(MachineInstr &MI) {
2633  Register Dst = MI.getOperand(0).getReg();
2634  Register Src = MI.getOperand(1).getReg();
2635  LLT DstTy = MRI.getType(Dst);
2636  LLT SrcTy = MRI.getType(Src);
2637 
2638  if (SrcTy.isVector()) {
2639  LLT SrcEltTy = SrcTy.getElementType();
2640  SmallVector<Register, 8> SrcRegs;
2641 
2642  if (DstTy.isVector()) {
2643  int NumDstElt = DstTy.getNumElements();
2644  int NumSrcElt = SrcTy.getNumElements();
2645 
2646  LLT DstEltTy = DstTy.getElementType();
2647  LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
2648  LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
2649 
2650  // If there's an element size mismatch, insert intermediate casts to match
2651  // the result element type.
2652  if (NumSrcElt < NumDstElt) { // Source element type is larger.
2653  // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
2654  //
2655  // =>
2656  //
2657  // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
2658  // %4:_(<2 x s8>) = G_BITCAST %2
2659  // %5:_(<2 x s8>) = G_BITCAST %3
2660  // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
2661  DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
2662  SrcPartTy = SrcEltTy;
2663  } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
2664  //
2665  // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
2666  //
2667  // =>
2668  //
2669  // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
2670  // %4:_(s16) = G_BITCAST %2
2671  // %5:_(s16) = G_BITCAST %3
2672  // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
2673  SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
2674  DstCastTy = DstEltTy;
2675  }
2676 
2677  getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
2678  for (Register &SrcReg : SrcRegs)
2679  SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
2680  } else
2681  getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
2682 
2683  MIRBuilder.buildMerge(Dst, SrcRegs);
2684  MI.eraseFromParent();
2685  return Legalized;
2686  }
2687 
2688  if (DstTy.isVector()) {
2689  SmallVector<Register, 8> SrcRegs;
2690  getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
2691  MIRBuilder.buildMerge(Dst, SrcRegs);
2692  MI.eraseFromParent();
2693  return Legalized;
2694  }
2695 
2696  return UnableToLegalize;
2697 }
2698 
2699 /// Figure out the bit offset into a register when coercing a vector index for
2700 /// the wide element type. This is only for the case when promoting vector to
2701 /// one with larger elements.
2702 ///
2704 /// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2705 /// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
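///
/// For example, when viewing s8 elements as s32 elements (an element ratio of
/// 4, so Log2(ratio) = 2 and Log2(SrcEltSize) = 3):
///   %offset_idx = G_AND %idx, 3
///   %offset_bits = G_SHL %offset_idx, 3 ; (idx % 4) * 8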
2706 static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
2707  Register Idx,
2708  unsigned NewEltSize,
2709  unsigned OldEltSize) {
2710  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2711  LLT IdxTy = B.getMRI()->getType(Idx);
2712 
2713  // Now figure out the amount we need to shift to get the target bits.
2714  auto OffsetMask = B.buildConstant(
2715  IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
2716  auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
2717  return B.buildShl(IdxTy, OffsetIdx,
2718  B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
2719 }
2720 
2721 /// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
2722 /// is casting to a vector with a smaller element size, perform multiple element
2723 /// extracts and merge the results. If this is coercing to a vector with larger
2724 /// elements, index the bitcasted vector and extract the target element with bit
2725 /// operations. This is intended to force the indexing in the native register
2726 /// size for architectures that can dynamically index the register file.
2727 LegalizerHelper::LegalizeResult
2728 LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
2729  LLT CastTy) {
2730  if (TypeIdx != 1)
2731  return UnableToLegalize;
2732 
2733  Register Dst = MI.getOperand(0).getReg();
2734  Register SrcVec = MI.getOperand(1).getReg();
2735  Register Idx = MI.getOperand(2).getReg();
2736  LLT SrcVecTy = MRI.getType(SrcVec);
2737  LLT IdxTy = MRI.getType(Idx);
2738 
2739  LLT SrcEltTy = SrcVecTy.getElementType();
2740  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
2741  unsigned OldNumElts = SrcVecTy.getNumElements();
2742 
2743  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
2744  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
2745 
2746  const unsigned NewEltSize = NewEltTy.getSizeInBits();
2747  const unsigned OldEltSize = SrcEltTy.getSizeInBits();
2748  if (NewNumElts > OldNumElts) {
2749  // Decreasing the vector element size
2750  //
2751  // e.g. i64 = extract_vector_elt x:v2i64, y:i32
2752  // =>
2753  // v4i32:castx = bitcast x:v2i64
2754  //
2755  // i64 = bitcast
2756  // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
2757  // (i32 (extract_vector_elt castx, (2 * y + 1)))
2758  //
2759  if (NewNumElts % OldNumElts != 0)
2760  return UnableToLegalize;
2761 
2762  // Type of the intermediate result vector.
2763  const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
2764  LLT MidTy =
2765  LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
2766 
2767  auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
2768 
2769  SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
2770  auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
2771 
2772  for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
2773  auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
2774  auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
2775  auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
2776  NewOps[I] = Elt.getReg(0);
2777  }
2778 
2779  auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
2780  MIRBuilder.buildBitcast(Dst, NewVec);
2781  MI.eraseFromParent();
2782  return Legalized;
2783  }
2784 
2785  if (NewNumElts < OldNumElts) {
2786  if (NewEltSize % OldEltSize != 0)
2787  return UnableToLegalize;
2788 
2789  // This only depends on powers of 2 because we use bit tricks to figure out
2790  // the bit offset we need to shift to get the target element. A general
2791  // expansion could emit division/multiply.
2792  if (!isPowerOf2_32(NewEltSize / OldEltSize))
2793  return UnableToLegalize;
2794 
2795  // Increasing the vector element size.
2796  // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
2797  //
2798  // =>
2799  //
2800  // %cast = G_BITCAST %vec
2801  // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
2802  // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
2803  // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2804  // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
2805  // %elt_bits = G_LSHR %wide_elt, %offset_bits
2806  // %elt = G_TRUNC %elt_bits
2807 
2808  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2809  auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
2810 
2811  // Divide to get the index in the wider element type.
2812  auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
2813 
2814  Register WideElt = CastVec;
2815  if (CastTy.isVector()) {
2816  WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
2817  ScaledIdx).getReg(0);
2818  }
2819 
2820  // Compute the bit offset into the register of the target element.
2821  Register OffsetBits = getBitcastWiderVectorElementOffset(
2822  MIRBuilder, Idx, NewEltSize, OldEltSize);
2823 
2824  // Shift the wide element to get the target element.
2825  auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
2826  MIRBuilder.buildTrunc(Dst, ExtractedBits);
2827  MI.eraseFromParent();
2828  return Legalized;
2829  }
2830 
2831  return UnableToLegalize;
2832 }
2833 
2834 /// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits in \p
2835 /// TargetReg, while preserving other bits in \p TargetReg.
2836 ///
2837 /// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
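///
/// For example, inserting an s8 value at bit offset 16 of an s32 target:
///   mask   = 0xff << 16
///   result = (TargetReg & ~mask) | (zext(InsertReg) << 16)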
2838 static Register buildBitFieldInsert(MachineIRBuilder &B,
2839  Register TargetReg, Register InsertReg,
2840  Register OffsetBits) {
2841  LLT TargetTy = B.getMRI()->getType(TargetReg);
2842  LLT InsertTy = B.getMRI()->getType(InsertReg);
2843  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
2844  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
2845 
2846  // Produce a bitmask of the value to insert
2847  auto EltMask = B.buildConstant(
2848  TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
2849  InsertTy.getSizeInBits()));
2850  // Shift it into position
2851  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
2852  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
2853 
2854  // Clear out the bits in the wide element
2855  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
2856 
2857  // The value to insert has all zeros already, so stick it into the masked
2858  // wide element.
2859  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
2860 }
2861 
2862 /// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
2863 /// is increasing the element size, perform the indexing in the target element
2864 /// type, and use bit operations to insert at the element position. This is
2865 /// intended for architectures that can dynamically index the register file and
2866 /// want to force indexing in the native register size.
2867 LegalizerHelper::LegalizeResult
2868 LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
2869  LLT CastTy) {
2870  if (TypeIdx != 0)
2871  return UnableToLegalize;
2872 
2873  Register Dst = MI.getOperand(0).getReg();
2874  Register SrcVec = MI.getOperand(1).getReg();
2875  Register Val = MI.getOperand(2).getReg();
2876  Register Idx = MI.getOperand(3).getReg();
2877 
2878  LLT VecTy = MRI.getType(Dst);
2879  LLT IdxTy = MRI.getType(Idx);
2880 
2881  LLT VecEltTy = VecTy.getElementType();
2882  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
2883  const unsigned NewEltSize = NewEltTy.getSizeInBits();
2884  const unsigned OldEltSize = VecEltTy.getSizeInBits();
2885 
2886  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
2887  unsigned OldNumElts = VecTy.getNumElements();
2888 
2889  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
2890  if (NewNumElts < OldNumElts) {
2891  if (NewEltSize % OldEltSize != 0)
2892  return UnableToLegalize;
2893 
2894  // This only depends on powers of 2 because we use bit tricks to figure out
2895  // the bit offset we need to shift to get the target element. A general
2896  // expansion could emit division/multiply.
2897  if (!isPowerOf2_32(NewEltSize / OldEltSize))
2898  return UnableToLegalize;
2899 
2900  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2901  auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
2902 
2903  // Divide to get the index in the wider element type.
2904  auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
2905 
2906  Register ExtractedElt = CastVec;
2907  if (CastTy.isVector()) {
2908  ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
2909  ScaledIdx).getReg(0);
2910  }
2911 
2912  // Compute the bit offset into the register of the target element.
2913  Register OffsetBits = getBitcastWiderVectorElementOffset(
2914  MIRBuilder, Idx, NewEltSize, OldEltSize);
2915 
2916  Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
2917  Val, OffsetBits);
2918  if (CastTy.isVector()) {
2919  InsertedElt = MIRBuilder.buildInsertVectorElement(
2920  CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
2921  }
2922 
2923  MIRBuilder.buildBitcast(Dst, InsertedElt);
2924  MI.eraseFromParent();
2925  return Legalized;
2926  }
2927 
2928  return UnableToLegalize;
2929 }
2930 
2931 LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
2932  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
2933  Register DstReg = LoadMI.getDstReg();
2934  Register PtrReg = LoadMI.getPointerReg();
2935  LLT DstTy = MRI.getType(DstReg);
2936  MachineMemOperand &MMO = LoadMI.getMMO();
2937  LLT MemTy = MMO.getMemoryType();
2938  MachineFunction &MF = MIRBuilder.getMF();
2939 
2940  unsigned MemSizeInBits = MemTy.getSizeInBits();
2941  unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
2942 
2943  if (MemSizeInBits != MemStoreSizeInBits) {
2944  if (MemTy.isVector())
2945  return UnableToLegalize;
2946 
2947  // Promote to a byte-sized load if not loading an integral number of
2948  // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
2949  LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
2950  MachineMemOperand *NewMMO =
2951  MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
2952 
2953  Register LoadReg = DstReg;
2954  LLT LoadTy = DstTy;
2955 
2956  // If this wasn't already an extending load, we need to widen the result
2957  // register to avoid creating a load with a narrower result than the source.
2958  if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
2959  LoadTy = WideMemTy;
2960  LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
2961  }
2962 
2963  if (isa<GSExtLoad>(LoadMI)) {
2964  auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
2965  MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
2966  } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
2967  auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
2968  // The extra bits are guaranteed to be zero, since we stored them that
2969  // way. A zext load from Wide thus automatically gives zext from MemVT.
2970  MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
2971  } else {
2972  MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
2973  }
2974 
2975  if (DstTy != LoadTy)
2976  MIRBuilder.buildTrunc(DstReg, LoadReg);
2977 
2978  LoadMI.eraseFromParent();
2979  return Legalized;
2980  }
2981 
2982  // Big endian lowering not implemented.
2983  if (MIRBuilder.getDataLayout().isBigEndian())
2984  return UnableToLegalize;
2985 
2986  // This load needs splitting into power of 2 sized loads.
2987  //
2988  // Our strategy here is to generate anyextending loads for the smaller
2989  // types up to next power-2 result type, and then combine the two larger
2990  // result values together, before truncating back down to the non-pow-2
2991  // type.
2992  // E.g. v1 = i24 load =>
2993  // v2 = i32 zextload (2 byte)
2994  // v3 = i32 load (1 byte)
2995  // v4 = i32 shl v3, 16
2996  // v5 = i32 or v4, v2
2997  // v1 = i24 trunc v5
2998  // By doing this we generate the correct truncate which should get
2999  // combined away as an artifact with a matching extend.
3000 
3001  uint64_t LargeSplitSize, SmallSplitSize;
3002 
3003  if (!isPowerOf2_32(MemSizeInBits)) {
3004  // This load needs splitting into power of 2 sized loads.
3005  LargeSplitSize = PowerOf2Floor(MemSizeInBits);
3006  SmallSplitSize = MemSizeInBits - LargeSplitSize;
3007  } else {
3008  // This is already a power of 2, but we still need to split this in half.
3009  //
3010  // Assume we're being asked to decompose an unaligned load.
3011  // TODO: If this requires multiple splits, handle them all at once.
3012  auto &Ctx = MF.getFunction().getContext();
3013  if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3014  return UnableToLegalize;
3015 
3016  SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3017  }
3018 
3019  if (MemTy.isVector()) {
3020  // TODO: Handle vector extloads
3021  if (MemTy != DstTy)
3022  return UnableToLegalize;
3023 
3024  // TODO: We can do better than scalarizing the vector and at least split it
3025  // in half.
3026  return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
3027  }
3028 
3029  MachineMemOperand *LargeMMO =
3030  MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3031  MachineMemOperand *SmallMMO =
3032  MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3033 
3034  LLT PtrTy = MRI.getType(PtrReg);
3035  unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
3036  LLT AnyExtTy = LLT::scalar(AnyExtSize);
3037  auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
3038  PtrReg, *LargeMMO);
3039 
3040  auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
3041  LargeSplitSize / 8);
3042  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
3043  auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
3044  auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
3045  SmallPtr, *SmallMMO);
3046 
3047  auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
3048  auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
3049 
3050  if (AnyExtTy == DstTy)
3051  MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
3052  else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
3053  auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3054  MIRBuilder.buildTrunc(DstReg, {Or});
3055  } else {
3056  assert(DstTy.isPointer() && "expected pointer");
3057  auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3058 
3059  // FIXME: We currently consider this to be illegal for non-integral address
3060  // spaces, but we still need a way to reinterpret the bits.
3061  MIRBuilder.buildIntToPtr(DstReg, Or);
3062  }
3063 
3064  LoadMI.eraseFromParent();
3065  return Legalized;
3066 }
3067 
3068 LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
3069  // Lower a non-power of 2 store into multiple pow-2 stores.
3070  // E.g. split an i24 store into an i16 store + i8 store.
3071  // We do this by first extending the stored value to the next largest power
3072  // of 2 type, and then using truncating stores to store the components.
3073  // By doing this, as with G_LOAD, we generate an extend that can be
3074  // artifact-combined away instead of leaving behind extracts.
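  // For example, an s24 store is emitted roughly as:
  //   %ext:_(s32) = G_ANYEXT %val:_(s24)
  //   G_STORE %ext, %ptr        ; 2-byte truncating store of the low 16 bits
  //   %hi:_(s32) = G_LSHR %ext, 16
  //   G_STORE %hi, %ptr + 2     ; 1-byte truncating store of the remaining bits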
3075  Register SrcReg = StoreMI.getValueReg();
3076  Register PtrReg = StoreMI.getPointerReg();
3077  LLT SrcTy = MRI.getType(SrcReg);
3078  MachineFunction &MF = MIRBuilder.getMF();
3079  MachineMemOperand &MMO = **StoreMI.memoperands_begin();
3080  LLT MemTy = MMO.getMemoryType();
3081 
3082  unsigned StoreWidth = MemTy.getSizeInBits();
3083  unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
3084 
3085  if (StoreWidth != StoreSizeInBits) {
3086  if (SrcTy.isVector())
3087  return UnableToLegalize;
3088 
3089  // Promote to a byte-sized store with upper bits zero if not
3090  // storing an integral number of bytes. For example, promote
3091  // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
3092  LLT WideTy = LLT::scalar(StoreSizeInBits);
3093 
3094  if (StoreSizeInBits > SrcTy.getSizeInBits()) {
3095  // Avoid creating a store with a narrower source than result.
3096  SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
3097  SrcTy = WideTy;
3098  }
3099 
3100  auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
3101 
3102  MachineMemOperand *NewMMO =
3103  MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
3104  MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
3105  StoreMI.eraseFromParent();
3106  return Legalized;
3107  }
3108 
3109  if (MemTy.isVector()) {
3110  // TODO: Handle vector trunc stores
3111  if (MemTy != SrcTy)
3112  return UnableToLegalize;
3113 
3114  // TODO: We can do better than scalarizing the vector and at least split it
3115  // in half.
3116  return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
3117  }
3118 
3119  unsigned MemSizeInBits = MemTy.getSizeInBits();
3120  uint64_t LargeSplitSize, SmallSplitSize;
3121 
3122  if (!isPowerOf2_32(MemSizeInBits)) {
3123  LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
3124  SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
3125  } else {
3126  auto &Ctx = MF.getFunction().getContext();
3127  if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3128  return UnableToLegalize; // Don't know what we're being asked to do.
3129 
3130  SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3131  }
3132 
3133  // Extend to the next pow-2. If this store was itself the result of lowering,
3134  // e.g. an s56 store being broken into s32 + s24, we might have a stored type
3135  // that's wider than the stored size.
3136  unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
3137  const LLT NewSrcTy = LLT::scalar(AnyExtSize);
3138 
3139  if (SrcTy.isPointer()) {
3140  const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
3141  SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
3142  }
3143 
3144  auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
3145 
3146  // Obtain the smaller value by shifting away the larger value.
3147  auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
3148  auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
3149 
3150  // Generate the PtrAdd and truncating stores.
3151  LLT PtrTy = MRI.getType(PtrReg);
3152  auto OffsetCst = MIRBuilder.buildConstant(
3153  LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
3154  auto SmallPtr =
3155  MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
3156 
3157  MachineMemOperand *LargeMMO =
3158  MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3159  MachineMemOperand *SmallMMO =
3160  MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3161  MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
3162  MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
3163  StoreMI.eraseFromParent();
3164  return Legalized;
3165 }
3166 
3167 LegalizerHelper::LegalizeResult
3168 LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
3169  switch (MI.getOpcode()) {
3170  case TargetOpcode::G_LOAD: {
3171  if (TypeIdx != 0)
3172  return UnableToLegalize;
3173  MachineMemOperand &MMO = **MI.memoperands_begin();
3174 
3175  // Not sure how to interpret a bitcast of an extending load.
3176  if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3177  return UnableToLegalize;
3178 
3180  bitcastDst(MI, CastTy, 0);
3181  MMO.setType(CastTy);
3183  return Legalized;
3184  }
3185  case TargetOpcode::G_STORE: {
3186  if (TypeIdx != 0)
3187  return UnableToLegalize;
3188 
3189  MachineMemOperand &MMO = **MI.memoperands_begin();
3190 
3191  // Not sure how to interpret a bitcast of a truncating store.
3192  if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3193  return UnableToLegalize;
3194 
3196  bitcastSrc(MI, CastTy, 0);
3197  MMO.setType(CastTy);
3199  return Legalized;
3200  }
3201  case TargetOpcode::G_SELECT: {
3202  if (TypeIdx != 0)
3203  return UnableToLegalize;
3204 
3205  if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
3206  LLVM_DEBUG(
3207  dbgs() << "bitcast action not implemented for vector select\n");
3208  return UnableToLegalize;
3209  }
3210 
3212  bitcastSrc(MI, CastTy, 2);
3213  bitcastSrc(MI, CastTy, 3);
3214  bitcastDst(MI, CastTy, 0);
3216  return Legalized;
3217  }
3218  case TargetOpcode::G_AND:
3219  case TargetOpcode::G_OR:
3220  case TargetOpcode::G_XOR: {
3222  bitcastSrc(MI, CastTy, 1);
3223  bitcastSrc(MI, CastTy, 2);
3224  bitcastDst(MI, CastTy, 0);
3226  return Legalized;
3227  }
3228  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3229  return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
3230  case TargetOpcode::G_INSERT_VECTOR_ELT:
3231  return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
3232  default:
3233  return UnableToLegalize;
3234  }
3235 }
3236 
3237 // Legalize an instruction by changing the opcode in place.
3238 void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
3240  MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
3242 }
3243 
3244 LegalizerHelper::LegalizeResult
3245 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
3246  using namespace TargetOpcode;
3247 
3248  switch(MI.getOpcode()) {
3249  default:
3250  return UnableToLegalize;
3251  case TargetOpcode::G_BITCAST:
3252  return lowerBitcast(MI);
3253  case TargetOpcode::G_SREM:
3254  case TargetOpcode::G_UREM: {
3255  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3256  auto Quot =
3257  MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
3258  {MI.getOperand(1), MI.getOperand(2)});
3259 
3260  auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
3261  MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
3262  MI.eraseFromParent();
3263  return Legalized;
3264  }
3265  case TargetOpcode::G_SADDO:
3266  case TargetOpcode::G_SSUBO:
3267  return lowerSADDO_SSUBO(MI);
3268  case TargetOpcode::G_UMULH:
3269  case TargetOpcode::G_SMULH:
3270  return lowerSMULH_UMULH(MI);
3271  case TargetOpcode::G_SMULO:
3272  case TargetOpcode::G_UMULO: {
3273  // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
3274  // result.
3275  Register Res = MI.getOperand(0).getReg();
3276  Register Overflow = MI.getOperand(1).getReg();
3277  Register LHS = MI.getOperand(2).getReg();
3278  Register RHS = MI.getOperand(3).getReg();
3279  LLT Ty = MRI.getType(Res);
3280 
3281  unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
3282  ? TargetOpcode::G_SMULH
3283  : TargetOpcode::G_UMULH;
3284 
3286  const auto &TII = MIRBuilder.getTII();
3287  MI.setDesc(TII.get(TargetOpcode::G_MUL));
3288  MI.removeOperand(1);
3290 
3291  auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
3292  auto Zero = MIRBuilder.buildConstant(Ty, 0);
3293 
3294  // Move insert point forward so we can use the Res register if needed.
3295  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3296 
3297  // For *signed* multiply, overflow is detected by checking:
3298  // (hi != (lo >> bitwidth-1))
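  // e.g. for s32, the high half must equal the low half arithmetically
  // shifted right by 31 (all zeros or all ones, matching the low half's sign);
  // any other value means the full product did not fit.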
3299  if (Opcode == TargetOpcode::G_SMULH) {
3300  auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
3301  auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
3302  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
3303  } else {
3304  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
3305  }
3306  return Legalized;
3307  }
3308  case TargetOpcode::G_FNEG: {
3309  Register Res = MI.getOperand(0).getReg();
3310  LLT Ty = MRI.getType(Res);
3311 
3312  // TODO: Handle vector types once we are able to
3313  // represent them.
3314  if (Ty.isVector())
3315  return UnableToLegalize;
3316  auto SignMask =
3317  MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
3318  Register SubByReg = MI.getOperand(1).getReg();
3319  MIRBuilder.buildXor(Res, SubByReg, SignMask);
3320  MI.eraseFromParent();
3321  return Legalized;
3322  }
3323  case TargetOpcode::G_FSUB:
3324  case TargetOpcode::G_STRICT_FSUB: {
3325  Register Res = MI.getOperand(0).getReg();
3326  LLT Ty = MRI.getType(Res);
3327 
3328  // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
3329  // First, check if G_FNEG is marked as Lower. If so, we may
3330  // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
3331  if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
3332  return UnableToLegalize;
3333  Register LHS = MI.getOperand(1).getReg();
3334  Register RHS = MI.getOperand(2).getReg();
3335  auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
3336 
3337  if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
3338  MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
3339  else
3340  MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3341 
3342  MI.eraseFromParent();
3343  return Legalized;
3344  }
3345  case TargetOpcode::G_FMAD:
3346  return lowerFMad(MI);
3347  case TargetOpcode::G_FFLOOR:
3348  return lowerFFloor(MI);
3349  case TargetOpcode::G_INTRINSIC_ROUND:
3350  return lowerIntrinsicRound(MI);
3351  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
3352  // Since round even is the assumed rounding mode for unconstrained FP
3353  // operations, rint and roundeven are the same operation.
3354  changeOpcode(MI, TargetOpcode::G_FRINT);
3355  return Legalized;
3356  }
3357  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
3358  Register OldValRes = MI.getOperand(0).getReg();
3359  Register SuccessRes = MI.getOperand(1).getReg();
3360  Register Addr = MI.getOperand(2).getReg();
3361  Register CmpVal = MI.getOperand(3).getReg();
3362  Register NewVal = MI.getOperand(4).getReg();
3363  MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
3364  **MI.memoperands_begin());
3365  MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
3366  MI.eraseFromParent();
3367  return Legalized;
3368  }
3369  case TargetOpcode::G_LOAD:
3370  case TargetOpcode::G_SEXTLOAD:
3371  case TargetOpcode::G_ZEXTLOAD:
3372  return lowerLoad(cast<GAnyLoad>(MI));
3373  case TargetOpcode::G_STORE:
3374  return lowerStore(cast<GStore>(MI));
3375  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3376  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3377  case TargetOpcode::G_CTLZ:
3378  case TargetOpcode::G_CTTZ:
3379  case TargetOpcode::G_CTPOP:
3380  return lowerBitCount(MI);
3381  case G_UADDO: {
3382  Register Res = MI.getOperand(0).getReg();
3383  Register CarryOut = MI.getOperand(1).getReg();
3384  Register LHS = MI.getOperand(2).getReg();
3385  Register RHS = MI.getOperand(3).getReg();
3386 
3387  MIRBuilder.buildAdd(Res, LHS, RHS);
3388  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
3389 
3390  MI.eraseFromParent();
3391  return Legalized;
3392  }
3393  case G_UADDE: {
3394  Register Res = MI.getOperand(0).getReg();
3395  Register CarryOut = MI.getOperand(1).getReg();
3396  Register LHS = MI.getOperand(2).getReg();
3397  Register RHS = MI.getOperand(3).getReg();
3398  Register CarryIn = MI.getOperand(4).getReg();
3399  LLT Ty = MRI.getType(Res);
3400 
3401  auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
3402  auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
3403  MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
3404  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
3405 
3406  MI.eraseFromParent();
3407  return Legalized;
3408  }
3409  case G_USUBO: {
3410  Register Res = MI.getOperand(0).getReg();
3411  Register BorrowOut = MI.getOperand(1).getReg();
3412  Register LHS = MI.getOperand(2).getReg();
3413  Register RHS = MI.getOperand(3).getReg();
3414 
3415  MIRBuilder.buildSub(Res, LHS, RHS);
3416  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
3417 
3418  MI.eraseFromParent();
3419  return Legalized;
3420  }
3421  case G_USUBE: {
3422  Register Res = MI.getOperand(0).getReg();
3423  Register BorrowOut = MI.getOperand(1).getReg();
3424  Register LHS = MI.getOperand(2).getReg();
3425  Register RHS = MI.getOperand(3).getReg();
3426  Register BorrowIn = MI.getOperand(4).getReg();
3427  const LLT CondTy = MRI.getType(BorrowOut);
3428  const LLT Ty = MRI.getType(Res);
3429 
3430  auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3431  auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
3432  MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
3433 
3434  auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
3435  auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
3436  MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
3437 
3438  MI.eraseFromParent();
3439  return Legalized;
3440  }
3441  case G_UITOFP:
3442  return lowerUITOFP(MI);
3443  case G_SITOFP:
3444  return lowerSITOFP(MI);
3445  case G_FPTOUI:
3446  return lowerFPTOUI(MI);
3447  case G_FPTOSI:
3448  return lowerFPTOSI(MI);
3449  case G_FPTRUNC:
3450  return lowerFPTRUNC(MI);
3451  case G_FPOWI:
3452  return lowerFPOWI(MI);
3453  case G_SMIN:
3454  case G_SMAX:
3455  case G_UMIN:
3456  case G_UMAX:
3457  return lowerMinMax(MI);
3458  case G_FCOPYSIGN:
3459  return lowerFCopySign(MI);
3460  case G_FMINNUM:
3461  case G_FMAXNUM:
3462  return lowerFMinNumMaxNum(MI);
3463  case G_MERGE_VALUES:
3464  return lowerMergeValues(MI);
3465  case G_UNMERGE_VALUES:
3466  return lowerUnmergeValues(MI);
3467  case TargetOpcode::G_SEXT_INREG: {
3468  assert(MI.getOperand(2).isImm() && "Expected immediate");
3469  int64_t SizeInBits = MI.getOperand(2).getImm();
3470 
3471  Register DstReg = MI.getOperand(0).getReg();
3472  Register SrcReg = MI.getOperand(1).getReg();
3473  LLT DstTy = MRI.getType(DstReg);
3474  Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
3475 
3476  auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
3477  MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
3478  MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
3479  MI.eraseFromParent();
3480  return Legalized;
3481  }
3482  case G_EXTRACT_VECTOR_ELT:
3483  case TargetOpcode::G_INSERT_VECTOR_ELT:
3484  return lowerExtractInsertVectorElt(MI);
3485  case G_SHUFFLE_VECTOR:
3486  return lowerShuffleVector(MI);
3487  case G_DYN_STACKALLOC:
3488  return lowerDynStackAlloc(MI);
3489  case G_EXTRACT:
3490  return lowerExtract(MI);
3491  case G_INSERT:
3492  return lowerInsert(MI);
3493  case G_BSWAP:
3494  return lowerBswap(MI);
3495  case G_BITREVERSE:
3496  return lowerBitreverse(MI);
3497  case G_READ_REGISTER:
3498  case G_WRITE_REGISTER:
3499  return lowerReadWriteRegister(MI);
3500  case G_UADDSAT:
3501  case G_USUBSAT: {
3502  // Try to make a reasonable guess about which lowering strategy to use. The
3503  // target can override this with custom lowering and calling the
3504  // implementation functions.
3505  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3506  if (LI.isLegalOrCustom({G_UMIN, Ty}))
3507  return lowerAddSubSatToMinMax(MI);
3508  return lowerAddSubSatToAddoSubo(MI);
3509  }
3510  case G_SADDSAT:
3511  case G_SSUBSAT: {
3512  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3513 
3514  // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3515  // since it's a shorter expansion. However, we would need to figure out the
3516  // preferred boolean type for the carry out for the query.
3517  if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3518  return lowerAddSubSatToMinMax(MI);
3519  return lowerAddSubSatToAddoSubo(MI);
3520  }
3521  case G_SSHLSAT:
3522  case G_USHLSAT:
3523  return lowerShlSat(MI);
3524  case G_ABS:
3525  return lowerAbsToAddXor(MI);
3526  case G_SELECT:
3527  return lowerSelect(MI);
3528  case G_SDIVREM:
3529  case G_UDIVREM:
3530  return lowerDIVREM(MI);
3531  case G_FSHL:
3532  case G_FSHR:
3533  return lowerFunnelShift(MI);
3534  case G_ROTL:
3535  case G_ROTR:
3536  return lowerRotate(MI);
3537  case G_MEMSET:
3538  case G_MEMCPY:
3539  case G_MEMMOVE:
3540  return lowerMemCpyFamily(MI);
3541  case G_MEMCPY_INLINE:
3542  return lowerMemcpyInline(MI);
3543  GISEL_VECREDUCE_CASES_NONSEQ
3544  return lowerVectorReduction(MI);
3545  }
3546 }
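// Illustrative sketch (register names and s32/s1 types assumed, not taken from
// a specific test): the G_UADDO case above lowers an add-with-overflow roughly as
//   %res:_(s32)  = G_ADD %lhs, %rhs
//   %carry:_(s1) = G_ICMP intpred(ult), %res(s32), %rhs
// i.e. unsigned overflow occurred iff the wrapped sum is smaller than an input.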
3547 
3548 Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
3549  Align MinAlign) const {
3550  // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
3551  // datalayout for the preferred alignment. Also there should be a target hook
3552  // for this to allow targets to reduce the alignment and ignore the
3553  // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
3554  // the type.
3555  return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
3556 }
3557 
3558 MachineInstrBuilder
3559 LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
3560  MachinePointerInfo &PtrInfo) {
3561  MachineFunction &MF = MIRBuilder.getMF();
3562  const DataLayout &DL = MIRBuilder.getDataLayout();
3563  int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
3564 
3565  unsigned AddrSpace = DL.getAllocaAddrSpace();
3566  LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3567 
3568  PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
3569  return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
3570 }
3571 
3572 static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
3573  LLT VecTy) {
3574  int64_t IdxVal;
3575  if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
3576  return IdxReg;
3577 
3578  LLT IdxTy = B.getMRI()->getType(IdxReg);
3579  unsigned NElts = VecTy.getNumElements();
3580  if (isPowerOf2_32(NElts)) {
3581  APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
3582  return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
3583  }
3584 
3585  return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
3586  .getReg(0);
3587 }
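// Illustrative sketch (index type assumed s64): for a <4 x s32> vector the
// power-of-2 path clamps a variable index with a mask,
//   %clamped:_(s64) = G_AND %idx, 3
// while a <3 x s32> vector falls back to
//   %clamped:_(s64) = G_UMIN %idx, 2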
3588 
3589 Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
3590  Register Index) {
3591  LLT EltTy = VecTy.getElementType();
3592 
3593  // Calculate the element offset and add it to the pointer.
3594  unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
3595  assert(EltSize * 8 == EltTy.getSizeInBits() &&
3596  "Converting bits to bytes lost precision");
3597 
3598  Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);
3599 
3600  LLT IdxTy = MRI.getType(Index);
3601  auto Mul = MIRBuilder.buildMul(IdxTy, Index,
3602  MIRBuilder.buildConstant(IdxTy, EltSize));
3603 
3604  LLT PtrTy = MRI.getType(VecPtr);
3605  return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
3606 }
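// Illustrative sketch (s32 elements and p0 pointers assumed): the element
// pointer is formed by scaling the clamped index by the element size in bytes,
//   %off:_(s64)   = G_MUL %idx, 4
//   %eltptr:_(p0) = G_PTR_ADD %vecptr, %off(s64)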
3607 
3608 #ifndef NDEBUG
3609 /// Check that all vector operands have the same number of elements. Other
3610 /// operands should be listed in NonVecOpIndices.
3611 static bool hasSameNumEltsOnAllVectorOperands(
3612  GenericMachineInstr &MI, MachineRegisterInfo &MRI,
3613  std::initializer_list<unsigned> NonVecOpIndices) {
3614  if (MI.getNumMemOperands() != 0)
3615  return false;
3616 
3617  LLT VecTy = MRI.getType(MI.getReg(0));
3618  if (!VecTy.isVector())
3619  return false;
3620  unsigned NumElts = VecTy.getNumElements();
3621 
3622  for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
3623  MachineOperand &Op = MI.getOperand(OpIdx);
3624  if (!Op.isReg()) {
3625  if (!is_contained(NonVecOpIndices, OpIdx))
3626  return false;
3627  continue;
3628  }
3629 
3630  LLT Ty = MRI.getType(Op.getReg());
3631  if (!Ty.isVector()) {
3632  if (!is_contained(NonVecOpIndices, OpIdx))
3633  return false;
3634  continue;
3635  }
3636 
3637  if (Ty.getNumElements() != NumElts)
3638  return false;
3639  }
3640 
3641  return true;
3642 }
3643 #endif
3644 
3645 /// Fill \p DstOps with DstOps that have the same total number of elements as
3646 /// \p Ty. These DstOps are either scalars when \p NumElts = 1 or vectors with
3647 /// \p NumElts elements. When Ty.getNumElements() is not a multiple of
3648 /// \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
3649 static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
3650  unsigned NumElts) {
3651  LLT LeftoverTy;
3652  assert(Ty.isVector() && "Expected vector type");
3653  LLT EltTy = Ty.getElementType();
3654  LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
3655  int NumParts, NumLeftover;
3656  std::tie(NumParts, NumLeftover) =
3657  getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
3658 
3659  assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
3660  for (int i = 0; i < NumParts; ++i) {
3661  DstOps.push_back(NarrowTy);
3662  }
3663 
3664  if (LeftoverTy.isValid()) {
3665  assert(NumLeftover == 1 && "expected exactly one leftover");
3666  DstOps.push_back(LeftoverTy);
3667  }
3668 }
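// Worked example (illustrative): for Ty = <5 x s16> and NumElts = 2,
// getNarrowTypeBreakDown yields two <2 x s16> parts plus an s16 leftover, so
// DstOps ends up as { <2 x s16>, <2 x s16>, s16 }.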
3669 
3670 /// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
3671 /// made from \p Op depending on operand type.
3672 static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
3673  MachineOperand &Op) {
3674  for (unsigned i = 0; i < N; ++i) {
3675  if (Op.isReg())
3676  Ops.push_back(Op.getReg());
3677  else if (Op.isImm())
3678  Ops.push_back(Op.getImm());
3679  else if (Op.isPredicate())
3680  Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
3681  else
3682  llvm_unreachable("Unsupported type");
3683  }
3684 }
3685 
3686 // Handle splitting vector operations which need to have the same number of
3687 // elements in each type index, but each type index may have a different element
3688 // type.
3689 //
3690 // e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
3691 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3692 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3693 //
3694 // Also handles some irregular breakdown cases, e.g.
3695 // e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
3696 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3697 // s64 = G_SHL s64, s32
3698 LegalizerHelper::LegalizeResult
3699 LegalizerHelper::fewerElementsVectorMultiEltType(
3700  GenericMachineInstr &MI, unsigned NumElts,
3701  std::initializer_list<unsigned> NonVecOpIndices) {
3702  assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
3703  "Non-compatible opcode or not specified non-vector operands");
3704  unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
3705 
3706  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
3707  unsigned NumDefs = MI.getNumDefs();
3708 
3709  // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
3710  // Build instructions with DstOps so instructions found by CSE can be used
3711  // directly; CSE copies the found instruction into the given vreg destination.
3712  SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
3713  // Output registers will be taken from created instructions.
3714  SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
3715  for (unsigned i = 0; i < NumDefs; ++i) {
3716  makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
3717  }
3718 
3719  // Split vector input operands into sub-vectors with NumElts elts + Leftover.
3720  // Operands listed in NonVecOpIndices will be used as is without splitting;
3721  // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
3722  // scalar condition (op 1), immediate in sext_inreg (op 2).
3723  SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
3724  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
3725  ++UseIdx, ++UseNo) {
3726  if (is_contained(NonVecOpIndices, UseIdx)) {
3727  broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
3728  MI.getOperand(UseIdx));
3729  } else {
3730  SmallVector<Register, 8> SplitPieces;
3731  extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces);
3732  for (auto Reg : SplitPieces)
3733  InputOpsPieces[UseNo].push_back(Reg);
3734  }
3735  }
3736 
3737  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
3738 
3739  // Take i-th piece of each input operand split and build sub-vector/scalar
3740  // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
3741  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
3742  SmallVector<DstOp, 2> Defs;
3743  for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
3744  Defs.push_back(OutputOpsPieces[DstNo][i]);
3745 
3746  SmallVector<SrcOp, 3> Uses;
3747  for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
3748  Uses.push_back(InputOpsPieces[InputNo][i]);
3749 
3750  auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
3751  for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
3752  OutputRegs[DstNo].push_back(I.getReg(DstNo));
3753  }
3754 
3755  // Merge small outputs into MI's output for each def operand.
3756  if (NumLeftovers) {
3757  for (unsigned i = 0; i < NumDefs; ++i)
3758  mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
3759  } else {
3760  for (unsigned i = 0; i < NumDefs; ++i)
3761  MIRBuilder.buildMerge(MI.getReg(i), OutputRegs[i]);
3762  }
3763 
3764  MI.eraseFromParent();
3765  return Legalized;
3766 }
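// Illustrative sketch (types assumed): splitting a compare whose predicate
// operand is listed in NonVecOpIndices,
//   %c:_(<4 x s1>) = G_ICMP intpred(eq), %a(<4 x s32>), %b(<4 x s32>)
// becomes two half-width compares whose results are merged back into %c:
//   %c0:_(<2 x s1>) = G_ICMP intpred(eq), %a0(<2 x s32>), %b0(<2 x s32>)
//   %c1:_(<2 x s1>) = G_ICMP intpred(eq), %a1(<2 x s32>), %b1(<2 x s32>)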
3767 
3768 LegalizerHelper::LegalizeResult
3769 LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
3770  unsigned NumElts) {
3771  unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
3772 
3773  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
3774  unsigned NumDefs = MI.getNumDefs();
3775 
3776  SmallVector<DstOp, 8> OutputOpsPieces;
3777  SmallVector<Register, 8> OutputRegs;
3778  makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
3779 
3780  // Instructions that perform the register split will be inserted in the basic
3781  // block where the register is defined (the basic block is the next operand).
3782  SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
3783  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
3784  UseIdx += 2, ++UseNo) {
3785  MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
3786  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
3787  extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]);
3788  }
3789 
3790  // Build PHIs with fewer elements.
3791  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
3792  MIRBuilder.setInsertPt(*MI.getParent(), MI);
3793  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
3794  auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
3795  Phi.addDef(
3796  MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
3797  OutputRegs.push_back(Phi.getReg(0));
3798 
3799  for (unsigned j = 0; j < NumInputs / 2; ++j) {
3800  Phi.addUse(InputOpsPieces[j][i]);
3801  Phi.add(MI.getOperand(1 + j * 2 + 1));
3802  }
3803  }
3804 
3805  // Merge small outputs into MI's def.
3806  if (NumLeftovers) {
3807  mergeMixedSubvectors(MI.getReg(0), OutputRegs);
3808  } else {
3809  MIRBuilder.buildMerge(MI.getReg(0), OutputRegs);
3810  }
3811 
3812  MI.eraseFromParent();
3813  return Legalized;
3814 }
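// Illustrative sketch (types assumed): a <4 x s32> G_PHI with two predecessors
// is rebuilt as two <2 x s32> G_PHIs; each incoming value is unmerged into
// <2 x s32> pieces at the end of its predecessor block, and the narrow PHI
// results are re-merged into the original <4 x s32> def.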
3815 
3816 LegalizerHelper::LegalizeResult
3817 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
3818  unsigned TypeIdx,
3819  LLT NarrowTy) {
3820  const int NumDst = MI.getNumOperands() - 1;
3821  const Register SrcReg = MI.getOperand(NumDst).getReg();
3822  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3823  LLT SrcTy = MRI.getType(SrcReg);
3824 
3825  if (TypeIdx != 1 || NarrowTy == DstTy)
3826  return UnableToLegalize;
3827 
3828  // Requires compatible types. Otherwise SrcReg should have been defined by a
3829  // merge-like instruction that would get artifact-combined. Most likely the
3830  // instruction that defines SrcReg has to perform more/fewer elements
3831  // legalization compatible with NarrowTy.
3832  assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
3833  assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
3834 
3835  if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
3836  (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
3837  return UnableToLegalize;
3838 
3839  // This is most likely DstTy (smaller than register size) packed in SrcTy
3840  // (larger than register size), and since the unmerge was not combined it will
3841  // be lowered to bit sequence extracts from the register. Unpack SrcTy to
3842  // NarrowTy (register size) pieces first. Then unpack each NarrowTy piece to DstTy.
3843 
3844  // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
3845  //
3846  // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
3847  // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
3848  // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
3849  auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
3850  const int NumUnmerge = Unmerge->getNumOperands() - 1;
3851  const int PartsPerUnmerge = NumDst / NumUnmerge;
3852 
3853  for (int I = 0; I != NumUnmerge; ++I) {
3854  auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
3855 
3856  for (int J = 0; J != PartsPerUnmerge; ++J)
3857  MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
3858  MIB.addUse(Unmerge.getReg(I));
3859  }
3860 
3861  MI.eraseFromParent();
3862  return Legalized;
3863 }
3864 
3865 LegalizerHelper::LegalizeResult
3866 LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
3867  LLT NarrowTy) {
3868  Register DstReg = MI.getOperand(0).getReg();
3869  LLT DstTy = MRI.getType(DstReg);
3870  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
3871  // Requires compatible types. Otherwise the user of DstReg did not perform the
3872  // unmerge that should have been artifact-combined. Most likely the instruction
3873  // using DstReg has to do more/fewer elements legalization compatible with NarrowTy.
3874  assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
3875  assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
3876  if (NarrowTy == SrcTy)
3877  return UnableToLegalize;
3878 
3879  // This attempts to lower part of an LCMTy merge/unmerge sequence. The intended
3880  // use is for old MIR tests. Since the changes to more/fewer elements legalization
3881  // it should no longer be possible to generate MIR like this when starting from
3882  // llvm-ir, because the LCMTy approach was replaced with merge/unmerge to vector elements.
3883  if (TypeIdx == 1) {
3884  assert(SrcTy.isVector() && "Expected vector types");
3885  assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
3886  if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
3887  (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
3888  return UnableToLegalize;
3889  // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
3890  //
3891  // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
3892  // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
3893  // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
3894  // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
3895  // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
3896  // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
3897 
3898  SmallVector<Register, 8> Elts;
3899  LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
3900  for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
3901  auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
3902  for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
3903  Elts.push_back(Unmerge.getReg(j));
3904  }
3905 
3906  SmallVector<Register, 8> NarrowTyElts;
3907  unsigned NumNarrowTyElts = NarrowTy.getNumElements();
3908  unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
3909  for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
3910  ++i, Offset += NumNarrowTyElts) {
3911  ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
3912  NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0));
3913  }
3914 
3915  MIRBuilder.buildMerge(DstReg, NarrowTyElts);
3916  MI.eraseFromParent();
3917  return Legalized;
3918  }
3919 
3920  assert(TypeIdx == 0 && "Bad type index");
3921  if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
3922  (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
3923  return UnableToLegalize;
3924 
3925  // This is most likely SrcTy (smaller than register size) packed in DstTy
3926  // (larger than register size), and since the merge was not combined it will be
3927  // lowered to bit sequence packing into a register. Merge SrcTy to NarrowTy
3928  // (register size) pieces first. Then merge each NarrowTy piece into DstTy.
3929 
3930  // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
3931  //
3932  // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
3933  // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
3934  // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
3935  SmallVector<Register, 8> NarrowTyElts;
3936  unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
3937  unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
3938  unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
3939  for (unsigned i = 0; i < NumParts; ++i) {
3940  SmallVector<Register, 8> Sources;
3941  for (unsigned j = 0; j < NumElts; ++j)
3942  Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
3943  NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Sources).getReg(0));
3944  }
3945 
3946  MIRBuilder.buildMerge(DstReg, NarrowTyElts);
3947  MI.eraseFromParent();
3948  return Legalized;
3949 }
3950 
3951 LegalizerHelper::LegalizeResult
3952 LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
3953  unsigned TypeIdx,
3954  LLT NarrowVecTy) {
3955  Register DstReg = MI.getOperand(0).getReg();
3956  Register SrcVec = MI.getOperand(1).getReg();
3957  Register InsertVal;
3958  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
3959 
3960  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
3961  if (IsInsert)
3962  InsertVal = MI.getOperand(2).getReg();
3963 
3964  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
3965 
3966  // TODO: Handle total scalarization case.
3967  if (!NarrowVecTy.isVector())
3968  return UnableToLegalize;
3969 
3970  LLT VecTy = MRI.getType(SrcVec);
3971 
3972  // If the index is a constant, we can really break this down as you would
3973  // expect, and index into the target size pieces.
3974  int64_t IdxVal;
3975  auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
3976  if (MaybeCst) {
3977  IdxVal = MaybeCst->Value.getSExtValue();
3978  // Avoid out of bounds indexing the pieces.
3979  if (IdxVal >= VecTy.getNumElements()) {
3980  MIRBuilder.buildUndef(DstReg);
3981  MI.eraseFromParent();
3982  return Legalized;
3983  }
3984 
3985  SmallVector<Register, 8> VecParts;
3986  LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
3987 
3988  // Build a sequence of NarrowTy pieces in VecParts for this operand.
3989  LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
3990  TargetOpcode::G_ANYEXT);
3991 
3992  unsigned NewNumElts = NarrowVecTy.getNumElements();
3993 
3994  LLT IdxTy = MRI.getType(Idx);
3995  int64_t PartIdx = IdxVal / NewNumElts;
3996  auto NewIdx =
3997  MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
3998 
3999  if (IsInsert) {
4000  LLT PartTy = MRI.getType(VecParts[PartIdx]);
4001 
4002  // Use the adjusted index to insert into one of the subvectors.
4003  auto InsertPart = MIRBuilder.buildInsertVectorElement(
4004  PartTy, VecParts[PartIdx], InsertVal, NewIdx);
4005  VecParts[PartIdx] = InsertPart.getReg(0);
4006 
4007  // Recombine the inserted subvector with the others to reform the result
4008  // vector.
4009  buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
4010  } else {
4011  MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
4012  }
4013 
4014  MI.eraseFromParent();
4015  return Legalized;
4016  }
4017 
4018  // With a variable index, we can't perform the operation in a smaller type, so
4019  // we're forced to expand this.
4020  //
4021  // TODO: We could emit a chain of compare/select to figure out which piece to
4022  // index.
4023  return UnableToLegalize;
4024 }
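// Worked example (illustrative): extracting constant element 5 from an
// <8 x s32> source with NarrowVecTy = <4 x s32> picks sub-vector
// PartIdx = 5 / 4 = 1 and re-indexes with NewIdx = 5 - 4 * 1 = 1, i.e. the
// extract is performed on the second <4 x s32> piece at index 1.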
4025 
4026 LegalizerHelper::LegalizeResult
4027 LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
4028  LLT NarrowTy) {
4029  // FIXME: Don't know how to handle secondary types yet.
4030  if (TypeIdx != 0)
4031  return UnableToLegalize;
4032 
4033  // This implementation doesn't work for atomics. Give up instead of doing
4034  // something invalid.
4035  if (LdStMI.isAtomic())
4036  return UnableToLegalize;
4037 
4038  bool IsLoad = isa<GLoad>(LdStMI);
4039  Register ValReg = LdStMI.getReg(0);
4040  Register AddrReg = LdStMI.getPointerReg();
4041  LLT ValTy = MRI.getType(ValReg);
4042 
4043  // FIXME: Do we need a distinct NarrowMemory legalize action?
4044  if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize()) {
4045  LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
4046  return UnableToLegalize;
4047  }
4048 
4049  int NumParts = -1;
4050  int NumLeftover = -1;
4051  LLT LeftoverTy;
4052  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
4053  if (IsLoad) {
4054  std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
4055  } else {
4056  if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
4057  NarrowLeftoverRegs)) {
4058  NumParts = NarrowRegs.size();
4059  NumLeftover = NarrowLeftoverRegs.size();
4060  }
4061  }
4062 
4063  if (NumParts == -1)
4064  return UnableToLegalize;
4065 
4066  LLT PtrTy = MRI.getType(AddrReg);
4067  const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
4068 
4069  unsigned TotalSize = ValTy.getSizeInBits();
4070 
4071  // Split the load/store into PartTy sized pieces starting at Offset. If this
4072  // is a load, return the new registers in ValRegs. For a store, each element
4073  // of ValRegs should be PartTy. Returns the next offset that needs to be
4074  // handled.
4075  bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
4076  auto MMO = LdStMI.getMMO();
4077  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
4078  unsigned NumParts, unsigned Offset) -> unsigned {
4079  MachineFunction &MF = MIRBuilder.getMF();
4080  unsigned PartSize = PartTy.getSizeInBits();
4081  for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
4082  ++Idx) {
4083  unsigned ByteOffset = Offset / 8;
4084  Register NewAddrReg;
4085 
4086  MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
4087 
4088  MachineMemOperand *NewMMO =
4089  MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
4090 
4091  if (IsLoad) {
4092  Register Dst = MRI.createGenericVirtualRegister(PartTy);
4093  ValRegs.push_back(Dst);
4094  MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
4095  } else {
4096  MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
4097  }
4098  Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
4099  }
4100 
4101  return Offset;
4102  };
4103 
4104  unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
4105  unsigned HandledOffset =
4106  splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
4107 
4108  // Handle the rest of the register if this isn't an even type breakdown.
4109  if (LeftoverTy.isValid())
4110  splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
4111 
4112  if (IsLoad) {
4113  insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
4114  LeftoverTy, NarrowLeftoverRegs);
4115  }
4116 
4117  LdStMI.eraseFromParent();
4118  return Legalized;
4119 }
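// Illustrative sketch (little-endian layout and types assumed): narrowing an
// s96 G_LOAD with NarrowTy = s32 emits three s32 loads at byte offsets 0, 4
// and 8 from the original pointer, and the pieces are recombined into the s96
// result via insertParts.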
4120 
4121 LegalizerHelper::LegalizeResult
4122 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
4123  LLT NarrowTy) {
4124  using namespace TargetOpcode;
4125  GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
4126  unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
4127 
4128  switch (MI.getOpcode()) {
4129  case G_IMPLICIT_DEF:
4130  case G_TRUNC:
4131  case G_AND:
4132  case G_OR:
4133  case G_XOR:
4134  case G_ADD:
4135  case G_SUB:
4136  case G_MUL:
4137  case G_PTR_ADD:
4138  case G_SMULH:
4139  case G_UMULH:
4140  case G_FADD:
4141  case G_FMUL:
4142  case G_FSUB:
4143  case G_FNEG:
4144  case G_FABS:
4145  case G_FCANONICALIZE:
4146  case G_FDIV:
4147  case G_FREM:
4148  case G_FMA:
4149  case G_FMAD:
4150  case G_FPOW:
4151  case G_FEXP:
4152  case G_FEXP2:
4153  case G_FLOG:
4154  case G_FLOG2:
4155  case G_FLOG10:
4156  case G_FNEARBYINT:
4157  case G_FCEIL:
4158  case G_FFLOOR:
4159  case G_FRINT:
4160  case G_INTRINSIC_ROUND:
4161  case G_INTRINSIC_ROUNDEVEN:
4162  case G_INTRINSIC_TRUNC:
4163  case G_FCOS:
4164  case G_FSIN:
4165  case G_FSQRT:
4166  case G_BSWAP:
4167  case G_BITREVERSE:
4168  case G_SDIV:
4169  case G_UDIV:
4170  case G_SREM:
4171  case G_UREM:
4172  case G_SDIVREM:
4173  case G_UDIVREM:
4174  case G_SMIN:
4175  case G_SMAX:
4176  case G_UMIN:
4177  case G_UMAX:
4178  case G_ABS:
4179  case G_FMINNUM:
4180  case G_FMAXNUM:
4181  case G_FMINNUM_IEEE:
4182  case G_FMAXNUM_IEEE:
4183  case G_FMINIMUM:
4184  case G_FMAXIMUM:
4185  case G_FSHL:
4186  case G_FSHR:
4187  case G_ROTL:
4188  case G_ROTR:
4189  case G_FREEZE:
4190  case G_SADDSAT:
4191  case G_SSUBSAT:
4192  case G_UADDSAT:
4193  case G_USUBSAT:
4194  case G_UMULO:
4195  case G_SMULO:
4196  case G_SHL:
4197  case G_LSHR:
4198  case G_ASHR:
4199  case G_SSHLSAT:
4200  case G_USHLSAT:
4201  case G_CTLZ:
4202  case G_CTLZ_ZERO_UNDEF:
4203  case G_CTTZ:
4204  case G_CTTZ_ZERO_UNDEF:
4205  case G_CTPOP:
4206  case G_FCOPYSIGN:
4207  case G_ZEXT:
4208  case G_SEXT:
4209  case G_ANYEXT:
4210  case G_FPEXT:
4211  case G_FPTRUNC:
4212  case G_SITOFP:
4213  case G_UITOFP:
4214  case G_FPTOSI:
4215  case G_FPTOUI:
4216  case G_INTTOPTR:
4217  case G_PTRTOINT:
4218  case G_ADDRSPACE_CAST:
4219  case G_UADDO:
4220  case G_USUBO:
4221  case G_UADDE:
4222  case G_USUBE:
4223  case G_SADDO:
4224  case G_SSUBO:
4225  case G_SADDE:
4226  case G_SSUBE:
4227  case G_STRICT_FADD:
4228  case G_STRICT_FSUB:
4229  case G_STRICT_FMUL:
4230  case G_STRICT_FMA:
4231  return fewerElementsVectorMultiEltType(GMI, NumElts);
4232  case G_ICMP:
4233  case G_FCMP:
4234  return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
4235  case G_SELECT:
4236  if (MRI.getType(MI.getOperand(1).getReg()).isVector())
4237  return fewerElementsVectorMultiEltType(GMI, NumElts);
4238  return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
4239  case G_PHI:
4240  return fewerElementsVectorPhi(GMI, NumElts);
4241  case G_UNMERGE_VALUES:
4242  return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
4243  case G_BUILD_VECTOR:
4244  assert(TypeIdx == 0 && "not a vector type index");
4245  return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4246  case G_CONCAT_VECTORS:
4247  if (TypeIdx != 1) // TODO: This probably does work as expected already.
4248  return UnableToLegalize;
4249  return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4250  case G_EXTRACT_VECTOR_ELT:
4251  case G_INSERT_VECTOR_ELT:
4252  return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
4253  case G_LOAD:
4254  case G_STORE:
4255  return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
4256  case G_SEXT_INREG:
4257  return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
4258  GISEL_VECREDUCE_CASES_NONSEQ
4259  return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
4260  case G_SHUFFLE_VECTOR:
4261  return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
4262  default:
4263  return UnableToLegalize;
4264  }
4265 }
4266 
4267 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
4268  MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4269  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
4270  if (TypeIdx != 0)
4271  return UnableToLegalize;
4272 
4273  Register DstReg = MI.getOperand(0).getReg();
4274  Register Src1Reg = MI.getOperand(1).getReg();
4275  Register Src2Reg = MI.getOperand(2).getReg();
4276  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
4277  LLT DstTy = MRI.getType(DstReg);
4278  LLT Src1Ty = MRI.getType(Src1Reg);
4279  LLT Src2Ty = MRI.getType(Src2Reg);
4280  // The shuffle should be canonicalized by now.
4281  if (DstTy != Src1Ty)
4282  return UnableToLegalize;
4283  if (DstTy != Src2Ty)
4284  return UnableToLegalize;
4285 
4286  if (!isPowerOf2_32(DstTy.getNumElements()))
4287  return UnableToLegalize;
4288 
4289  // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
4290  // Further legalization attempts will be needed to split further.
4291  NarrowTy =
4292  DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
4293  unsigned NewElts = NarrowTy.getNumElements();
4294 
4295  SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
4296  extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
4297  extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
4298  Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
4299  SplitSrc2Regs[1]};
4300 
4301  Register Hi, Lo;
4302 
4303  // If Lo or Hi uses elements from at most two of the four input vectors, then
4304  // express it as a vector shuffle of those two inputs. Otherwise extract the
4305  // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
4306  SmallVector<int, 16> Ops;
4307  for (unsigned High = 0; High < 2; ++High) {
4308  Register &Output = High ? Hi : Lo;
4309 
4310  // Build a shuffle mask for the output, discovering on the fly which
4311  // input vectors to use as shuffle operands (recorded in InputUsed).
4312  // If building a suitable shuffle vector proves too hard, then bail
4313  // out with useBuildVector set.
4314  unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
4315  unsigned FirstMaskIdx = High * NewElts;
4316  bool UseBuildVector = false;
4317  for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4318  // The mask element. This indexes into the input.
4319  int Idx = Mask[FirstMaskIdx + MaskOffset];
4320 
4321  // The input vector this mask element indexes into.
4322  unsigned Input = (unsigned)Idx / NewElts;
4323 
4324  if (Input >= std::size(Inputs)) {
4325  // The mask element does not index into any input vector.
4326  Ops.push_back(-1);
4327  continue;
4328  }
4329 
4330  // Turn the index into an offset from the start of the input vector.
4331  Idx -= Input * NewElts;
4332 
4333  // Find or create a shuffle vector operand to hold this input.
4334  unsigned OpNo;
4335  for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
4336  if (InputUsed[OpNo] == Input) {
4337  // This input vector is already an operand.
4338  break;
4339  } else if (InputUsed[OpNo] == -1U) {
4340  // Create a new operand for this input vector.
4341  InputUsed[OpNo] = Input;
4342  break;
4343  }
4344  }
4345 
4346  if (OpNo >= std::size(InputUsed)) {
4347  // More than two input vectors used! Give up on trying to create a
4348  // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
4349  UseBuildVector = true;
4350  break;
4351  }
4352 
4353  // Add the mask index for the new shuffle vector.
4354  Ops.push_back(Idx + OpNo * NewElts);
4355  }
4356 
4357  if (UseBuildVector) {
4358  LLT EltTy = NarrowTy.getElementType();
4359  SmallVector<Register, 16> SVOps;
4360 
4361  // Extract the input elements by hand.
4362  for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4363  // The mask element. This indexes into the input.
4364  int Idx = Mask[FirstMaskIdx + MaskOffset];
4365 
4366  // The input vector this mask element indexes into.
4367  unsigned Input = (unsigned)Idx / NewElts;
4368 
4369  if (Input >= std::size(Inputs)) {
4370  // The mask element is "undef" or indexes off the end of the input.
4371  SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
4372  continue;
4373  }
4374 
4375  // Turn the index into an offset from the start of the input vector.
4376  Idx -= Input * NewElts;
4377 
4378  // Extract the vector element by hand.
4379  SVOps.push_back(MIRBuilder
4380  .buildExtractVectorElement(
4381  EltTy, Inputs[Input],
4382  MIRBuilder.buildConstant(LLT::scalar(32), Idx))
4383  .getReg(0));
4384  }
4385 
4386  // Construct the Lo/Hi output using a G_BUILD_VECTOR.
4387  Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
4388  } else if (InputUsed[0] == -1U) {
4389  // No input vectors were used! The result is undefined.
4390  Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
4391  } else {
4392  Register Op0 = Inputs[InputUsed[0]];
4393  // If only one input was used, use an undefined vector for the other.
4394  Register Op1 = InputUsed[1] == -1U
4395  ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
4396  : Inputs[InputUsed[1]];
4397  // At least one input vector was used. Create a new shuffle vector.
4398  Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
4399  }
4400 
4401  Ops.clear();
4402  }
4403 
4404  MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
4405  MI.eraseFromParent();
4406  return Legalized;
4407 }
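// Illustrative sketch (types assumed): a <4 x s32> G_SHUFFLE_VECTOR is split
// into Lo and Hi <2 x s32> halves. A half whose mask reads from at most two of
// the four <2 x s32> sub-inputs stays a narrower G_SHUFFLE_VECTOR; otherwise
// its elements are extracted individually and rebuilt with a G_BUILD_VECTOR,
// and the two halves are concatenated back into the original def.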
4408 
4409 static unsigned getScalarOpcForReduction(unsigned Opc) {
4410  unsigned ScalarOpc;
4411  switch (Opc) {
4412  case TargetOpcode::G_VECREDUCE_FADD:
4413  ScalarOpc = TargetOpcode::G_FADD;
4414  break;
4415  case TargetOpcode::G_VECREDUCE_FMUL:
4416  ScalarOpc = TargetOpcode::G_FMUL;
4417  break;
4418  case TargetOpcode::G_VECREDUCE_FMAX:
4419  ScalarOpc = TargetOpcode::G_FMAXNUM;
4420  break;
4421  case TargetOpcode::G_VECREDUCE_FMIN:
4422  ScalarOpc = TargetOpcode::G_FMINNUM;
4423  break;
4424  case TargetOpcode::G_VECREDUCE_ADD:
4425  ScalarOpc = TargetOpcode::G_ADD;
4426  break;
4427  case TargetOpcode::G_VECREDUCE_MUL:
4428  ScalarOpc = TargetOpcode::G_MUL;
4429  break;
4430  case TargetOpcode::G_VECREDUCE_AND:
4431  ScalarOpc = TargetOpcode::G_AND;
4432  break;
4433  case TargetOpcode::G_VECREDUCE_OR:
4434  ScalarOpc = TargetOpcode::G_OR;
4435  break;
4436  case TargetOpcode::G_VECREDUCE_XOR:
4437  ScalarOpc = TargetOpcode::G_XOR;
4438  break;
4439  case TargetOpcode::G_VECREDUCE_SMAX:
4440  ScalarOpc = TargetOpcode::G_SMAX;
4441  break;
4442  case TargetOpcode::G_VECREDUCE_SMIN:
4443  ScalarOpc = TargetOpcode::G_SMIN;
4444  break;
4445  case TargetOpcode::G_VECREDUCE_UMAX:
4446  ScalarOpc = TargetOpcode::G_UMAX;
4447  break;
4448  case TargetOpcode::G_VECREDUCE_UMIN:
4449  ScalarOpc = TargetOpcode::G_UMIN;
4450  break;
4451  default:
4452  llvm_unreachable("Unhandled reduction");
4453  }
4454  return ScalarOpc;
4455 }
4456 
4457 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
4458  MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4459  unsigned Opc = MI.getOpcode();
4460  assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
4461  Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
4462  "Sequential reductions not expected");
4463 
4464  if (TypeIdx != 1)
4465  return UnableToLegalize;
4466 
4467  // The semantics of the normal non-sequential reductions allow us to freely
4468  // re-associate the operation.
4469  Register SrcReg = MI.getOperand(1).getReg();
4470  LLT SrcTy = MRI.getType(SrcReg);
4471  Register DstReg = MI.getOperand(0).getReg();
4472  LLT DstTy = MRI.getType(DstReg);
4473 
4474  if (NarrowTy.isVector() &&
4475  (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
4476  return UnableToLegalize;
4477 
4478  unsigned ScalarOpc = getScalarOpcForReduction(Opc);
4479  SmallVector<Register> SplitSrcs;
4480  // If NarrowTy is a scalar then we're being asked to scalarize.
4481  const unsigned NumParts =
4482  NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
4483  : SrcTy.getNumElements();
4484 
4485  extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
4486  if (NarrowTy.isScalar()) {
4487  if (DstTy != NarrowTy)
4488  return UnableToLegalize; // FIXME: handle implicit extensions.
4489 
4490  if (isPowerOf2_32(NumParts)) {
4491  // Generate a tree of scalar operations to reduce the critical path.
4492  SmallVector<Register> PartialResults;
4493  unsigned NumPartsLeft = NumParts;
4494  while (NumPartsLeft > 1) {
4495  for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
4496  PartialResults.emplace_back(
4497  MIRBuilder
4498  .buildInstr(ScalarOpc, {NarrowTy},
4499  {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
4500  .getReg(0));
4501  }
4502  SplitSrcs = PartialResults;
4503  PartialResults.clear();
4504  NumPartsLeft = SplitSrcs.size();
4505  }
4506  assert(SplitSrcs.size() == 1);
4507  MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
4508  MI.eraseFromParent();
4509  return Legalized;
4510  }
4511  // If we can't generate a tree, then just do sequential operations.
4512  Register Acc = SplitSrcs[0];
4513  for (unsigned Idx = 1; Idx < NumParts; ++Idx)
4514  Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
4515  .getReg(0);
4516  MIRBuilder.buildCopy(DstReg, Acc);
4517  MI.eraseFromParent();
4518  return Legalized;
4519  }
4520  SmallVector<Register> PartialReductions;
4521  for (unsigned Part = 0; Part < NumParts; ++Part) {
4522  PartialReductions.push_back(
4523  MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
4524  }
4525 
4526 
4527  // If the types involved are powers of 2, we can generate intermediate vector
4528  // ops, before generating a final reduction operation.
4529  if (isPowerOf2_32(SrcTy.getNumElements()) &&
4530  isPowerOf2_32(NarrowTy.getNumElements())) {
4531  return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
4532  }
4533 
4534  Register Acc = PartialReductions[0];
4535  for (unsigned Part = 1; Part < NumParts; ++Part) {
4536  if (Part == NumParts - 1) {
4537  MIRBuilder.buildInstr(ScalarOpc, {DstReg},
4538  {Acc, PartialReductions[Part]});
4539  } else {
4540  Acc = MIRBuilder
4541  .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
4542  .getReg(0);
4543  }
4544  }
4545  MI.eraseFromParent();
4546  return Legalized;
4547 }
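// Illustrative sketch (types assumed): reducing G_VECREDUCE_ADD of <8 x s32>
// with NarrowTy = <4 x s32> (both powers of 2) first adds the two <4 x s32>
// halves element-wise and then emits a single <4 x s32> G_VECREDUCE_ADD of the
// sum (the tryNarrowPow2Reduction path below); with a scalar NarrowTy the
// source is fully scalarized and combined with G_ADDs, as a tree when the part
// count is a power of two and sequentially otherwise.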
4548 
4549 LegalizerHelper::LegalizeResult
4550 LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
4551  LLT SrcTy, LLT NarrowTy,
4552  unsigned ScalarOpc) {
4553  SmallVector<Register> SplitSrcs;
4554  // Split the sources into NarrowTy size pieces.
4555  extractParts(SrcReg, NarrowTy,
4556  SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs);
4557  // We're going to do a tree reduction using vector operations until we have
4558  // one NarrowTy size value left.
4559  while (SplitSrcs.size() > 1) {
4560  SmallVector<Register> PartialRdxs;
4561  for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
4562  Register LHS = SplitSrcs[Idx];
4563  Register RHS = SplitSrcs[Idx + 1];
4564  // Create the intermediate vector op.
4565  Register Res =
4566  MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
4567  PartialRdxs.push_back(Res);
4568  }
4569  SplitSrcs = std::move(PartialRdxs);
4570  }
4571  // Finally generate the requested NarrowTy based reduction.
4572  Observer.changingInstr(MI);
4573  MI.getOperand(1).setReg(SplitSrcs[0]);
4574  Observer.changedInstr(MI);
4575  return Legalized;
4576 }
4577 
4578 LegalizerHelper::LegalizeResult
4579 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
4580  const LLT HalfTy, const LLT AmtTy) {
4581 
4582  Register InL = MRI.createGenericVirtualRegister(HalfTy);
4583  Register InH = MRI.createGenericVirtualRegister(HalfTy);
4584  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
4585 
4586  if (Amt.isZero()) {
4587  MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
4588  MI.eraseFromParent();
4589  return Legalized;
4590  }
4591 
4592  LLT NVT = HalfTy;
4593  unsigned NVTBits = HalfTy.getSizeInBits();
4594  unsigned VTBits = 2 * NVTBits;
4595 
4596  SrcOp Lo(Register(0)), Hi(Register(0));
4597  if (MI.getOpcode() == TargetOpcode::G_SHL) {
4598  if (Amt.ugt(VTBits)) {
4599  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
4600  } else if (Amt.ugt(NVTBits)) {
4601  Lo = MIRBuilder.buildConstant(NVT, 0);
4602  Hi = MIRBuilder.buildShl(NVT, InL,
4603  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4604  } else if (Amt == NVTBits) {
4605  Lo = MIRBuilder.buildConstant(NVT, 0);
4606  Hi = InL;
4607  } else {
4608  Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
4609  auto OrLHS =
4610  MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
4611  auto OrRHS = MIRBuilder.buildLShr(
4612  NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4613  Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4614  }
4615  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
4616  if (Amt.ugt(VTBits)) {
4617  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
4618  } else if (Amt.ugt(NVTBits)) {
4619  Lo = MIRBuilder.buildLShr(NVT, InH,
4620  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4621  Hi = MIRBuilder.buildConstant(NVT, 0);
4622  } else if (Amt == NVTBits) {
4623  Lo = InH;
4624  Hi = MIRBuilder.buildConstant(NVT, 0);
4625  } else {
4626  auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
4627 
4628  auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
4629  auto OrRHS = MIRBuilder.buildShl(
4630  NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4631 
4632  Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4633  Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
4634  }
4635  } else {
4636  if (Amt.ugt(VTBits)) {
4637  Hi = Lo = MIRBuilder.buildAShr(
4638  NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4639  } else if (Amt.ugt(NVTBits)) {
4640  Lo = MIRBuilder.buildAShr(NVT, InH,
4641  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4642  Hi = MIRBuilder.buildAShr(NVT, InH,
4643  MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4644  } else if (Amt == NVTBits) {
4645  Lo = InH;
4646  Hi = MIRBuilder.buildAShr(NVT, InH,
4647  MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4648  } else {
4649  auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
4650 
4651  auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
4652  auto OrRHS = MIRBuilder.buildShl(
4653  NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4654 
4655  Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4656  Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
4657  }
4658  }
4659 
4660  MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
4661  MI.eraseFromParent();
4662 
4663  return Legalized;
4664 }
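// Worked example (illustrative): narrowing an s64 G_LSHR by the constant 40
// with HalfTy = s32 takes the Amt > NVTBits branch:
//   %lo:_(s32)  = G_LSHR %inh, 8          ; shift by 40 - 32
//   %hi:_(s32)  = G_CONSTANT i32 0
//   %dst:_(s64) = G_MERGE_VALUES %lo(s32), %hi(s32)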
4665 
4666 // TODO: Optimize if constant shift amount.
4667 LegalizerHelper::LegalizeResult
4668 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
4669  LLT RequestedTy) {
4670  if (TypeIdx == 1) {
4671  Observer.changingInstr(MI);
4672  narrowScalarSrc(MI, RequestedTy, 2);
4673  Observer.changedInstr(MI);
4674  return Legalized;
4675  }
4676 
4677  Register DstReg = MI.getOperand(0).getReg();
4678  LLT DstTy = MRI.getType(DstReg);
4679  if (DstTy.isVector())
4680  return UnableToLegalize;
4681 
4682  Register Amt = MI.getOperand(2).getReg();
4683  LLT ShiftAmtTy = MRI.getType(Amt);
4684  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
4685  if (DstEltSize % 2 != 0)
4686  return UnableToLegalize;
4687 
4688  // Ignore the input type. We can only go to exactly half the size of the
4689  // input. If that isn't small enough, the resulting pieces will be further
4690  // legalized.
4691  const unsigned NewBitSize = DstEltSize / 2;
4692  const LLT HalfTy = LLT::scalar(NewBitSize);
4693  const LLT CondTy = LLT::scalar(1);
4694 
4695  if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
4696  return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
4697  ShiftAmtTy);
4698  }
4699 
4700  // TODO: Expand with known bits.
4701 
4702  // Handle the fully general expansion by an unknown amount.
4703  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
4704 
4705  Register InL = MRI.createGenericVirtualRegister(HalfTy);
4706  Register InH = MRI.createGenericVirtualRegister(HalfTy);
4707  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
4708 
4709  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
4710  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
4711 
4712  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
4713  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
4714  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
4715 
4716  Register ResultRegs[2];
4717  switch (MI.getOpcode()) {
4718  case TargetOpcode::G_SHL: {
4719  // Short: ShAmt < NewBitSize
4720  auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
4721 
4722  auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
4723  auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
4724  auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
4725 
4726  // Long: ShAmt >= NewBitSize
4727  auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
4728  auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
4729 
4730  auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
4731  auto Hi = MIRBuilder.buildSelect(
4732  HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
4733 
4734  ResultRegs[0] = Lo.getReg(0);
4735  ResultRegs[1] = Hi.getReg(0);
4736  break;
4737  }
4738  case TargetOpcode::G_LSHR:
4739  case TargetOpcode::G_ASHR: {
4740  // Short: ShAmt < NewBitSize
4741  auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
4742 
4743  auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
4744  auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
4745  auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
4746 
4747  // Long: ShAmt >= NewBitSize
4748  MachineInstrBuilder HiL;
4749  if (MI.getOpcode() == TargetOpcode::G_LSHR) {
4750  HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
4751  } else {
4752  auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
4753  HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
4754  }
4755  auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
4756  {InH, AmtExcess}); // Lo from Hi part.
4757 
4758  auto Lo = MIRBuilder.buildSelect(
4759  HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
4760 
4761  auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
4762 
4763  ResultRegs[0] = Lo.getReg(0);
4764  ResultRegs[1] = Hi.getReg(0);
4765  break;
4766  }
4767  default:
4768  llvm_unreachable("not a shift");
4769  }
4770 
4771  MIRBuilder.buildMerge(DstReg, ResultRegs);
4772  MI.eraseFromParent();
4773  return Legalized;
4774 }
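// Illustrative summary of the unknown-amount G_SHL case above: the "short"
// path (Amt < NewBitSize) computes
//   Lo = InL << Amt
//   Hi = (InH << Amt) | (InL >> (NewBitSize - Amt))
// while the "long" path computes Lo = 0 and Hi = InL << (Amt - NewBitSize);
// the final Lo/Hi are chosen with selects on IsShort and IsZero.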
4775 
4776 LegalizerHelper::LegalizeResult
4777 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
4778  LLT MoreTy) {
4779  assert(TypeIdx == 0 && "Expecting only Idx 0");
4780  Observer.changingInstr(MI);
4781 
4782  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
4783  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
4784  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
4785  moreElementsVectorSrc(MI, MoreTy, I);
4786  }
4787 
4788